Below is my code for loading csv data into dataframe and applying the difference on two columns and appending to a new one using withColumn.The two columns I am trying to find the difference is of kind Double. Please help me in figuring out the following exception:
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
/**
* Created by Guest1 on 5/10/2017.
*/
object arith extends App {
Logger.getLogger("org").setLevel(Level.ERROR)
Logger.getLogger("akka").setLevel(Level.ERROR)
val spark = SparkSession.builder().appName("Arithmetics").
config("spark.master", "local").getOrCreate()
val df =spark.read.option("header","true")
.option("inferSchema",true")
.csv("./Input/Arith.csv").persist()
// df.printSchema()
val sim =df("Average Total Payments") -df("Average Medicare Payments").show(5)
}
I am getting the following exception:
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Exception in thread "main" org.apache.spark.sql.AnalysisException: Cannot resolve column name "Average Total Payments" among (DRG Definition, Provider Id, Provider Name, Provider Street Address, Provider City, Provider State, Provider Zip Code, Hospital Referral Region Description, Total Discharges , Average Covered Charges , Average Total Payments , Average Medicare Payments);
at org.apache.spark.sql.Dataset$$anonfun$resolve$1.apply(Dataset.scala:219)
at org.apache.spark.sql.Dataset$$anonfun$resolve$1.apply(Dataset.scala:219)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.Dataset.resolve(Dataset.scala:218)
at org.apache.spark.sql.Dataset.col(Dataset.scala:1073)
at org.apache.spark.sql.Dataset.apply(Dataset.scala:1059)
at arith$.delayedEndpoint$arith$1(arith.scala:19)
at arith$delayedInit$body.apply(arith.scala:7)
at scala.Function0$class.apply$mcV$sp(Function0.scala:34)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.collection.immutable.List.foreach(List.scala:381)
at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35)
at scala.App$class.main(App.scala:76)
at arith$.main(arith.scala:7)
at arith.main(arith.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:144)