@@ -17,13 +17,17 @@ public class TypedDataset {
1717 public static void main (String [] args ) throws Exception {
1818
1919 Logger .getLogger ("org" ).setLevel (Level .ERROR );
20- SparkSession session = SparkSession .builder ().appName ("StackOverFlowSurvey" ).master ("local[1 ]" ).getOrCreate ();
20+ SparkSession session = SparkSession .builder ().appName ("StackOverFlowSurvey" ).master ("local[* ]" ).getOrCreate ();
2121
2222 DataFrameReader dataFrameReader = session .read ();
2323
2424 Dataset <Row > responses = dataFrameReader .option ("header" ,"true" ).csv ("in/2016-stack-overflow-survey-responses.csv" );
2525
26- Dataset <Row > responseWithSelectedColumns = responses .select (col ("country" ), col ("age_midpoint" ).as ("ageMidPoint" ).cast ("integer" ), col ("occupation" ), col ("salary_midpoint" ).as ("salaryMidPoint" ).cast ("integer" ));
26+ Dataset <Row > responseWithSelectedColumns = responses .select (
27+ col ("country" ),
28+ col ("age_midpoint" ).as ("ageMidPoint" ).cast ("integer" ),
29+ col ("occupation" ),
30+ col ("salary_midpoint" ).as ("salaryMidPoint" ).cast ("integer" ));
2731
2832 Dataset <Response > typedDataset = responseWithSelectedColumns .as (Encoders .bean (Response .class ));
2933
@@ -33,13 +37,13 @@ public static void main(String[] args) throws Exception {
3337 System .out .println ("=== Print 20 records of responses table ===" );
3438 typedDataset .show (20 );
3539
36- System .out .println ("=== Print records where the response is from Afghanistan ===" );
40+ System .out .println ("=== Print the responses from Afghanistan ===" );
3741 typedDataset .filter (response -> response .getCountry ().equals ("Afghanistan" )).show ();
3842
3943 System .out .println ("=== Print the count of occupations ===" );
4044 typedDataset .groupBy (typedDataset .col ("occupation" )).count ().show ();
4145
42- System .out .println ("=== Print records with average mid age less than 20 ===" );
46+ System .out .println ("=== Print responses with average mid age less than 20 ===" );
4347 typedDataset .filter (response -> response .getAgeMidPoint () !=null && response .getAgeMidPoint () < 20 ).show ();
4448
4549 System .out .println ("=== Print the result with salary middle point in descending order ===" );
0 commit comments