forked from LearningJournal/Spark-Programming-In-Python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
27 lines (19 loc) · 638 Bytes
/
utils.py
File metadata and controls
27 lines (19 loc) · 638 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import configparser
from pyspark import SparkConf
def load_survey_df(spark, data_file):
    """Load a CSV file into a DataFrame using the supplied Spark session.

    The reader obtained from ``spark.read`` is configured with the
    ``header`` and ``inferSchema`` options, both set to ``"true"``, before
    the file is read with ``csv``.

    Args:
        spark: Object exposing a ``read`` attribute; presumably a
            ``SparkSession`` (confirm against callers).
        data_file: Location of the CSV data, passed unchanged to ``csv``.

    Returns:
        The value returned by the reader's ``csv`` call for ``data_file``.
    """
    reader = spark.read
    reader = reader.option("header", "true")
    reader = reader.option("inferSchema", "true")
    return reader.csv(data_file)
def count_by_country(survey_df):
    """Count the rows with ``Age < 40`` for each ``Country``.

    Args:
        survey_df: DataFrame-like object that provides the ``Age``,
            ``Gender``, ``Country`` and ``state`` columns.

    Returns:
        The result of ``groupBy("Country").count()`` applied to the
        filtered and projected rows.
    """
    under_forty = survey_df.filter("Age < 40")
    # The projection keeps the same four columns the grouping step receives.
    projected = under_forty.select("Age", "Gender", "Country", "state")
    grouped = projected.groupBy("Country")
    return grouped.count()
def get_spark_app_config(conf_file="spark.conf", section="SPARK_APP_CONFIGS"):
    """Build a ``SparkConf`` from one section of an INI-style config file.

    Every option found in ``section`` is copied onto a fresh ``SparkConf``
    with ``set(key, value)``.

    Args:
        conf_file: Path of the configuration file. Defaults to
            ``"spark.conf"``, which is resolved relative to the current
            working directory.
        section: Name of the section whose options are applied. Defaults to
            ``"SPARK_APP_CONFIGS"``.

    Returns:
        The populated ``SparkConf`` instance.

    Raises:
        configparser.NoSectionError: If ``section`` is not present. This is
            also what happens when ``conf_file`` does not exist, because
            ``ConfigParser.read`` silently skips files it cannot open.
    """
    config = configparser.ConfigParser()
    # ConfigParser lower-cases option names by default. Spark property names
    # are case-sensitive (e.g. spark.driver.extraJavaOptions), so keep each
    # key exactly as it is written in the file.
    config.optionxform = str
    config.read(conf_file)

    spark_conf = SparkConf()
    for key, val in config.items(section):
        spark_conf.set(key, val)
    return spark_conf