Skip to content

Commit 6e55fad

Browse files
committed
Getting ready for publishing next article
1 parent 7f0017c commit 6e55fad

File tree

3 files changed

+102
-0
lines changed

3 files changed

+102
-0
lines changed

code/pandas_gui.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Simple Pandas program to combine Excel files and summarize data.
4+
See http://pbpython.com/pandas-gui.html for details on this script
5+
This demonstrates the use of Gooey to add a simple UI on top of the script
6+
"""
7+
from __future__ import print_function
8+
import pandas as pd
9+
import numpy as np
10+
import glob
11+
import os
12+
import json
13+
from gooey import Gooey, GooeyParser
14+
15+
16+
@Gooey(program_name="Create Quarterly Marketing Report")
17+
def parse_args():
18+
""" Use GooeyParser to build up the arguments we will use in our script
19+
Save the arguments in a default json file so that we can retrieve them
20+
every time we run the script.
21+
"""
22+
stored_args = {}
23+
# get the script name without the extension & use it to build up
24+
# the json filename
25+
script_name = os.path.splitext(os.path.basename(__file__))[0]
26+
args_file = "{}-args.json".format(script_name)
27+
# Read in the prior arguments as a dictionary
28+
if os.path.isfile(args_file):
29+
with open(args_file) as data_file:
30+
stored_args = json.load(data_file)
31+
parser = GooeyParser(description='Create Quarterly Marketing Report')
32+
parser.add_argument('data_directory',
33+
action='store',
34+
default=stored_args.get('data_directory'),
35+
widget='DirChooser',
36+
help="Source directory that contains Excel files")
37+
parser.add_argument('output_directory',
38+
action='store',
39+
widget='DirChooser',
40+
default=stored_args.get('output_directory'),
41+
help="Output directory to save summary report")
42+
parser.add_argument('cust_file',
43+
action='store',
44+
default=stored_args.get('cust_file'),
45+
widget='FileChooser',
46+
help='Customer Account Status File')
47+
parser.add_argument('-d', help='Start date to include',
48+
default=stored_args.get('d'),
49+
widget='DateChooser')
50+
args = parser.parse_args()
51+
# Store the values of the arguments so we have them next time we run
52+
with open(args_file, 'w') as data_file:
53+
# Using vars(args) returns the data as a dictionary
54+
json.dump(vars(args), data_file)
55+
return args
56+
57+
58+
def combine_files(src_directory):
59+
""" Read in all of the sales xlsx files and combine into 1
60+
combined DataFrame
61+
"""
62+
all_data = pd.DataFrame()
63+
for f in glob.glob(os.path.join(src_directory, "sales-*.xlsx")):
64+
df = pd.read_excel(f)
65+
all_data = all_data.append(df, ignore_index=True)
66+
all_data['date'] = pd.to_datetime(all_data['date'])
67+
return all_data
68+
69+
70+
def add_customer_status(sales_data, customer_file):
71+
""" Read in the customer file and combine with the sales data
72+
Return the customer with their status as an ordered category
73+
"""
74+
df = pd.read_excel(customer_file)
75+
all_data = pd.merge(sales_data, df, how='left')
76+
# Default everyone to bronze if no data included
77+
all_data['status'].fillna('bronze', inplace=True)
78+
# Convert the status to a category and order it
79+
all_data["status"] = all_data["status"].astype("category")
80+
all_data["status"].cat.set_categories(["gold", "silver", "bronze"], inplace=True)
81+
return all_data
82+
83+
84+
def save_results(sales_data, output):
85+
""" Perform a summary of the data and save the data as an excel file
86+
"""
87+
summarized_sales = sales_data.groupby(["status"])["unit price"].agg([np.mean])
88+
output_file = os.path.join(output, "sales-report.xlsx")
89+
writer = pd.ExcelWriter(output_file, engine='xlsxwriter')
90+
summarized_sales = summarized_sales.reset_index()
91+
summarized_sales.to_excel(writer)
92+
93+
94+
if __name__ == '__main__':
95+
conf = parse_args()
96+
print("Reading sales files")
97+
sales_df = combine_files(conf.data_directory)
98+
print("Reading customer data and combining with sales")
99+
customer_status_sales = add_customer_status(sales_df, conf.cust_file)
100+
print("Saving sales and customer summary data")
101+
save_results(customer_status_sales, conf.output_directory)
102+
print("Done")

data/sales-mar-2014.xlsx

-78.9 KB
Binary file not shown.

output/sales-report.xlsx

5.37 KB
Binary file not shown.

0 commit comments

Comments
 (0)