|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +""" |
| 3 | +Simple Pandas program to combine Excel files and summarize data. |
| 4 | +See http://pbpython.com/pandas-gui.html for details on this script |
| 5 | +This demonstrates the use of Gooey to add a simple UI on top of the script |
| 6 | +""" |
| 7 | +from __future__ import print_function |
| 8 | +import pandas as pd |
| 9 | +import numpy as np |
| 10 | +import glob |
| 11 | +import os |
| 12 | +import json |
| 13 | +from gooey import Gooey, GooeyParser |
| 14 | + |
| 15 | + |
| 16 | +@Gooey(program_name="Create Quarterly Marketing Report") |
| 17 | +def parse_args(): |
| 18 | + """ Use GooeyParser to build up the arguments we will use in our script |
| 19 | + Save the arguments in a default json file so that we can retrieve them |
| 20 | + every time we run the script. |
| 21 | + """ |
| 22 | + stored_args = {} |
| 23 | + # get the script name without the extension & use it to build up |
| 24 | + # the json filename |
| 25 | + script_name = os.path.splitext(os.path.basename(__file__))[0] |
| 26 | + args_file = "{}-args.json".format(script_name) |
| 27 | + # Read in the prior arguments as a dictionary |
| 28 | + if os.path.isfile(args_file): |
| 29 | + with open(args_file) as data_file: |
| 30 | + stored_args = json.load(data_file) |
| 31 | + parser = GooeyParser(description='Create Quarterly Marketing Report') |
| 32 | + parser.add_argument('data_directory', |
| 33 | + action='store', |
| 34 | + default=stored_args.get('data_directory'), |
| 35 | + widget='DirChooser', |
| 36 | + help="Source directory that contains Excel files") |
| 37 | + parser.add_argument('output_directory', |
| 38 | + action='store', |
| 39 | + widget='DirChooser', |
| 40 | + default=stored_args.get('output_directory'), |
| 41 | + help="Output directory to save summary report") |
| 42 | + parser.add_argument('cust_file', |
| 43 | + action='store', |
| 44 | + default=stored_args.get('cust_file'), |
| 45 | + widget='FileChooser', |
| 46 | + help='Customer Account Status File') |
| 47 | + parser.add_argument('-d', help='Start date to include', |
| 48 | + default=stored_args.get('d'), |
| 49 | + widget='DateChooser') |
| 50 | + args = parser.parse_args() |
| 51 | + # Store the values of the arguments so we have them next time we run |
| 52 | + with open(args_file, 'w') as data_file: |
| 53 | + # Using vars(args) returns the data as a dictionary |
| 54 | + json.dump(vars(args), data_file) |
| 55 | + return args |
| 56 | + |
| 57 | + |
| 58 | +def combine_files(src_directory): |
| 59 | + """ Read in all of the sales xlsx files and combine into 1 |
| 60 | + combined DataFrame |
| 61 | + """ |
| 62 | + all_data = pd.DataFrame() |
| 63 | + for f in glob.glob(os.path.join(src_directory, "sales-*.xlsx")): |
| 64 | + df = pd.read_excel(f) |
| 65 | + all_data = all_data.append(df, ignore_index=True) |
| 66 | + all_data['date'] = pd.to_datetime(all_data['date']) |
| 67 | + return all_data |
| 68 | + |
| 69 | + |
| 70 | +def add_customer_status(sales_data, customer_file): |
| 71 | + """ Read in the customer file and combine with the sales data |
| 72 | + Return the customer with their status as an ordered category |
| 73 | + """ |
| 74 | + df = pd.read_excel(customer_file) |
| 75 | + all_data = pd.merge(sales_data, df, how='left') |
| 76 | + # Default everyone to bronze if no data included |
| 77 | + all_data['status'].fillna('bronze', inplace=True) |
| 78 | + # Convert the status to a category and order it |
| 79 | + all_data["status"] = all_data["status"].astype("category") |
| 80 | + all_data["status"].cat.set_categories(["gold", "silver", "bronze"], inplace=True) |
| 81 | + return all_data |
| 82 | + |
| 83 | + |
| 84 | +def save_results(sales_data, output): |
| 85 | + """ Perform a summary of the data and save the data as an excel file |
| 86 | + """ |
| 87 | + summarized_sales = sales_data.groupby(["status"])["unit price"].agg([np.mean]) |
| 88 | + output_file = os.path.join(output, "sales-report.xlsx") |
| 89 | + writer = pd.ExcelWriter(output_file, engine='xlsxwriter') |
| 90 | + summarized_sales = summarized_sales.reset_index() |
| 91 | + summarized_sales.to_excel(writer) |
| 92 | + |
| 93 | + |
| 94 | +if __name__ == '__main__': |
| 95 | + conf = parse_args() |
| 96 | + print("Reading sales files") |
| 97 | + sales_df = combine_files(conf.data_directory) |
| 98 | + print("Reading customer data and combining with sales") |
| 99 | + customer_status_sales = add_customer_status(sales_df, conf.cust_file) |
| 100 | + print("Saving sales and customer summary data") |
| 101 | + save_results(customer_status_sales, conf.output_directory) |
| 102 | + print("Done") |
0 commit comments