GitHub - SuswithGaddam/MatPlot: This repo contains solution to Matplot HW

import pandas as pd
import matplotlib.pyplot as plt

city_df = pd.read_csv("Resources/city_data.csv")
ride_df = pd.read_csv("Resources/ride_data.csv")
# city_df.head()
# ride_df.head()

#Merge both dfs on = city
city_ride_df = pd.merge(city_df, ride_df, on='city')
city_ride_df.head()
# city_ride_df.info()

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	city	driver_count	type	date	fare	ride_id
0	Richardfort	38	Urban	2018-02-24 08:40:38	13.93	5628545007794
1	Richardfort	38	Urban	2018-02-13 12:46:07	14.00	910050116494
2	Richardfort	38	Urban	2018-02-16 13:52:19	17.92	820639054416
3	Richardfort	38	Urban	2018-02-01 20:18:28	10.26	9554935945413
4	Richardfort	38	Urban	2018-04-17 02:26:37	23.00	720020655850

#To create bubble plot, need to get the value by grouping the merged df by city
city_ride_gb = city_ride_df.groupby('city')
avg_fare_by_city = city_ride_gb['fare'].mean()
total_fare_by_city = city_ride_gb['fare'].sum()
total_cityRides = city_ride_gb['ride_id'].count()
total_cityDrivers = city_ride_gb['driver_count'].max()
city_type = city_ride_gb['type'].max()
# print(total_cityRides)
# print(city_type) --> Need to use a function to be able to print the values!
# print(city_type)
bubble_dict = {"Average Fare By City":avg_fare_by_city,
               "Total Fare By City": total_fare_by_city,
               "City Type":city_type,
               "Total Rides By City":total_cityRides,
               "Total Drivers By City":total_cityDrivers
              }

#create a df with above dict to manipulat the scatter/bubble plot variables
bubble_df = pd.DataFrame(data = bubble_dict)
bubble_df.head()

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	Average Fare By City	City Type	Total Drivers By City	Total Fare By City	Total Rides By City
city
Amandaburgh	24.641667	Urban	12	443.55	18
Barajasview	25.332273	Urban	26	557.31	22
Barronchester	36.422500	Suburban	11	582.76	16
Bethanyland	32.956111	Suburban	22	593.21	18
Bradshawfurt	40.064000	Rural	7	400.64	10

#Urban Data
urban_city_df = bubble_df.loc[bubble_df["City Type"]=="Urban"]
average_fare_urban = urban_city_df["Average Fare By City"]
# type(average_fare_urban)
total_rides_urban = urban_city_df["Total Rides By City"]
total_drivers_urban = urban_city_df["Total Drivers By City"]

#Suburban Data
suburban_city_df = bubble_df.loc[bubble_df["City Type"]=="Suburban"]
average_fare_suburban = suburban_city_df["Average Fare By City"]
total_rides_suburban = suburban_city_df["Total Rides By City"]
total_drivers_suburban = suburban_city_df["Total Drivers By City"]
# print(len(total_drivers_suburban))

#Rural Data
rural_city_df = bubble_df.loc[bubble_df["City Type"]=="Rural"]
average_fare_rural = rural_city_df["Average Fare By City"]
total_rides_rural = rural_city_df["Total Rides By City"]
total_drivers_rural = rural_city_df["Total Drivers By City"]

#Plot the Bubble Graph - Urban(Light Coral), Suburbab(Light Sky Blue), Rural(Gold)
plt.scatter(total_rides_urban,average_fare_urban,s=6*total_drivers_urban,c="lightcoral",edgecolor ="black",label = "Urban")
plt.scatter(total_rides_suburban,average_fare_suburban,s=6*total_drivers_suburban,c="lightskyblue",edgecolor ="black",label = "Suburban")
plt.scatter(total_rides_rural,average_fare_rural,s=6*total_drivers_rural,c="gold",edgecolor ="black",label = "Rural")
plt.title("Pyber Ride Sharing Data (2018)")
plt.xlabel("Total Number of Riders (Per City)")
plt.ylabel("Average Fare ($)")
plt.xlim(0,40)
plt.ylim(15,45)
plt.grid('on')
lgnd = plt.legend(loc = 1, title = "City Types", markerscale = 0.5)
lgnd.legendHandles[0]._sizes = [30]
lgnd.legendHandles[1]._sizes = [30]
lgnd.legendHandles[2]._sizes = [30]
plt.text(42,35,"Note:\nCircle size correlates with driver count per city")
plt.tight_layout()
plt.show()

#groupby the merged df by type
city_ride_type_gb = city_ride_df.groupby('type')
total_fare_percent_type = (city_ride_type_gb['fare'].sum()/city_ride_df['fare'].sum())*100
total_ride_percent_type = (city_ride_type_gb['ride_id'].count()/city_ride_df['fare'].count())*100
total_driver_percent_type = (city_ride_type_gb['driver_count'].sum()/city_ride_df['driver_count'].sum())*100

#plot the pie chart --> % of Total Fares by City Type
labels = ['Rural','Suburban','Urban']
colors = ['gold','lightskyblue','lightcoral']
explode = [0,0,0.5]
plt.pie(total_fare_percent_type, labels=labels, colors=colors,explode=explode,autopct='%1.1f%%',startangle= 270)
plt.title("% of Total Fares by City Type")
plt.show()

#plot the pie chart --> % of Total Rides by City Type
labels = ['Rural','Suburban','Urban']
colors = ['gold','lightskyblue','lightcoral']
explode = [0.15,0.2,0.1]
plt.pie(total_ride_percent_type, labels=labels, colors=colors,explode=explode,autopct='%1.1f%%',startangle= 90)
plt.title("% of Total Rides by City Type")
plt.show()

#plot the pie chart --> % of Total Drivers by City Type
labels = ['Rural','Suburban','Urban']
colors = ['gold','lightskyblue','lightcoral']
explode = [0.25,0.25,0.1]
plt.pie(total_driver_percent_type, labels=labels, colors=colors,explode=explode,autopct='%1.1f%%',startangle= 140)
plt.title("% of Total Drivers by City Type")
plt.show()

Name		Name	Last commit message	Last commit date
Latest commit History 9 Commits
.ipynb_checkpoints		.ipynb_checkpoints
Images		Images
Resources		Resources
.DS_Store		.DS_Store
Pyber.ipynb		Pyber.ipynb
README.md		README.md
observations.doc		observations.doc

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

About

Uh oh!

Releases

Packages

Uh oh!

Contributors

Uh oh!

Languages

Folders and files

Latest commit

History

Repository files navigation

About

Resources

Uh oh!

Stars

Watchers

Forks

Releases

Packages 0

Uh oh!

Contributors

Uh oh!

Languages

Packages