Skip to content

SuswithGaddam/MatPlot

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

9 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

import pandas as pd
import matplotlib.pyplot as plt
# Load the two source tables: individual ride records and per-city attributes.
ride_df = pd.read_csv("Resources/ride_data.csv")
city_df = pd.read_csv("Resources/city_data.csv")

# Join the city attributes onto every ride through the shared "city" column
# (no `how` is given, so pandas applies its default join type).
city_ride_df = city_df.merge(ride_df, on="city")

# Preview the first rows of the merged table (rendered as notebook cell output).
city_ride_df.head()
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
city driver_count type date fare ride_id
0 Richardfort 38 Urban 2018-02-24 08:40:38 13.93 5628545007794
1 Richardfort 38 Urban 2018-02-13 12:46:07 14.00 910050116494
2 Richardfort 38 Urban 2018-02-16 13:52:19 17.92 820639054416
3 Richardfort 38 Urban 2018-02-01 20:18:28 10.26 9554935945413
4 Richardfort 38 Urban 2018-04-17 02:26:37 23.00 720020655850
# The bubble plot needs one row per city, so aggregate the merged ride-level
# table by city.
city_ride_gb = city_ride_df.groupby("city")

# Ride-derived measures: number of rides, total fare and mean fare per city.
total_cityRides = city_ride_gb["ride_id"].agg("count")
total_fare_by_city = city_ride_gb["fare"].agg("sum")
avg_fare_by_city = city_ride_gb["fare"].agg("mean")

# City attributes are repeated on every ride row of the merged table; taking
# the max per group collapses them back to a single value per city.
total_cityDrivers = city_ride_gb["driver_count"].agg("max")
city_type = city_ride_gb["type"].agg("max")

# Collect the per-city Series under the column names used by the plots below.
bubble_dict = {
    "Average Fare By City": avg_fare_by_city,
    "Total Fare By City": total_fare_by_city,
    "City Type": city_type,
    "Total Rides By City": total_cityRides,
    "Total Drivers By City": total_cityDrivers,
}

# Build a city-indexed DataFrame from the dict so the scatter/bubble plot
# variables can be sliced by city type.
bubble_df = pd.DataFrame(bubble_dict)
bubble_df.head()
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Average Fare By City City Type Total Drivers By City Total Fare By City Total Rides By City
city
Amandaburgh 24.641667 Urban 12 443.55 18
Barajasview 25.332273 Urban 26 557.31 22
Barronchester 36.422500 Suburban 11 582.76 16
Bethanyland 32.956111 Suburban 22 593.21 18
Bradshawfurt 40.064000 Rural 7 400.64 10
# Split the per-city table by city type and pull out, for each type, the three
# Series the bubble plot uses: y value (average fare), x value (total rides)
# and bubble size (total drivers).

# Urban cities
urban_city_df = bubble_df[bubble_df["City Type"].eq("Urban")]
average_fare_urban, total_rides_urban, total_drivers_urban = (
    urban_city_df["Average Fare By City"],
    urban_city_df["Total Rides By City"],
    urban_city_df["Total Drivers By City"],
)

# Suburban cities
suburban_city_df = bubble_df[bubble_df["City Type"].eq("Suburban")]
average_fare_suburban, total_rides_suburban, total_drivers_suburban = (
    suburban_city_df["Average Fare By City"],
    suburban_city_df["Total Rides By City"],
    suburban_city_df["Total Drivers By City"],
)

# Rural cities
rural_city_df = bubble_df[bubble_df["City Type"].eq("Rural")]
average_fare_rural, total_rides_rural, total_drivers_rural = (
    rural_city_df["Average Fare By City"],
    rural_city_df["Total Rides By City"],
    rural_city_df["Total Drivers By City"],
)
# Plot the bubble graph - Urban (light coral), Suburban (light sky blue), Rural (gold).
# One scatter call per city type: x = total rides, y = average fare,
# bubble area = 6 * driver count of the city.
plt.scatter(
    total_rides_urban,
    average_fare_urban,
    s=6 * total_drivers_urban,
    c="lightcoral",
    edgecolor="black",
    label="Urban",
)
plt.scatter(
    total_rides_suburban,
    average_fare_suburban,
    s=6 * total_drivers_suburban,
    c="lightskyblue",
    edgecolor="black",
    label="Suburban",
)
plt.scatter(
    total_rides_rural,
    average_fare_rural,
    s=6 * total_drivers_rural,
    c="gold",
    edgecolor="black",
    label="Rural",
)

plt.title("Pyber Ride Sharing Data (2018)")
# The x axis shows the number of rides per city (not riders).
plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare ($)")
plt.xlim(0, 40)
plt.ylim(15, 45)
# Pass a real boolean instead of the MATLAB-style 'on' string.
plt.grid(True)

lgnd = plt.legend(loc=1, title="City Types", markerscale=0.5)
# Give every legend marker the same fixed size so the legend does not inherit
# the data-dependent bubble sizes. `legend_handles` is the current public
# attribute; fall back to the older `legendHandles` spelling on matplotlib
# versions that predate it.
legend_markers = getattr(lgnd, "legend_handles", None)
if legend_markers is None:
    legend_markers = lgnd.legendHandles
for legend_marker in legend_markers:
    # Public setter instead of writing the private `_sizes` attribute.
    legend_marker.set_sizes([30])

# The note is anchored at x=42, i.e. to the right of the x-axis limit of 40,
# so it sits beside the plot area rather than on top of the data.
plt.text(42, 35, "Note:\nCircle size correlates with driver count per city")
plt.tight_layout()
plt.show()

png

# Group the merged ride-level table by city type.
city_ride_type_gb = city_ride_df.groupby('type')

# Share of total fares collected in each city type.
total_fare_percent_type = (city_ride_type_gb['fare'].sum() / city_ride_df['fare'].sum()) * 100

# Share of rides taken in each city type. Numerator and denominator both count
# the same column (ride_id) so the ratio is computed over the same set of rows.
total_ride_percent_type = (city_ride_type_gb['ride_id'].count() / city_ride_df['ride_id'].count()) * 100

# Share of drivers in each city type. driver_count is a per-city attribute that
# the merge repeats on every ride row, so summing it over the merged table
# would count a city's drivers once per ride and weight the result by ride
# volume. Collapse to one value per city first (max per city, the same rule
# used for the bubble plot's per-city driver total), then sum per type.
drivers_per_city = city_ride_df.groupby(['type', 'city'])['driver_count'].max()
total_driver_percent_type = (drivers_per_city.groupby(level='type').sum() / drivers_per_city.sum()) * 100
# Plot the pie chart --> % of Total Fares by City Type
# Wedge labels are read from the Series index, and colors/offsets are looked
# up by city type, so every wedge is always described by the type whose value
# it shows instead of relying on the groupby result arriving in a fixed order.
type_colors = {'Rural': 'gold', 'Suburban': 'lightskyblue', 'Urban': 'lightcoral'}
# Radial offset of each wedge; only the Urban wedge is pulled out here.
type_explode = {'Rural': 0, 'Suburban': 0, 'Urban': 0.5}

labels = list(total_fare_percent_type.index)
colors = [type_colors[label] for label in labels]
explode = [type_explode[label] for label in labels]

plt.pie(
    total_fare_percent_type,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct='%1.1f%%',
    startangle=270,
)
plt.title("% of Total Fares by City Type")
plt.show()

png

# Plot the pie chart --> % of Total Rides by City Type
# Wedge labels are read from the Series index, and colors/offsets are looked
# up by city type, so every wedge is always described by the type whose value
# it shows instead of relying on the groupby result arriving in a fixed order.
type_colors = {'Rural': 'gold', 'Suburban': 'lightskyblue', 'Urban': 'lightcoral'}
# Radial offset of each wedge from the center of the pie.
type_explode = {'Rural': 0.15, 'Suburban': 0.2, 'Urban': 0.1}

labels = list(total_ride_percent_type.index)
colors = [type_colors[label] for label in labels]
explode = [type_explode[label] for label in labels]

plt.pie(
    total_ride_percent_type,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct='%1.1f%%',
    startangle=90,
)
plt.title("% of Total Rides by City Type")
plt.show()

png

# Plot the pie chart --> % of Total Drivers by City Type
# Wedge labels are read from the Series index, and colors/offsets are looked
# up by city type, so every wedge is always described by the type whose value
# it shows instead of relying on the groupby result arriving in a fixed order.
type_colors = {'Rural': 'gold', 'Suburban': 'lightskyblue', 'Urban': 'lightcoral'}
# Radial offset of each wedge from the center of the pie.
type_explode = {'Rural': 0.25, 'Suburban': 0.25, 'Urban': 0.1}

labels = list(total_driver_percent_type.index)
colors = [type_colors[label] for label in labels]
explode = [type_explode[label] for label in labels]

plt.pie(
    total_driver_percent_type,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct='%1.1f%%',
    startangle=140,
)
plt.title("% of Total Drivers by City Type")
plt.show()

png

About

This repo contains the solution to the Matplotlib homework.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

 
 
 

Contributors