forked from chris1610/pbpython
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbeer_analysis.py
More file actions
100 lines (79 loc) · 2 KB
/
beer_analysis.py
File metadata and controls
100 lines (79 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
"""
Beer Analysis - Using Jupyter Code Cells
Accompanying article on https://pbpython.com/notebook-alternative.html
"""
# %%
import pandas as pd
import seaborn as sns
import plotly.express as px
# %%
# Fetch the processed Craft Beer tables (beers and breweries) from GitHub.
# The first column of each CSV is used as the DataFrame index.
df_beers = pd.read_csv(
    filepath_or_buffer='https://github.com/nickhould/craft-beers-dataset/blob/master/data/processed/beers.csv?raw=True',
    index_col=0,
)
df_breweries = pd.read_csv(
    filepath_or_buffer='https://github.com/nickhould/craft-beers-dataset/blob/master/data/processed/breweries.csv?raw=True',
    index_col=0,
)
# %%
# Use seaborn's 'whitegrid' plot style
sns.set_style('whitegrid')
# %%
# Preview the first rows of the beers table
df_beers.head()
# %%
# Preview the first rows of the breweries table
df_breweries.head()
# %%
# Column dtypes and non-null counts for the beers table
df_beers.info()
# %%
# Column dtypes and non-null counts for the breweries table
df_breweries.info()
# %%
# Attach brewery details to every beer. The left join keeps all beers, and
# columns present in both tables receive the '_beer' / '_brewery' suffix.
all_beer = df_beers.merge(
    df_breweries,
    how='left',
    left_on='brewery_id',
    right_on='id',
    suffixes=('_beer', '_brewery'),
)
# %%
all_beer.head()
# %%
# Number of missing values per column of the merged table
empty_data = all_beer.isnull().sum()
# %%
all_beer.info()
# %%
# Histogram of the 'ounces' column
all_beer['ounces'].plot(kind='hist', title='Beer Size')
# %%
# Flag IPAs with a case-insensitive substring match on the style name.
# na=False marks beers with a missing style as False instead of NaN, so the
# column stays purely boolean for the counts and plots in the next cells.
all_beer['IPA'] = all_beer['style'].str.contains('IPA', case=False, na=False)
# %%
# How many beers are (not) IPAs
all_beer['IPA'].value_counts()
# %%
# Number of beers per style
all_beer_types = all_beer['style'].value_counts()
# %%
# Compare 'ibu' between IPA and non-IPA beers: box plot, then swarm plot
sns.catplot(data=all_beer, x='IPA', y='ibu', kind='box')
# %%
sns.catplot(data=all_beer, x='IPA', y='ibu', kind='swarm')
# %%
# Interactive scatter of 'abv' against 'ibu' for every beer
fig = px.scatter(data_frame=all_beer, x='abv', y='ibu')
fig.show()
# %%
# Same scatter, colored by state, with beer and brewery names on hover
fig = px.scatter(
    data_frame=all_beer,
    x='abv',
    y='ibu',
    color='state',
    hover_name='name_beer',
    hover_data=['name_brewery'],
)
fig.show()
# %%
# Do some analysis on MN beers.
# na=False: the left merge can leave 'state' missing for a beer without a
# matching brewery, and a mask containing NaN cannot be used for boolean
# indexing. The .copy() gives an independent frame rather than a view.
mn_beer = all_beer[all_beer['state'].str.contains('MN', na=False)].copy()
# %%
# Number of beers per state
all_beer['state'].value_counts()
# %%
# Summary statistics for the numeric columns
all_beer.describe()
# %%
# Any relationship between alcohol volume and IBU?
fig = px.scatter(mn_beer,
                 x="abv",
                 y="ibu",
                 hover_name='name_beer',
                 hover_data=['name_brewery'])
fig.show()
# %%