Dummy Variables + Changes

Xylambda · Xylambda · commit 4534f09e71bc · 2019-04-30T16:20:16.000+02:00
diff --git a/1. Clustering/Clustering.py b/1. Clustering/Clustering.py
@@ -53,8 +53,8 @@
 
 
 # ===================== CREDITS =====================
-# Jake VanderPlas:
-    # -> https://jakevdp.github.io/PythonDataScienceHandbook/05.11-k-means.html
+### Jake VanderPlas:
+# https://jakevdp.github.io/PythonDataScienceHandbook/05.11-k-means.html
     
-# Aletta Smits:
-    # -> Big Data and Social Media / Data Learning Class (week 1 - Day 1)
+### Aletta Smits:
+# Big Data and Social Media / Data Learning Class (week 1 - Day 1)
diff --git a/1. Clustering/Dummy_Variables.py b/1. Clustering/Dummy_Variables.py
@@ -0,0 +1,26 @@
+import pandas as pd
+
+# Read the dataset (the CSV uses ';' as its field separator)
+absenteeism_df = pd.read_csv("Absenteeism_at_work.csv", sep=";")
+
+# One-hot encode the categorical column; the prefix labels each dummy with its source
+dummy_var = pd.get_dummies(absenteeism_df["Day of the week"], prefix="Day of the week")
+
+# Append the dummy columns to the original dataset (column-wise)
+absenteeism_df = pd.concat([absenteeism_df, dummy_var], axis=1)
+
+# Drop the original column: it is redundant now that the dummies encode it
+absenteeism_df = absenteeism_df.drop("Day of the week", axis=1)
+
+# Show the first rows of the result
+print(absenteeism_df.head())
+
+# ===================== CREDITS =====================
+### Aletta Smits:
+# Big Data and Social Media / Data Learning Class (week 1 - Day 2)
+
+### Rowan Langford:
+# https://towardsdatascience.com/the-dummys-guide-to-creating-dummy-variables-f21faddb1d40
+
+### Shanelynn:
+# https://www.shanelynn.ie/using-pandas-dataframe-creating-editing-viewing-data-in-python/