|
| 1 | +import numpy as np |
| 2 | +import pandas as pd |
| 3 | + |
| 4 | +from sklearn import preprocessing |
| 5 | + |
| 6 | + |
# ===================== TYPES OF TRANSFORMATIONS =====================
# Small, strictly positive sample used to demonstrate each transformation.
my_data = [1267, 7, 5432, 987, 1703, 123, 9098, 4072, 540, 3078, 54]

# ---- Logarithmic Transformation ----
# Element-wise natural logarithm; every value in my_data is > 0, so the
# result contains no -inf / NaN entries.
log_data = np.log(my_data)


# ---- Normalization: rescale the sample to unit (L2) norm ----
# preprocessing.normalize works row by row on a 2D array, so the list is
# wrapped as a single row: [my_data]. With the default norm ("l2") the row is
# divided by its Euclidean length. This is NOT min-max scaling: the results
# fall between 0 and 1 here only because my_data has no negative values.
normalized_data = preprocessing.normalize([my_data])

# Drop the wrapper row. tolist() yields plain Python floats, so the list
# prints as numbers rather than as NumPy scalar objects.
normalized_data_1D = normalized_data[0].tolist()

# ---- Standardization: mean: 0, s.dev: 1 ----
# Centers the values on their mean and divides by their standard deviation.
standardized_data = preprocessing.scale(my_data)

print("Original data: ", my_data)
print("-------------")
print("Logarithmic Transformation: ", log_data)
print("-------------")
print("Normalized data: ", normalized_data_1D)
print("-------------")
print("Standardized data: ", standardized_data)
| 31 | + |
# ===================== EXAMPLE WITH DATASET =====================
# Load the semicolon-separated absenteeism dataset from the working directory.
absenteeism_df = pd.read_csv("Absenteeism_at_work.csv", sep=";")

# Swap the raw "Age" column for its standardized version (the same
# preprocessing.scale call, applied to the column's underlying array).
absenteeism_df = absenteeism_df.assign(
    Age=lambda frame: preprocessing.scale(frame["Age"].values)
)

# Show the standardized column.
print(absenteeism_df["Age"])
| 38 | + |
| 39 | +# ===================== CREDITS ===================== |
| 40 | +### Scikit: |
| 41 | +# https://scikit-learn.org/stable/modules/preprocessing.html |
| 42 | + |
| 43 | +### Aletta Smits: |
| 44 | +# Big Data and Social Media / Data Learning Class (week 1 - Day 2) |
| 45 | + |
| 46 | +### Robert R.F. DeFilippi |
| 47 | +# https://medium.com/@rrfd/standardize-or-normalize-examples-in-python-e3f174b65dfc |
0 commit comments