-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocessingdata.py
More file actions
28 lines (21 loc) · 1.22 KB
/
preprocessingdata.py
File metadata and controls
28 lines (21 loc) · 1.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Load training data set from CSV file
training_data_df = pd.read_csv("Training.csv")
# Load testing data set from CSV file
test_data_df = pd.read_csv("Testing.csv")
# Data needs to be scaled to a small range like 0 to 1 for the neural
# network to work well.
scaler = MinMaxScaler(feature_range=(0, 1))
# Scale both the training inputs and outputs
scaled_training = scaler.fit_transform(training_data_df)
scaled_testing = scaler.transform(test_data_df)
# Print out the adjustment that the scaler applied to the total_earnings column of data
print("Note: total_earnings values were scaled by multiplying by {:.10f} and adding {:.6f}".format(scaler.scale_[1], scaler.min_[1]))
##Note: total_earnings values were scaled by multiplying by 0.0039682540 and adding 0.000000
# Create new pandas DataFrame objects from the scaled data
scaled_training_df = pd.DataFrame(scaled_training, columns=training_data_df.columns.values)
scaled_testing_df = pd.DataFrame(scaled_testing, columns=test_data_df.columns.values)
# Save scaled data dataframes to new CSV files
scaled_training_df.to_csv("Training_scaled.csv", index=False)
scaled_testing_df.to_csv("Testing_scaled.csv", index=False)