-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
90 lines (76 loc) · 2.49 KB
/
main.py
File metadata and controls
90 lines (76 loc) · 2.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import pandas as pd
import numpy as np
from data_preprocessing import normalization
from data_preprocessing import build_s_a
from rdpg_agent import RDPG
import matplotlib.pyplot as plt
import os
##################### hyper parameters ####################
N_FEATURES = 6        # state dimensionality: width of the sliding window passed to build_s_a
A_LOW = 0             # lower bound of the action range (not referenced in this script)
A_HIGH = 1            # upper bound of the action range (not referenced in this script)
LR_A = 0.001          # actor learning rate
LR_C = 0.003          # critic learning rate
N_ACTOR_HIDDEN = 30   # hidden units in the actor network
N_CRITIC_HIDDEN = 30  # hidden units in the critic network
MAX_EPISODES = 20     # number of training episodes
MAX_STEPS = 1000      # step budget per episode
GAMMA = 0.9           # discount factor
TAU = 0.1             # soft-update factor for the target networks
MEMORY_CAPACITY = 10000  # replay-memory capacity
BATCH_SIZE = 128      # mini-batch size (the "m" of batch gradient descent)
#############################################################
#Load data
data_dir = 'V6.csv' #directory of time series data
# GBK encoding suggests a Chinese-locale CSV export
data = pd.read_csv(data_dir,encoding = 'gbk')
data = data.iloc[:,0]  # keep only the first column: a univariate time series
#Build state matrix and best action
# NOTE(review): presumably build_s_a slides an N_FEATURES-wide window over the
# series and pairs each state with a 1-step-ahead target action -- confirm in
# data_preprocessing; only the call is visible here.
state,action = build_s_a(data,N_FEATURES,1)
#Data split
SPLIT_RATE = 0.75  # fraction of samples used for training (chronological split)
split_index = round(len(state)*SPLIT_RATE)
train_s,train_a = state[:split_index],action[:split_index]
test_s,test_a = state[split_index:],action[split_index:]
#Normalization
# normalization() scales both splits; exact scheme lives in data_preprocessing
train_s_scaled,test_s_scaled = normalization(train_s,test_s)
# Min-max scale the actions to [0, 1] using TRAINING min/max only (no test leakage)
A,B = train_a.max(),train_a.min()
train_a_scaled,test_a_scaled = (train_a-B)/(A-B),(test_a-B)/(A-B)
# Training: walk forward through the scaled training series from a random
# starting point, rewarding the agent for matching the target action.
rdpg = RDPG(N_FEATURES, N_ACTOR_HIDDEN, N_CRITIC_HIDDEN, LR_A, LR_C, MEMORY_CAPACITY, BATCH_SIZE, GAMMA, TAU)
for episode in range(MAX_EPISODES):
    # Sample the start index from [0, len-2]: the loop body accesses
    # train_s_scaled[index + 1], so starting at the last index would raise
    # an IndexError on the very first step (bug in the original, which
    # sampled from the full range).
    index = np.random.choice(len(train_s_scaled) - 1)
    s = train_s_scaled[index]
    ep_reward = 0
    for step in range(MAX_STEPS):
        a = rdpg.choose_action(s)
        # Reward is the negative absolute error w.r.t. the target action.
        r = -abs(a - train_a_scaled[index])
        ep_reward += r
        index += 1
        s_ = train_s_scaled[index]  # next state; index <= len-1 is guaranteed
        rdpg.store_transition(s, a, r, s_)
        rdpg.learn()
        # End the episode at the last training sample or the step budget.
        if (index == len(train_s_scaled) - 1) or (step == MAX_STEPS - 1):
            print('Episode %d : %.2f' % (episode, ep_reward))
            break
        s = s_
# Testing: run the trained policy over the held-out states, rescale its raw
# outputs into the range of the true test actions, and plot both series.
pred = [rdpg.choose_action(obs)[0] for obs in test_s_scaled]
pred = np.array(pred)
# First min-max normalize the raw predictions to [0, 1] ...
p_max, p_min = pred.max(), pred.min()
pred = (pred - p_min) / (p_max - p_min)
pred = pd.Series(pred)
# ... then stretch them into the min/max range of the test actions.
test_max, test_min = test_a.max(), test_a.min()
pred = pred * (test_max - test_min) + test_min
actual = pd.Series(test_a)
plt.plot(pred, label='pred')
plt.plot(actual, label='true')
plt.legend()
plt.show()