import numpy as np
import random

from switch_migration import (calculate_controller_cluster_load_ratio,
                              calculate_discrete_coefficient)
# Supporting functions for Q-learning

def egreedy_policy(state, epsilon, Q_matrix):
    '''
    Return the action to take for the given state using an
    epsilon-greedy policy: explore a random action with probability
    epsilon, otherwise exploit the best-known action, breaking ties
    at random.
    '''
    if np.random.random() < epsilon:
        return np.random.choice(3)
    else:
        return np.random.choice(np.flatnonzero(Q_matrix[state] == np.max(Q_matrix[state])))
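
# A minimal sketch of the tie-breaking behaviour above (values are
# hypothetical): with Q_matrix = {(0, 0): [1, 1, 0]} and epsilon = 0,
# egreedy_policy((0, 0), 0, Q_matrix) returns 0 or 1 uniformly at
# random, since np.flatnonzero collects every index that attains the
# maximum Q-value.
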
def update_epsilon(epsilon, epsilon_dec, epsilon_min):
    '''
    Decay the epsilon value used by the epsilon-greedy policy,
    clamping it at epsilon_min.
    '''
    if epsilon > epsilon_min:
        epsilon -= epsilon_dec
    else:
        epsilon = epsilon_min
    return epsilon
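
# With the defaults used in q_learning below (epsilon = 0.7,
# epsilon_dec = 0.0025, epsilon_min = 0.05), epsilon reaches its floor
# after about (0.7 - 0.05) / 0.0025 = 260 iterations, after which the
# policy keeps exploring 5% of the time.
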
def q_learning(controller_sets_Q, controllers_Q, k, switch_count, load_array):
    '''
    When migration efficiency cannot be optimised by a purely greedy
    strategy, explore non-greedy actions with some probability at each
    decision. Combining epsilon-greedy exploration with this
    decision-making to select migration triples yields a globally
    optimal switch migration decision based on reinforcement learning.
    '''
    # Initialise the Q-table. Each state is a tuple of per-controller
    # loads, mapped to Q-values for 3 actions:
    # do nothing, add a switch, or remove a switch.
    Q_matrix = {tuple([0]*k): [0]*3}
    # gamma - discount factor
    # alpha - learning-rate schedule
    # b     - step-size schedule for the Lagrange multiplier
    gamma = 0.99
    def alpha(n): return 1/(pow(n+1, 0.6))
    def b(n): return 1/(n+1)
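
    # Note: alpha(n) = 1 / (n + 1)^0.6 satisfies the usual stochastic
    # approximation conditions (the sum of alpha(n) diverges while the
    # sum of alpha(n)^2 converges, since 0.5 < 0.6 <= 1), which the
    # convergence guarantees of Q-learning rely on.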
    # Epsilon schedule for the epsilon-greedy policy
    epsilon = 0.7
    epsilon_dec = 0.0025
    epsilon_min = 0.05
    # Set the initial state to all zeros
    state = tuple([0]*k)
    # Store the visited states to plot later
    states_list = []
    # Store the cumulative discounted rewards
    disc_reward_list = []
    l_n_list = []
    cum_disc_reward = 0
    # Track the number of switch exchanges
    num_switch_exchanges = 0
    num_switch_exchanges_list = []
    # Store S_n and S_max to drive the Lagrange multiplier l_n
    S_max = 4000
    S_n = 0
    l_n = 1
    for n in range(0, len(load_array)):
        print('\n----------------Iteration {}-----------------\n'.format(n))
        # Take the previous state (initial or transient) and reconfigure
        # it with the newly observed data, so the state reflects the new
        # flows on the switches.
        old_load = [0]*switch_count if n == 0 else load_array[n-1]
        new_load = load_array[n]
        # Find the switch whose flow count changed this round (add or
        # remove). Default to switch 0 so the state update below is a
        # no-op when no switch changed.
        diff_switch = 0
        for i in range(0, switch_count):
            if old_load[i] != new_load[i]:
                diff_switch = i
                break
        # Modify the state after locating the owning controller cluster:
        # increase or decrease its flow count according to the event
        for i in range(0, len(controller_sets_Q)):
            if diff_switch in controller_sets_Q[i]:
                state = list(state)
                state[i] += new_load[diff_switch] - old_load[diff_switch]
                state = tuple(state)
                break
        states_list.append(state)
        # ------------------------------ Start Q-Learning ------------------------------
        # Randomly choose a controller to transfer a switch from/to
        C_i = np.random.choice(k)
        # Add the state to the Q-table if not already present
        if state in Q_matrix:
            print("Already present state {} with Q-value {}".format(state, Q_matrix[state]))
        else:
            Q_matrix[state] = [0]*3
        # Pick the action for this state with the epsilon-greedy policy
        action = egreedy_policy(state, epsilon, Q_matrix)
        # Safeguard against impossible actions: a controller with no
        # load cannot give up a switch
        if state[C_i] == 0 and action == 2:
            action = 0
        print("Action = " + ["do nothing", "add 1 switch to {}".format(
            controllers_Q[C_i]), "remove 1 switch from {}".format(controllers_Q[C_i])][action])
        cost = 0
        # Compute the next state and the cost of the chosen action
        if action == 0:
            # Do nothing
            next_state = state
            num_switch_exchanges_list.append(num_switch_exchanges)
        elif action == 1:
            # Add 1 switch to C_i: choose the most loaded switch that is
            # not already managed by the destination controller
            S_n += pow(alpha(n), n)
            num_switch_exchanges += 1
            num_switch_exchanges_list.append(num_switch_exchanges)
            controller_dest = C_i
            controller_src = controller_dest
            switch = 0
            temp_load = -np.inf
            for i in range(0, switch_count):
                if load_array[n][i] >= temp_load and i not in controller_sets_Q[controller_dest]:
                    switch = i
                    temp_load = load_array[n][i]
            for i in range(0, k):
                if switch in controller_sets_Q[i]:
                    controller_src = i
            no_of_flows = load_array[n][switch]
            cost = no_of_flows
            # Transfer the switch between controllers
            controller_sets_Q[controller_src].remove(switch)
            controller_sets_Q[controller_dest].append(switch)
            print("Source controller = {}, Destination controller = {}, Switch selected = {}, Number of flows on switch = {}".format(
                controller_src, controller_dest, switch, no_of_flows))
            next_state = list(state)
            next_state[controller_src] -= no_of_flows
            next_state[controller_dest] += no_of_flows
            next_state = tuple(next_state)
        else:
            # Remove 1 switch from C_i: pick the least loaded controller
            # as the destination and move a random switch from the
            # source controller to it
            S_n += pow(alpha(n), n)
            num_switch_exchanges += 1
            num_switch_exchanges_list.append(num_switch_exchanges)
            controller_src = C_i
            controller_dest = controller_src
            temp_flows = np.inf
            for i in range(0, k):
                if state[i] <= temp_flows and i != controller_src:
                    controller_dest = i
                    temp_flows = state[i]
            switch = random.choice(controller_sets_Q[controller_src])
            no_of_flows = load_array[n][switch]
            cost = no_of_flows
            # Transfer the switch between controllers
            controller_sets_Q[controller_src].remove(switch)
            controller_sets_Q[controller_dest].append(switch)
            print("Source controller = {}, Destination controller = {}, Switch selected = {}, Number of flows on switch = {}".format(
                controller_src, controller_dest, switch, no_of_flows))
            next_state = list(state)
            next_state[controller_src] -= no_of_flows
            next_state[controller_dest] += no_of_flows
            next_state = tuple(next_state)
print("Current State = {}".format(state))
print("Next State = {}".format(next_state))
# Check if the Q_matrix dict has a tuple or not. If no, create an entry
if next_state in Q_matrix:
print(
"Already present next state {} and Q-value {}".format(next_state, Q_matrix[next_state]))
else:
Q_matrix[next_state] = [0]*3
# Reward calculation
# The reward in this case is given as -
# Reward = -D'ij; where D'ij is the new discrete coefficient of the system
controller_cluster_load = {}
for i in range(0, k):
controller_cluster_load[controllers_Q[i]] = next_state[i]
controller_cluster_load_ratio = calculate_controller_cluster_load_ratio(
controller_cluster_load, load_array)
Dij_new = calculate_discrete_coefficient(controller_cluster_load_ratio)
reward = -Dij_new
        l_n += b(n)*(S_n - S_max)
        if l_n < 0:
            l_n = 0
        if l_n > 10000:
            l_n = 10000
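
        # The step above is a projected subgradient-style update of the
        # dual variable l_n for the constraint S_n <= S_max: l_n grows
        # while the cumulative migration measure S_n exceeds the budget
        # S_max, shrinks otherwise, and is clamped to [0, 10000].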
        discounted_reward = reward*pow(gamma, n)
        cum_disc_reward += discounted_reward
        print("Cumulative discounted reward = {}".format(cum_disc_reward))
        disc_reward_list.append(cum_disc_reward)
        l_n_list.append(l_n)
        # Q-learning update with the migration cost penalised via l_n
        Q_matrix[state][action] = (1-alpha(n))*Q_matrix[state][action] + \
            alpha(n)*((reward - l_n*cost) + gamma*max(Q_matrix[next_state]))
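
        # The assignment above implements the standard Q-learning update
        #   Q(s, a) <- (1 - alpha_n) * Q(s, a)
        #              + alpha_n * (r - l_n * c + gamma * max_a' Q(s', a'))
        # where the migration cost c is folded into the reward through
        # the Lagrange multiplier l_n.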
        # Update the state to the new state
        state = next_state
        # Decay epsilon for the next iteration
        epsilon = update_epsilon(epsilon, epsilon_dec, epsilon_min)
    return states_list, disc_reward_list, num_switch_exchanges_list
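

# A minimal usage sketch (the values below are hypothetical; assumes
# the switch_migration module from this repository is importable):
#
#   k = 2                                  # two controllers
#   controllers_Q = ['c0', 'c1']
#   controller_sets_Q = [[0, 1], [2, 3]]   # switches managed per controller
#   switch_count = 4
#   load_array = [[5, 0, 0, 0], [5, 3, 0, 0], [5, 3, 2, 0]]
#   states, rewards, exchanges = q_learning(
#       controller_sets_Q, controllers_Q, k, switch_count, load_array)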