Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions Vavmoudakis Actor Critic/Kontoudis.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
function [currentState, time] = Kontoudis(initializations, currentState, xfstate)%, uvec)
global uvec

%% Decrypt Init Vars
T = initializations{6};
N = initializations{8};

time = [];

%% Solve ODE
options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01);

for i=1:N
sol_fnt = ode45(@(t,x) actorCritic(t,x,xfstate,initializations),[(i-1)*T,i*T],currentState(end,:),options);

time = [time;sol_fnt.x']; % save time
currentState = [currentState;sol_fnt.y']; % save state

uDelayInterpFcn = interp2PWC(uvec,0,i*T); % interpolate control input
x1DelayInterpFcn = interp2PWC(currentState(:,1),0,i*T);
x2DelayInterpFcn = interp2PWC(currentState(:,2),0,i*T);
delays = {uDelayInterpFcn; x1DelayInterpFcn; x2DelayInterpFcn};
initializations(end) = {delays};
end
end
101 changes: 101 additions & 0 deletions Vavmoudakis Actor Critic/actorCritic.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
function [dotx] = actorCritic(t,x, xfstate, params)%, uvec)
global uvec

%% Decompose Parameters Cell
A = params(1);
B = params(2);
M = params(3);
R = params(4);
Pt = params(5);
T = params(6);
Tf = params(7);
alphaa = params(9);
alphac = params(10);
amplitude = params(end-4);
percent = params(end-3);
ufinal = params(end-2);
Wcfinal = params(end-1);
delays = params(end);
% Note that every variable above is a struct, thus we get its value by
% using the {} operator or by using cell2mat
A = cell2mat(A);
B = cell2mat(B);
M = cell2mat(M);
R = cell2mat(R);
T = cell2mat(T);
Wcfinal = cell2mat(Wcfinal);
ufinal = cell2mat(ufinal);

uDelayInterpFcn = delays{1, 1}(1);
x1DelayInterpFcn = delays{1, 1}(2);
x2DelayInterpFcn = delays{1, 1}(3);

Wc = [x(3);x(4);x(5);x(6);x(7);x(8)]; %critic
Wa = [x(9);x(10)]; %actor
p = x(11); %integral

UkUfinal = [xfstate(1)^2 ; xfstate(1)*xfstate(2); xfstate(1)*ufinal; xfstate(2)^2 ; xfstate(2)*ufinal; ufinal^2];
Pfinal = 0.5*(xfstate)'*Pt{1}*(xfstate); % equation 19

% Update control
ud = Wa'*(x(1:2)-xfstate); % equation 17


% State and control delays
uddelay = ppval(uDelayInterpFcn{1},t-T);
xdelay(1) = ppval(x1DelayInterpFcn{1},t-T);
xdelay(2) = ppval(x2DelayInterpFcn{1},t-T);

%Augmented state and Kronecker products
U = [x(1:2)-xfstate;ud-ufinal]; % Augmented state
UkU = [U(1)^2 ; U(1)*U(2); U(1)*ud; U(2)^2 ; U(2)*ud; ud^2]; % U kron U
% This is not the exact UkronU (9x1 matrix), but we do it to lower the
% dimensionality. This way, Wc' would be 1x9, so the matrix multiplication
% can actually happen in ec.
UkUdelay = [xdelay(1)^2 ; xdelay(1)*xdelay(2); xdelay(1)*uddelay; xdelay(2)^2 ;...
xdelay(2)*uddelay; uddelay^2];

Qxx = x(3:5); % equations 14-16
Quu = x(8);
Qxu = [x(6) x(7)]';
Qux = Qxu';

sigma = UkU - UkUdelay;

% integral reinforcement
dp = 0.5*((x(1:2)-xfstate)'*M*(x(1:2)-xfstate) -xdelay(1:2)*M*xdelay(1:2)' ...
+ud'*R*ud - uddelay'*R*uddelay);

% mine
QStar = Wc'*UkU;
uStar = -inv(Quu)*Qux*U(1:2);

% Approximation Errors
ec = p + Wc'*UkU - Wc'*UkUdelay; % Critic approximator error
ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state
ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error
% or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2)
% BASICALLY, looking at eq30, ud = Wa'*U(1:2) = Wa'(x(1:2)-xfstate) wants
% to reach u* = inv(Quu)*Qux*U(1:2), that's why ea is defined like this.
% ud is slowly getting to u*, aka
% Wa'*U(1:2) -> inv(Quu)*Qux*U(1:2)


% critic update
dWc = -alphac{1}*((sigma./(sigma'*sigma+1).^2)*ec'+(UkUfinal./((UkUfinal'*UkUfinal+1).^2)*ecfinal')); % eq 22

% actor update
dWa = -alphaa{1}*U(1:2)*ea'; % equation 23

% Persistent Excitation
if t<=(percent{1}/100)*Tf{1}
unew=(ud+amplitude{1}*exp(-0.009*t)*2*(sin(t)^2*cos(t)+sin(2*t)^2*cos(0.1*t)+...
sin(-1.2*t)^2*cos(0.5*t)+sin(t)^5+sin(1.12*t)^2+cos(2.4*t)*sin(2.4*t)^3));
else
unew=ud;
end

dx=A*[U(1);U(2)]+B*unew;
uvec = [uvec;unew];
dotx = [dx;dWc;dWa;dp;QStar;uStar]; % augmented state
end
5 changes: 4 additions & 1 deletion Vavmoudakis Actor Critic/baby_fnt.m
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@
ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state
ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error
% or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2)

% BASICALLY, looking at eq30, ud = Wa'*U(1:2) = Wa'(x(1:2)-xfstate) wants
% to reach u* = inv(Quu)*Qux*U(1:2), that's why ea is defined like this.
% ud is slowly getting to u*, aka
% Wa'*U(1:2) -> inv(Quu)*Qux*U(1:2)

% critic update
dWc = -alphac*((sigma./(sigma'*sigma+1).^2)*ec'+(UkUfinal./((UkUfinal'*UkUfinal+1).^2)*ecfinal')); % eq 22
Expand Down
6 changes: 3 additions & 3 deletions Vavmoudakis Actor Critic/interp2PWC.m
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
% piecewise continuous function given the initial and final time provided

function [f] = interp2PWC(y,xi,xf)
[m,n] = size(y); % get size of the input vec
[m,~] = size(y); % get size of the input vec
x = linspace(xi,xf,m); %create x vector corresponding to y
[c,ia,ic] = unique(y,'stable'); % figure out who and where are the unique points
[c,ia,~] = unique(y,'stable'); % figure out who and where are the unique points
f = mkpp([xi x(ia(2:end)') xf],c); % interpolate in a piecewise continuous fcn with corresponding time vec
end
end
12 changes: 3 additions & 9 deletions Vavmoudakis Actor Critic/main.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
global T Tf % ODE paramters
global alphaa alphac % GD parameters
global amplitude percent % Noise parameters
global uvec uDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn sigma UkU UkUdelay
global uvec uDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn

A = [-1 0;0 -2];
B = [0 1]';
[n,m]=size(B); % States and controls
Expand Down Expand Up @@ -51,9 +52,6 @@

t_save_fnt = []; % time vec
x_save_fnt = [x0state;Wc0;Wa0;p0;QStar0;uStar0]';%;xfstate]'; % build the initial cond state vec. This is x initially
sigma_save_fnt = [];
UkU_save = [];
UkUdelay_save =[];

uDelayInterpFcn = interp2PWC(0,0,1);
x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,0);
Expand All @@ -64,15 +62,11 @@
for i=1:N
options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01); % 'OutputFcn',@odeplot,
tic;
sol_fnt = ode23(@baby_fnt,[(i-1)*T,i*T],x_save_fnt(end,:),options);
sol_fnt = ode45(@baby_fnt,[(i-1)*T,i*T],x_save_fnt(end,:),options);
toc;

t_save_fnt = [t_save_fnt;sol_fnt.x']; % save time
x_save_fnt = [x_save_fnt;sol_fnt.y']; % save state
sigma_save_fnt = [sigma_save_fnt sigma];
UkU_save = [UkU_save [sqrt(UkU(1)); sqrt(UkU(4))]];
UkUdelay_save =[UkUdelay_save [sqrt(UkUdelay(1)); sqrt(UkUdelay(4))]];


uDelayInterpFcn = interp2PWC(uvec,0,i*T); % interpolate control input
x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,i*T);
Expand Down
194 changes: 194 additions & 0 deletions Vavmoudakis Actor Critic/reachabilityMain.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
function reachabilityMain()
global uvec
close all;
clc;

%% Dynamics and System Parameters
A = [-1 0;0 -2];
B = [0 1]';
[~,m] = size(B); % States and controls
M = diag([1,.1]);
R = .1;
Pt = diag([.5,.5]); % Ricatti at final time

% ODEs parameters
T = 0.05; % Delay
Tf = 6; % Finite horizon in seconds
N = Tf/T; % Number of simulation rounds

% Gradient descent parameters
alphac = 90;
alphaa = 1.5;

% Noise paramaters
amplitude = .1; % changes the amplitude of the PE (kyriakos pe)
percent = 50;

Wc0 = [rand(5,1);10*rand(1)]; % critic weights
Wa0 = .5*ones(2,1); % actor weights
ufinal = 0.001;
Wcfinal = 12*ones(6,1);

% Initial condition of the augmented system
x0state = [32 -15]'; % initial state
xfstate = [-11.5 7.5]'; % final state

p0 = x0state'*M*x0state; % integral RL, initil control 0 % from equation 8

QStar0 = rand(m);
uStar0 = rand(m);

x_save_fnt = [x0state;Wc0;Wa0;p0;QStar0;uStar0]'; % build the initial cond state vec. This is x initially

uDelayInterpFcn = interp2PWC(0,0,1);
x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,0);
x2DelayInterpFcn = interp2PWC(x_save_fnt(:,2),0,0);

delays = {uDelayInterpFcn; x1DelayInterpFcn; x2DelayInterpFcn};
initializations = {A; B; M; R; Pt; T; Tf; N; ...
alphaa; alphac; amplitude; ...
percent; ufinal; Wcfinal; delays};

%% Set target
[xVal, yVal] = circle(-11.5,7.5,4);%(0,0,8);
figure
hold on;
plot(xVal,yVal, 'r');
plot(x0state(1),x0state(2),'.b')
xlim([-20 40])
ylim([-20 40])
title('Initial Point(Blue) and Target(Red)')
pause(0.1);
hold off;

% Retrieve actual number of points of the target
[~, xPoints] = size(xVal); % no need for yPoints cause they are the same

allFinalStatesVector = [];

%% Call Kontoudis' algorithm
for i=1:xPoints
xfstate = [xVal(i) yVal(i)]';
tic
uvec = [];
[stateVector, time] = Kontoudis(initializations, x_save_fnt, xfstate);
allFinalStatesVector = [allFinalStatesVector; stateVector(end, 1:2)];
toc
end

%% Plots
figure
hold on;
plot(xVal,yVal, 'r'); hold on; % target
plot(x0state(1),x0state(2),'.b'); hold on; % initial State
plot(allFinalStatesVector(1:end,1),allFinalStatesVector(1:end,2),'-b'); hold on;
xlim([-20 40])
ylim([-20 40])
title('Reachable Set')
grid on; hold off;

figure
hold on;
plot(xVal,yVal, 'r'); hold on;
plot(allFinalStatesVector(1:end,1),allFinalStatesVector(1:end,2),'-b'); hold on;
plot(x0state(1),x0state(2),'.b'); hold on;
% find closest point of the target to the initial point
dist = sqrt((x0state(1) - allFinalStatesVector(:,1)).^2 + (x0state(2) - allFinalStatesVector(:,2)).^2);
[~, indexOfMin] = min(dist);
closestX = xVal(indexOfMin);
closestY = yVal(indexOfMin);
% actually plot
plot(closestX, closestY, '-g'); hold on;
line([x0state(1), closestX], [x0state(2), closestY], 'LineWidth', 2, 'Color', 'g');
xlim([-20 40])
ylim([-20 40])
title('Optimal Trajectory to Reachable Set = Target')
grid on; hold off;

figure
hold on;
plot(xVal,yVal, 'r'); hold on;
plot(x0state(1),x0state(2),'.b'); hold on;
% find closest point of the target to the initial point
dist = sqrt((x0state(1) - xVal(:,1))^2 + (x0state(2) - yVal(:,1))^2);
[~, indexOfMin] = min(dist);
closestX = xVal(indexOfMin);
closestY = yVal(indexOfMin);
% actually plot
plot(closestX, closestY, '-g'); hold on;
line([x0state(1), closestX], [x0state(2), closestY], 'LineWidth', 2, 'Color', 'g');
xlim([-20 40])
ylim([-20 40])
title('Optimal Trajectory to Target = Reachable Set')
grid on; hold off;

% x1, x2 to show at the reachability DID happen; it reached xfstate
figure
set(gca,'FontSize',26); hold on;
plot(time,stateVector(1:end-1,1),'LineWidth',2,'Color', 'b'); hold on;
plot(time,stateVector(1:end-1,2),'LineWidth',2,'Color', 'm'); hold on;
xlabel('Time [s]'); ylabel('States'); legend('x_1','x_2');
grid on; hold off;

figure
set(gca,'FontSize',26)
hold on;
for i=1:6
plot(time,stateVector(1:end-1,i+2),'-','LineWidth',2)
hold on;
end
xlabel('Time [s]');ylabel('Critic Weights W_c');
legend('Wc_1','Wc_2','Wc_3','Wc_4','Wc_5','Wc_6');
grid on;
hold off;

% mine
figure
set(gca,'FontSize',26)
hold on;
plot(time,stateVector(1:end-1,9),'-','LineWidth',2)
plot(time,stateVector(1:end-1,10),'-','LineWidth',2)
xlabel('Time [s]');ylabel('Actor Weights W_a');
legend('Wa_1','Wa_2');
grid on;
hold off;

% For the Q = Value function, either plot equation 19 or Q* before eq 16
% Equation 19 is a number
valueFunctionLastExpectedValue = xfstate'*Pt*xfstate; % integral RL, initil control 0 % from equation 8
disp(valueFunctionLastExpectedValue)

valueFunctionLastActualValue = [stateVector(end,1); stateVector(end,2)]'*Pt*[stateVector(end,1); stateVector(end,2)]; % integral RL, initil control 0 % from equation 8
disp(valueFunctionLastActualValue)

% Q* before eq 16
figure
set(gca,'FontSize',26)
hold on;
plot(time,stateVector(1:end-1,12),'-','LineWidth',2)
xlabel('Time [s]');ylabel('Q*');
grid on;
hold off;

% u* from eq 15
figure
set(gca,'FontSize',26)
hold on;
plot(time,stateVector(1:end-1, 13),'-','LineWidth',2)
xlabel('Time [s]');ylabel('u*');
grid on;
hold off;
end

function [xVal, yVal] = circle(x,y,r)
% Creates a target circle and returns its points
% x and y are the coordinates of the center of the circle
% r is the radius of the circle
% 0.01 is the angle step, bigger values will draw the circle faster but
% you might notice imperfections (not very smooth)
ang = linspace(0,2*pi,100);
xp = r*cos(ang);
yp = r*sin(ang);
xVal = x+xp;
yVal = y+yp;
end