Costopoulos · Costopoulos · Jul 18, 2022 · Jul 18, 2022 · Jul 18, 2022 · Jul 18, 2022
diff --git a/Vavmoudakis Actor Critic/Kontoudis.m b/Vavmoudakis Actor Critic/Kontoudis.m
@@ -0,0 +1,25 @@
+function [currentState, time] = Kontoudis(initializations, currentState, xfstate)%, uvec)
+global uvec
+
+%% Decrypt Init Vars
+T = initializations{6};
+N = initializations{8};
+
+time = [];
+
+%% Solve ODE
+options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01);
+
+for i=1:N
+    sol_fnt = ode45(@(t,x) actorCritic(t,x,xfstate,initializations),[(i-1)*T,i*T],currentState(end,:),options);
+
+    time = [time;sol_fnt.x'];    % save time
+    currentState = [currentState;sol_fnt.y'];    % save state
+
+    uDelayInterpFcn = interp2PWC(uvec,0,i*T);    % interpolate control input
+    x1DelayInterpFcn = interp2PWC(currentState(:,1),0,i*T);
+    x2DelayInterpFcn = interp2PWC(currentState(:,2),0,i*T);
+    delays = {uDelayInterpFcn; x1DelayInterpFcn; x2DelayInterpFcn};
+    initializations(end) = {delays};
+end
+end
diff --git a/Vavmoudakis Actor Critic/actorCritic.m b/Vavmoudakis Actor Critic/actorCritic.m
@@ -0,0 +1,101 @@
+function [dotx] = actorCritic(t,x, xfstate, params)%, uvec)
+global uvec
+
+%% Decompose Parameters Cell
+A = params(1);
+B = params(2);
+M = params(3);
+R = params(4);
+Pt = params(5);
+T = params(6);
+Tf = params(7);
+alphaa = params(9);
+alphac = params(10);
+amplitude = params(end-4);
+percent = params(end-3);
+ufinal = params(end-2);
+Wcfinal = params(end-1);
+delays = params(end);
+% Note that every variable above is a struct, thus we get its value by
+% using the {} operator or by using cell2mat
+A = cell2mat(A);
+B = cell2mat(B);
+M = cell2mat(M);
+R = cell2mat(R);
+T = cell2mat(T);
+Wcfinal = cell2mat(Wcfinal);
+ufinal = cell2mat(ufinal);
+
+uDelayInterpFcn = delays{1, 1}(1);
+x1DelayInterpFcn = delays{1, 1}(2);
+x2DelayInterpFcn = delays{1, 1}(3);
+
+Wc = [x(3);x(4);x(5);x(6);x(7);x(8)]; %critic
+Wa = [x(9);x(10)]; %actor
+p = x(11); %integral 
+
+UkUfinal = [xfstate(1)^2 ; xfstate(1)*xfstate(2); xfstate(1)*ufinal; xfstate(2)^2 ; xfstate(2)*ufinal; ufinal^2];
+Pfinal = 0.5*(xfstate)'*Pt{1}*(xfstate); % equation 19
+
+% Update control
+ud = Wa'*(x(1:2)-xfstate); % equation 17
+
+
+% State and control delays
+uddelay = ppval(uDelayInterpFcn{1},t-T);
+xdelay(1) = ppval(x1DelayInterpFcn{1},t-T);
+xdelay(2) = ppval(x2DelayInterpFcn{1},t-T);
+
+%Augmented state and Kronecker products
+U = [x(1:2)-xfstate;ud-ufinal]; % Augmented state
+UkU = [U(1)^2 ; U(1)*U(2); U(1)*ud; U(2)^2 ; U(2)*ud; ud^2]; % U kron U
+% This is not the exact UkronU (9x1 matrix), but we do it to lower the
+% dimensionality. This way, Wc' would be 1x9, so the matrix multiplication
+% can actually happen in ec.
+UkUdelay = [xdelay(1)^2 ; xdelay(1)*xdelay(2); xdelay(1)*uddelay; xdelay(2)^2 ;...
+    xdelay(2)*uddelay; uddelay^2];
+
+Qxx = x(3:5); % equations 14-16
+Quu = x(8);
+Qxu = [x(6) x(7)]';
+Qux = Qxu';
+
+sigma = UkU - UkUdelay;
+
+% integral reinforcement
+dp =  0.5*((x(1:2)-xfstate)'*M*(x(1:2)-xfstate) -xdelay(1:2)*M*xdelay(1:2)' ...
+    +ud'*R*ud - uddelay'*R*uddelay); 
+
+% mine
+QStar = Wc'*UkU;
+uStar = -inv(Quu)*Qux*U(1:2);
+
+% Approximation Errors 
+ec = p + Wc'*UkU - Wc'*UkUdelay; % Critic approximator error
+ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state
+ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error 
+% or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2)
+% BASICALLY, looking at eq30, ud = Wa'*U(1:2) = Wa'(x(1:2)-xfstate) wants
+% to reach u* = inv(Quu)*Qux*U(1:2), that's why ea is defined like this.
+% ud is slowly getting to u*, aka
+% Wa'*U(1:2) -> inv(Quu)*Qux*U(1:2)
+
+
+% critic update
+dWc = -alphac{1}*((sigma./(sigma'*sigma+1).^2)*ec'+(UkUfinal./((UkUfinal'*UkUfinal+1).^2)*ecfinal')); % eq 22
+
+% actor update
+dWa = -alphaa{1}*U(1:2)*ea'; % equation 23
+
+% Persistent Excitation
+if t<=(percent{1}/100)*Tf{1}
+    unew=(ud+amplitude{1}*exp(-0.009*t)*2*(sin(t)^2*cos(t)+sin(2*t)^2*cos(0.1*t)+...
+        sin(-1.2*t)^2*cos(0.5*t)+sin(t)^5+sin(1.12*t)^2+cos(2.4*t)*sin(2.4*t)^3));
+else
+    unew=ud;
+end
+
+dx=A*[U(1);U(2)]+B*unew;
+uvec = [uvec;unew];
+dotx = [dx;dWc;dWa;dp;QStar;uStar]; % augmented state
+end
diff --git a/Vavmoudakis Actor Critic/baby_fnt.m b/Vavmoudakis Actor Critic/baby_fnt.m
@@ -52,7 +52,10 @@
 ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state
 ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error 
 % or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2)
-
+% BASICALLY, looking at eq30, ud = Wa'*U(1:2) = Wa'(x(1:2)-xfstate) wants
+% to reach u* = inv(Quu)*Qux*U(1:2), that's why ea is defined like this.
+% ud is slowly getting to u*, aka
+% Wa'*U(1:2) -> inv(Quu)*Qux*U(1:2)
 
 % critic update
 dWc = -alphac*((sigma./(sigma'*sigma+1).^2)*ec'+(UkUfinal./((UkUfinal'*UkUfinal+1).^2)*ecfinal')); % eq 22

diff --git a/Vavmoudakis Actor Critic/interp2PWC.m b/Vavmoudakis Actor Critic/interp2PWC.m
@@ -2,8 +2,8 @@
 % piecewise continuous function given the initial and final time provided
 
 function [f] = interp2PWC(y,xi,xf)
-[m,n] = size(y); % get size of the input vec
+[m,~] = size(y); % get size of the input vec
 x = linspace(xi,xf,m); %create x vector corresponding to y
-[c,ia,ic] = unique(y,'stable'); % figure out who and where are the unique points
+[c,ia,~] = unique(y,'stable'); % figure out who and where are the unique points
 f = mkpp([xi x(ia(2:end)') xf],c); % interpolate in a piecewise continuous fcn with corresponding time vec
-end
+end
diff --git a/Vavmoudakis Actor Critic/main.m b/Vavmoudakis Actor Critic/main.m
@@ -11,7 +11,8 @@
 global T Tf % ODE paramters
 global alphaa alphac % GD parameters
 global amplitude percent % Noise parameters
-global uvec uDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn sigma UkU UkUdelay
+global uvec uDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn
+
 A  = [-1 0;0 -2];
 B  = [0 1]';
 [n,m]=size(B); % States and controls
@@ -51,9 +52,6 @@
 
 t_save_fnt = [];        % time vec
 x_save_fnt = [x0state;Wc0;Wa0;p0;QStar0;uStar0]';%;xfstate]'; % build the initial cond state vec. This is x initially
-sigma_save_fnt = [];
-UkU_save = [];
-UkUdelay_save =[];
 
 uDelayInterpFcn = interp2PWC(0,0,1);
 x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,0);
@@ -64,15 +62,11 @@
 for i=1:N
     options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01); % 'OutputFcn',@odeplot,
     tic;
-    sol_fnt = ode23(@baby_fnt,[(i-1)*T,i*T],x_save_fnt(end,:),options);
+    sol_fnt = ode45(@baby_fnt,[(i-1)*T,i*T],x_save_fnt(end,:),options);
     toc;
 
     t_save_fnt = [t_save_fnt;sol_fnt.x'];    % save time
     x_save_fnt = [x_save_fnt;sol_fnt.y'];    % save state
-    sigma_save_fnt = [sigma_save_fnt sigma];
-    UkU_save = [UkU_save [sqrt(UkU(1)); sqrt(UkU(4))]];
-    UkUdelay_save =[UkUdelay_save [sqrt(UkUdelay(1)); sqrt(UkUdelay(4))]];
-
 
     uDelayInterpFcn = interp2PWC(uvec,0,i*T);    % interpolate control input
     x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,i*T);

diff --git a/Vavmoudakis Actor Critic/reachabilityMain.m b/Vavmoudakis Actor Critic/reachabilityMain.m
@@ -0,0 +1,194 @@
+function reachabilityMain()
+global uvec
+close all;
+clc;
+
+%% Dynamics and System Parameters
+A  = [-1 0;0 -2];
+B  = [0 1]';
+[~,m] = size(B); % States and controls
+M  = diag([1,.1]);
+R  = .1;
+Pt = diag([.5,.5]); % Ricatti at final time
+
+% ODEs parameters
+T  = 0.05; % Delay
+Tf = 6; % Finite horizon in seconds
+N  = Tf/T; % Number of simulation rounds
+
+% Gradient descent parameters
+alphac = 90;
+alphaa = 1.5;
+
+% Noise paramaters
+amplitude = .1; % changes the amplitude of the PE (kyriakos pe)
+percent = 50;
+
+Wc0 = [rand(5,1);10*rand(1)]; % critic weights
+Wa0 = .5*ones(2,1); % actor weights
+ufinal = 0.001;
+Wcfinal = 12*ones(6,1);
+
+% Initial condition of the augmented system 
+x0state = [32 -15]';  % initial state
+xfstate = [-11.5 7.5]';  % final state
+
+p0 = x0state'*M*x0state;  % integral RL, initil control 0 % from equation 8
+
+QStar0 = rand(m);
+uStar0 = rand(m);
+
+x_save_fnt = [x0state;Wc0;Wa0;p0;QStar0;uStar0]'; % build the initial cond state vec. This is x initially
+
+uDelayInterpFcn = interp2PWC(0,0,1);
+x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,0);
+x2DelayInterpFcn = interp2PWC(x_save_fnt(:,2),0,0);
+
+delays = {uDelayInterpFcn; x1DelayInterpFcn; x2DelayInterpFcn};
+initializations = {A; B; M; R; Pt; T; Tf; N; ...
+                   alphaa; alphac; amplitude; ...
+                   percent; ufinal; Wcfinal; delays};
+
+%% Set target
+[xVal, yVal] = circle(-11.5,7.5,4);%(0,0,8);
+figure 
+hold on;
+plot(xVal,yVal, 'r');
+plot(x0state(1),x0state(2),'.b')
+xlim([-20 40])
+ylim([-20 40])
+title('Initial Point(Blue) and Target(Red)')
+pause(0.1);
+hold off;
+
+% Retrieve actual number of points of the target
+[~, xPoints] = size(xVal); % no need for yPoints cause they are the same
+
+allFinalStatesVector = [];
+
+%% Call Kontoudis' algorithm
+for i=1:xPoints
+    xfstate = [xVal(i) yVal(i)]';
+    tic
+    uvec = [];
+    [stateVector, time] = Kontoudis(initializations, x_save_fnt, xfstate);
+    allFinalStatesVector = [allFinalStatesVector; stateVector(end, 1:2)];
+    toc
+end
+
+%% Plots
+figure
+hold on;
+plot(xVal,yVal, 'r'); hold on; % target
+plot(x0state(1),x0state(2),'.b'); hold on; % initial State
+plot(allFinalStatesVector(1:end,1),allFinalStatesVector(1:end,2),'-b'); hold on;
+xlim([-20 40])
+ylim([-20 40])
+title('Reachable Set')
+grid on; hold off;
+
+figure
+hold on;
+plot(xVal,yVal, 'r'); hold on;
+plot(allFinalStatesVector(1:end,1),allFinalStatesVector(1:end,2),'-b'); hold on;
+plot(x0state(1),x0state(2),'.b'); hold on;
+% find closest point of the target to the initial point
+dist = sqrt((x0state(1) - allFinalStatesVector(:,1)).^2 + (x0state(2) - allFinalStatesVector(:,2)).^2);
+[~, indexOfMin] = min(dist);
+closestX = xVal(indexOfMin);
+closestY = yVal(indexOfMin);
+% actually plot
+plot(closestX, closestY, '-g'); hold on;
+line([x0state(1), closestX], [x0state(2), closestY], 'LineWidth', 2, 'Color', 'g');
+xlim([-20 40])
+ylim([-20 40])
+title('Optimal Trajectory to Reachable Set = Target')
+grid on; hold off;
+
+figure
+hold on;
+plot(xVal,yVal, 'r'); hold on;
+plot(x0state(1),x0state(2),'.b'); hold on;
+% find closest point of the target to the initial point
+dist = sqrt((x0state(1) - xVal(:,1))^2 + (x0state(2) - yVal(:,1))^2);
+[~, indexOfMin] = min(dist);
+closestX = xVal(indexOfMin);
+closestY = yVal(indexOfMin);
+% actually plot
+plot(closestX, closestY, '-g'); hold on;
+line([x0state(1), closestX], [x0state(2), closestY], 'LineWidth', 2, 'Color', 'g');
+xlim([-20 40])
+ylim([-20 40])
+title('Optimal Trajectory to Target = Reachable Set')
+grid on; hold off;
+
+% x1, x2 to show at the reachability DID happen; it reached xfstate
+figure 
+set(gca,'FontSize',26); hold on;
+plot(time,stateVector(1:end-1,1),'LineWidth',2,'Color', 'b'); hold on;
+plot(time,stateVector(1:end-1,2),'LineWidth',2,'Color', 'm'); hold on;
+xlabel('Time [s]'); ylabel('States'); legend('x_1','x_2');
+grid on; hold off;
+
+figure 
+set(gca,'FontSize',26)
+hold on;
+for i=1:6
+    plot(time,stateVector(1:end-1,i+2),'-','LineWidth',2)
+    hold on;
+end
+xlabel('Time [s]');ylabel('Critic Weights W_c');
+legend('Wc_1','Wc_2','Wc_3','Wc_4','Wc_5','Wc_6');
+grid on;
+hold off;
+
+% mine
+figure 
+set(gca,'FontSize',26)
+hold on;
+plot(time,stateVector(1:end-1,9),'-','LineWidth',2)
+plot(time,stateVector(1:end-1,10),'-','LineWidth',2)
+xlabel('Time [s]');ylabel('Actor Weights W_a');
+legend('Wa_1','Wa_2');
+grid on;
+hold off;
+
+% For the Q = Value function, either plot equation 19 or Q* before eq 16
+% Equation 19 is a number
+valueFunctionLastExpectedValue = xfstate'*Pt*xfstate;  % integral RL, initil control 0 % from equation 8
+disp(valueFunctionLastExpectedValue)
+
+valueFunctionLastActualValue = [stateVector(end,1); stateVector(end,2)]'*Pt*[stateVector(end,1); stateVector(end,2)];  % integral RL, initil control 0 % from equation 8
+disp(valueFunctionLastActualValue)
+
+% Q* before eq 16
+figure 
+set(gca,'FontSize',26)
+hold on;
+plot(time,stateVector(1:end-1,12),'-','LineWidth',2)
+xlabel('Time [s]');ylabel('Q*');
+grid on;
+hold off;
+
+% u* from eq 15
+figure 
+set(gca,'FontSize',26)
+hold on;
+plot(time,stateVector(1:end-1, 13),'-','LineWidth',2)
+xlabel('Time [s]');ylabel('u*');
+grid on;
+hold off;
+end
+
+function [xVal, yVal] = circle(x,y,r)
+% Creates a target circle and returns its points
+% x and y are the coordinates of the center of the circle
+% r is the radius of the circle
+% 0.01 is the angle step, bigger values will draw the circle faster but
+% you might notice imperfections (not very smooth)
+ang = linspace(0,2*pi,100);
+xp = r*cos(ang);
+yp = r*sin(ang);
+xVal = x+xp;
+yVal = y+yp;
+end