From b77ab00646c774a23b06ff97dc566de5f78c4217 Mon Sep 17 00:00:00 2001
From: Constantinos Costopoulos <dkns99@gmail.com>
Date: Mon, 18 Jul 2022 01:17:38 -0400
Subject: [PATCH 1/7] Remove unneeded stuff from main.m

---
 Vavmoudakis Actor Critic/main.m | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/Vavmoudakis Actor Critic/main.m b/Vavmoudakis Actor Critic/main.m
index 6c7d268..e8c5fae 100644
--- a/Vavmoudakis Actor Critic/main.m	
+++ b/Vavmoudakis Actor Critic/main.m	
@@ -11,7 +11,8 @@
 global T Tf % ODE paramters
 global alphaa alphac % GD parameters
 global amplitude percent % Noise parameters
-global uvec uDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn sigma UkU UkUdelay
+global uvec uDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn sigma
+
 A  = [-1 0;0 -2];
 B  = [0 1]';
 [n,m]=size(B); % States and controls
@@ -52,8 +53,6 @@
 t_save_fnt = [];        % time vec
 x_save_fnt = [x0state;Wc0;Wa0;p0;QStar0;uStar0]';%;xfstate]'; % build the initial cond state vec. This is x initially
 sigma_save_fnt = [];
-UkU_save = [];
-UkUdelay_save =[];
 
 uDelayInterpFcn = interp2PWC(0,0,1);
 x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,0);
@@ -70,9 +69,6 @@
     t_save_fnt = [t_save_fnt;sol_fnt.x'];    % save time
     x_save_fnt = [x_save_fnt;sol_fnt.y'];    % save state
     sigma_save_fnt = [sigma_save_fnt sigma];
-    UkU_save = [UkU_save [sqrt(UkU(1)); sqrt(UkU(4))]];
-    UkUdelay_save =[UkUdelay_save [sqrt(UkUdelay(1)); sqrt(UkUdelay(4))]];
-    
    
     uDelayInterpFcn = interp2PWC(uvec,0,i*T);    % interpolate control input
     x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,i*T);

From 24e386f377cdd018cbe323fc720691171ec5f241 Mon Sep 17 00:00:00 2001
From: Constantinos Costopoulos <dkns99@gmail.com>
Date: Mon, 18 Jul 2022 01:36:47 -0400
Subject: [PATCH 2/7] Remove more unneeded stuff

---
 Vavmoudakis Actor Critic/interp2PWC.m | 6 +++---
 Vavmoudakis Actor Critic/main.m       | 4 +---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/Vavmoudakis Actor Critic/interp2PWC.m b/Vavmoudakis Actor Critic/interp2PWC.m
index e33f00b..43bc8af 100644
--- a/Vavmoudakis Actor Critic/interp2PWC.m	
+++ b/Vavmoudakis Actor Critic/interp2PWC.m	
@@ -2,8 +2,8 @@
 % piecewise continuous function given the initial and final time provided
 
 function [f] = interp2PWC(y,xi,xf)
-[m,n] = size(y); % get size of the input vec
+[m,~] = size(y); % get size of the input vec
 x = linspace(xi,xf,m); %create x vector corresponding to y
-[c,ia,ic] = unique(y,'stable'); % figure out who and where are the unique points
+[c,ia,~] = unique(y,'stable'); % figure out who and where are the unique points
 f = mkpp([xi x(ia(2:end)') xf],c); % interpolate in a piecewise continuous fcn with corresponding time vec
-end
\ No newline at end of file
+end
diff --git a/Vavmoudakis Actor Critic/main.m b/Vavmoudakis Actor Critic/main.m
index e8c5fae..593be81 100644
--- a/Vavmoudakis Actor Critic/main.m	
+++ b/Vavmoudakis Actor Critic/main.m	
@@ -11,7 +11,7 @@
 global T Tf % ODE paramters
 global alphaa alphac % GD parameters
 global amplitude percent % Noise parameters
-global uvec uDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn sigma
+global uvec uDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn
 
 A  = [-1 0;0 -2];
 B  = [0 1]';
@@ -52,7 +52,6 @@
 
 t_save_fnt = [];        % time vec
 x_save_fnt = [x0state;Wc0;Wa0;p0;QStar0;uStar0]';%;xfstate]'; % build the initial cond state vec. This is x initially
-sigma_save_fnt = [];
 
 uDelayInterpFcn = interp2PWC(0,0,1);
 x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,0);
@@ -68,7 +67,6 @@
     
     t_save_fnt = [t_save_fnt;sol_fnt.x'];    % save time
     x_save_fnt = [x_save_fnt;sol_fnt.y'];    % save state
-    sigma_save_fnt = [sigma_save_fnt sigma];
    
     uDelayInterpFcn = interp2PWC(uvec,0,i*T);    % interpolate control input
     x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,i*T);

From 939b4014cc322d0f5e630477bc91d8ff15e6bae0 Mon Sep 17 00:00:00 2001
From: Constantinos Costopoulos <dkns99@gmail.com>
Date: Mon, 18 Jul 2022 01:37:02 -0400
Subject: [PATCH 3/7] Change ode23 to ode45

---
 Vavmoudakis Actor Critic/main.m | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Vavmoudakis Actor Critic/main.m b/Vavmoudakis Actor Critic/main.m
index 593be81..68667d5 100644
--- a/Vavmoudakis Actor Critic/main.m	
+++ b/Vavmoudakis Actor Critic/main.m	
@@ -62,7 +62,7 @@
 for i=1:N
     options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01); % 'OutputFcn',@odeplot,
     tic;
-    sol_fnt = ode23(@baby_fnt,[(i-1)*T,i*T],x_save_fnt(end,:),options);
+    sol_fnt = ode45(@baby_fnt,[(i-1)*T,i*T],x_save_fnt(end,:),options);
     toc;
     
     t_save_fnt = [t_save_fnt;sol_fnt.x'];    % save time

From 2ea4396f7f8716a61c8441c56b1d49bd881debbf Mon Sep 17 00:00:00 2001
From: Constantinos Costopoulos <dkns99@gmail.com>
Date: Mon, 18 Jul 2022 16:55:04 -0400
Subject: [PATCH 4/7] Add func prog implementation for Kontoudis' algo

---
 Vavmoudakis Actor Critic/Kontoudis.m        |  27 +++++
 Vavmoudakis Actor Critic/actorCritic.m      |  97 ++++++++++++++++
 Vavmoudakis Actor Critic/reachabilityMain.m | 116 ++++++++++++++++++++
 3 files changed, 240 insertions(+)
 create mode 100644 Vavmoudakis Actor Critic/Kontoudis.m
 create mode 100644 Vavmoudakis Actor Critic/actorCritic.m
 create mode 100644 Vavmoudakis Actor Critic/reachabilityMain.m

diff --git a/Vavmoudakis Actor Critic/Kontoudis.m b/Vavmoudakis Actor Critic/Kontoudis.m
new file mode 100644
index 0000000..7cadd3b
--- /dev/null
+++ b/Vavmoudakis Actor Critic/Kontoudis.m	
@@ -0,0 +1,27 @@
+function [currentState, time] = Kontoudis(initializations, currentState, xfstate)%, uvec)
+global uvec
+
+%% Decrypt Init Vars
+T = initializations{6};
+N = initializations{8};
+
+time = [];
+
+%% Solve ODE
+options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01);
+
+for i=1:N
+    tic;
+    sol_fnt = ode45(@(t,x) actorCritic(t,x,xfstate,initializations),[(i-1)*T,i*T],currentState(end,:),options);
+    toc;
+    
+    time = [time;sol_fnt.x'];    % save time
+    currentState = [currentState;sol_fnt.y'];    % save state
+    
+    uDelayInterpFcn = interp2PWC(uvec,0,i*T);    % interpolate control input
+    x1DelayInterpFcn = interp2PWC(currentState(:,1),0,i*T);
+    x2DelayInterpFcn = interp2PWC(currentState(:,2),0,i*T);
+    delays = {uDelayInterpFcn; x1DelayInterpFcn; x2DelayInterpFcn};
+    initializations(end) = {delays};
+end
+end
diff --git a/Vavmoudakis Actor Critic/actorCritic.m b/Vavmoudakis Actor Critic/actorCritic.m
new file mode 100644
index 0000000..6479b8c
--- /dev/null
+++ b/Vavmoudakis Actor Critic/actorCritic.m	
@@ -0,0 +1,97 @@
+function [dotx] = actorCritic(t,x, xfstate, params)%, uvec)
+global uvec
+
+%% Decompose Parameters Cell
+A = params(1);
+B = params(2);
+M = params(3);
+R = params(4);
+Pt = params(5);
+T = params(6);
+Tf = params(7);
+alphaa = params(9);
+alphac = params(10);
+amplitude = params(end-4);
+percent = params(end-3);
+ufinal = params(end-2);
+Wcfinal = params(end-1);
+delays = params(end);
+% Note that every variable above is a struct, thus we get its value by
+% using the {} operator or by using cell2mat
+A = cell2mat(A);
+B = cell2mat(B);
+M = cell2mat(M);
+R = cell2mat(R);
+T = cell2mat(T);
+Wcfinal = cell2mat(Wcfinal);
+ufinal = cell2mat(ufinal);
+
+uDelayInterpFcn = delays{1, 1}(1);
+x1DelayInterpFcn = delays{1, 1}(2);
+x2DelayInterpFcn = delays{1, 1}(3);
+
+Wc = [x(3);x(4);x(5);x(6);x(7);x(8)]; %critic
+Wa = [x(9);x(10)]; %actor
+p = x(11); %integral 
+
+UkUfinal = [xfstate(1)^2 ; xfstate(1)*xfstate(2); xfstate(1)*ufinal; xfstate(2)^2 ; xfstate(2)*ufinal; ufinal^2];
+Pfinal = 0.5*(xfstate)'*Pt{1}*(xfstate); % equation 19
+
+% Update control
+ud = Wa'*(x(1:2)-xfstate); % equation 17
+
+
+% State and control delays
+uddelay = ppval(uDelayInterpFcn{1},t-T);
+xdelay(1) = ppval(x1DelayInterpFcn{1},t-T);
+xdelay(2) = ppval(x2DelayInterpFcn{1},t-T);
+
+%Augmented state and Kronecker products
+U = [x(1:2)-xfstate;ud-ufinal]; % Augmented state
+UkU = [U(1)^2 ; U(1)*U(2); U(1)*ud; U(2)^2 ; U(2)*ud; ud^2]; % U kron U
+% This is not the exact UkronU (9x1 matrix), but we do it to lower the
+% dimensionality. This way, Wc' would be 1x9, so the matrix multiplication
+% can actually happen in ec.
+UkUdelay = [xdelay(1)^2 ; xdelay(1)*xdelay(2); xdelay(1)*uddelay; xdelay(2)^2 ;...
+    xdelay(2)*uddelay; uddelay^2];
+
+Qxx = x(3:5); % equations 14-16
+Quu = x(8);
+Qxu = [x(6) x(7)]';
+Qux = Qxu';
+
+sigma = UkU - UkUdelay;
+
+% integral reinforcement
+dp =  0.5*((x(1:2)-xfstate)'*M*(x(1:2)-xfstate) -xdelay(1:2)*M*xdelay(1:2)' ...
+    +ud'*R*ud - uddelay'*R*uddelay); 
+
+% mine
+QStar = Wc'*UkU;
+uStar = -inv(Quu)*Qux*U(1:2);
+
+% Approximation Errors 
+ec = p + Wc'*UkU - Wc'*UkUdelay; % Critic approximator error
+ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state
+ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error 
+% or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2)
+
+
+% critic update
+dWc = -alphac{1}*((sigma./(sigma'*sigma+1).^2)*ec'+(UkUfinal./((UkUfinal'*UkUfinal+1).^2)*ecfinal')); % eq 22
+
+% actor update
+dWa = -alphaa{1}*U(1:2)*ea'; % equation 23
+   
+% Persistent Excitation
+if t<=(percent{1}/100)*Tf{1}
+    unew=(ud+amplitude{1}*exp(-0.009*t)*2*(sin(t)^2*cos(t)+sin(2*t)^2*cos(0.1*t)+...
+        sin(-1.2*t)^2*cos(0.5*t)+sin(t)^5+sin(1.12*t)^2+cos(2.4*t)*sin(2.4*t)^3));
+else
+    unew=ud;
+end
+
+dx=A*[U(1);U(2)]+B*unew;
+uvec = [uvec;unew];
+dotx = [dx;dWc;dWa;dp;QStar;uStar]; % augmented state
+end
diff --git a/Vavmoudakis Actor Critic/reachabilityMain.m b/Vavmoudakis Actor Critic/reachabilityMain.m
new file mode 100644
index 0000000..e2b1664
--- /dev/null
+++ b/Vavmoudakis Actor Critic/reachabilityMain.m	
@@ -0,0 +1,116 @@
+function reachabilityMain()
+global uvec
+% Clc all
+close all;
+clc;
+
+%% Dynamics and System Parameters
+A  = [-1 0;0 -2];
+B  = [0 1]';
+[~,m] = size(B); % States and controls
+M  = diag([1,.1]);
+R  = .1;
+Pt = diag([.5,.5]); % Ricatti at final time
+
+% ODEs parameters
+T  = 0.05; % Delay
+Tf = 6; % Finite horizon in seconds
+N  = Tf/T; % Number of simulation rounds
+ 
+% Gradient descent parameters
+alphac = 90;
+alphaa = 1.5;
+
+% Noise paramaters
+amplitude = .1; % changes the amplitude of the PE (kyriakos pe)
+percent = 50;
+
+Wc0 = [rand(5,1);10*rand(1)]; % critic weights
+Wa0 = .5*ones(2,1); % actor weights
+ufinal = 0.001;
+Wcfinal = 12*ones(6,1);
+
+% Initial condition of the augmented system 
+x0state = [32 -15]';  % initial state
+xfstate = [-11.5 7.5]';  % final state
+
+p0 = x0state'*M*x0state;  % integral RL, initil control 0 % from equation 8
+
+QStar0 = rand(m);
+uStar0 = rand(m);
+
+x_save_fnt = [x0state;Wc0;Wa0;p0;QStar0;uStar0]'; % build the initial cond state vec. This is x initially
+
+uDelayInterpFcn = interp2PWC(0,0,1);
+x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,0);
+x2DelayInterpFcn = interp2PWC(x_save_fnt(:,2),0,0);
+
+delays = {uDelayInterpFcn; x1DelayInterpFcn; x2DelayInterpFcn};
+initializations = {A; B; M; R; Pt; T; Tf; N; ...
+                   alphaa; alphac; amplitude; ...
+                   percent; ufinal; Wcfinal; delays};
+
+%% Call Kontoudis' algorithm
+uvec = []; % when the reachability part comes here, before every Kontoudis()
+           % call we will reset uvec.
+[stateVector, time] = Kontoudis(initializations, x_save_fnt, xfstate);%), uvec);
+
+%% Plots
+% x1, x2 to show at the reachability DID happen; it reached xfstate
+figure 
+set(gca,'FontSize',26); hold on;
+plot(time,stateVector(1:end-1,1),'LineWidth',2,'Color', 'b'); hold on;
+plot(time,stateVector(1:end-1,2),'LineWidth',2,'Color', 'm'); hold on;
+xlabel('Time [s]'); ylabel('States'); legend('x_1','x_2');
+grid on; hold off;
+
+figure 
+set(gca,'FontSize',26)
+hold on;
+for i=1:6
+    plot(time,stateVector(1:end-1,i+2),'-','LineWidth',2)
+    hold on;
+end
+xlabel('Time [s]');ylabel('Critic Weights W_c');
+legend('Wc_1','Wc_2','Wc_3','Wc_4','Wc_5','Wc_6');
+grid on;
+hold off;
+
+% mine
+figure 
+set(gca,'FontSize',26)
+hold on;
+plot(time,stateVector(1:end-1,9),'-','LineWidth',2)
+plot(time,stateVector(1:end-1,10),'-','LineWidth',2)
+xlabel('Time [s]');ylabel('Actor Weights W_a');
+legend('Wa_1','Wa_2');
+grid on;
+hold off;
+
+% For the Q = Value function, either plot equation 19 or Q* before eq 16
+% Equation 19 is a number
+valueFunctionLastExpectedValue = xfstate'*Pt*xfstate;  % integral RL, initil control 0 % from equation 8
+disp(valueFunctionLastExpectedValue)
+
+valueFunctionLastActualValue = [stateVector(end,1); stateVector(end,2)]'*Pt*[stateVector(end,1); stateVector(end,2)];  % integral RL, initil control 0 % from equation 8
+disp(valueFunctionLastActualValue)
+
+% Q* before eq 16
+figure 
+set(gca,'FontSize',26)
+hold on;
+plot(time,stateVector(1:end-1,12),'-','LineWidth',2)
+xlabel('Time [s]');ylabel('Q*');
+grid on;
+hold off;
+% FUCK YALL, IT ASYMPTOTICALLY CONVERGES, BADABEEM BADABOOM BADABAM
+
+% u* from eq 15
+figure 
+set(gca,'FontSize',26)
+hold on;
+plot(time,stateVector(1:end-1, 13),'-','LineWidth',2)
+xlabel('Time [s]');ylabel('u*');
+grid on;
+hold off;
+end

From e866951d2aa40fb7d316c3a55086898a111189c4 Mon Sep 17 00:00:00 2001
From: Constantinos Costopoulos <dkns99@gmail.com>
Date: Tue, 19 Jul 2022 17:00:26 -0400
Subject: [PATCH 5/7] Add initial implementation for reachability

---
 Vavmoudakis Actor Critic/Kontoudis.m        |  4 +-
 Vavmoudakis Actor Critic/reachabilityMain.m | 74 ++++++++++++++++++++-
 2 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/Vavmoudakis Actor Critic/Kontoudis.m b/Vavmoudakis Actor Critic/Kontoudis.m
index 7cadd3b..d7a5a75 100644
--- a/Vavmoudakis Actor Critic/Kontoudis.m	
+++ b/Vavmoudakis Actor Critic/Kontoudis.m	
@@ -11,9 +11,9 @@
 options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01);
 
 for i=1:N
-    tic;
+%     tic;
     sol_fnt = ode45(@(t,x) actorCritic(t,x,xfstate,initializations),[(i-1)*T,i*T],currentState(end,:),options);
-    toc;
+%     toc;
     
     time = [time;sol_fnt.x'];    % save time
     currentState = [currentState;sol_fnt.y'];    % save state
diff --git a/Vavmoudakis Actor Critic/reachabilityMain.m b/Vavmoudakis Actor Critic/reachabilityMain.m
index e2b1664..3016739 100644
--- a/Vavmoudakis Actor Critic/reachabilityMain.m	
+++ b/Vavmoudakis Actor Critic/reachabilityMain.m	
@@ -50,12 +50,67 @@ function reachabilityMain()
                    alphaa; alphac; amplitude; ...
                    percent; ufinal; Wcfinal; delays};
 
+%% Set target
+[xVal, yVal] = circle(-11.5,7.5,4);%(0,0,8);
+figure 
+hold on;
+plot(xVal,yVal, 'r');
+plot(x0state(1),x0state(2),'.b')
+xlim([-20 40])
+ylim([-20 40])
+title('Initial Point(Blue) and Target(Red)')
+pause(0.1);
+hold off;
+
+% Retrieve actual points of the target
+[~, xPoints] = size(xVal);
+[~, yPoints] = size(yVal);
+
+allFinalStatesVector = [];
+
 %% Call Kontoudis' algorithm
-uvec = []; % when the reachability part comes here, before every Kontoudis()
-           % call we will reset uvec.
-[stateVector, time] = Kontoudis(initializations, x_save_fnt, xfstate);%), uvec);
+% tic
+for i=1:xPoints
+    xfstate(1) = xVal(i);
+    tic % around 38sec per j iteration => around 1900 sec to finish all, aka 31.6 mins
+    for j=1:yPoints
+        uvec = [];
+        xfstate(2) = yVal(j); % instead of xfstate = [xVal(i) yVal(j)]';
+        [stateVector, time] = Kontoudis(initializations, x_save_fnt, xfstate);
+        allFinalStatesVector = [allFinalStatesVector; stateVector(end, 1:2)];
+    end
+    toc
+end
+% toc
 
 %% Plots
+figure
+hold on;
+plot(xVal,yVal, 'r'); hold on; % target
+plot(x0state(1),x0state(2),'.b'); hold on; % initial State
+plot(allFinalStatesVector(1:end,1),allFinalStatesVector(1:end,2),'-b'); hold on;
+xlim([-20 40])
+ylim([-20 40])
+title('Reachable Set')
+grid on; hold off;
+
+figure
+hold on;
+plot(xVal,yVal, 'r'); hold on;
+plot(x0state(1),x0state(2),'.b'); hold on;
+% find closest point of the target to the initial point
+dist = sqrt((x0state(1) - xVal(:,1))^2 + (x0state(2) - yVal(:,1))^2);
+[minDist, indexOfMin] = min(dist);
+closestX = xVal(indexOfMin);
+closestY = yVal(indexOfMin);
+% actually plot
+plot(closestX, closestY, '-g'); hold on;
+line([x0state(1), closestX], [x0state(2), closestY], 'LineWidth', 2, 'Color', 'g');
+xlim([-20 40])
+ylim([-20 40])
+title('Optimal Trajectory')
+grid on; hold off;
+
 % x1, x2 to show at the reachability DID happen; it reached xfstate
 figure 
 set(gca,'FontSize',26); hold on;
@@ -114,3 +169,16 @@ function reachabilityMain()
 grid on;
 hold off;
 end
+
+function [xVal, yVal] = circle(x,y,r)
+% Creates a target circle and returns its points
+% x and y are the coordinates of the center of the circle
+% r is the radius of the circle
+% 0.01 is the angle step, bigger values will draw the circle faster but
+% you might notice imperfections (not very smooth)
+ang = linspace(0,2*pi,50); %50 % ang = 0:0.04:2*pi; % 158*158 points with that
+xp = r*cos(ang);
+yp = r*sin(ang);
+xVal = x+xp;
+yVal = y+yp;
+end

From ac6673061cde1765db37d5cbfe5a8e9955efaec2 Mon Sep 17 00:00:00 2001
From: Constantinos Costopoulos <dkns99@gmail.com>
Date: Tue, 19 Jul 2022 17:47:47 -0400
Subject: [PATCH 6/7] Finalize reachability implementation

---
 Vavmoudakis Actor Critic/Kontoudis.m        |  2 -
 Vavmoudakis Actor Critic/reachabilityMain.m | 46 +++++++++++++--------
 2 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/Vavmoudakis Actor Critic/Kontoudis.m b/Vavmoudakis Actor Critic/Kontoudis.m
index d7a5a75..ad985a3 100644
--- a/Vavmoudakis Actor Critic/Kontoudis.m	
+++ b/Vavmoudakis Actor Critic/Kontoudis.m	
@@ -11,9 +11,7 @@
 options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01);
 
 for i=1:N
-%     tic;
     sol_fnt = ode45(@(t,x) actorCritic(t,x,xfstate,initializations),[(i-1)*T,i*T],currentState(end,:),options);
-%     toc;
     
     time = [time;sol_fnt.x'];    % save time
     currentState = [currentState;sol_fnt.y'];    % save state
diff --git a/Vavmoudakis Actor Critic/reachabilityMain.m b/Vavmoudakis Actor Critic/reachabilityMain.m
index 3016739..6e0d012 100644
--- a/Vavmoudakis Actor Critic/reachabilityMain.m	
+++ b/Vavmoudakis Actor Critic/reachabilityMain.m	
@@ -1,6 +1,5 @@
 function reachabilityMain()
 global uvec
-% Clc all
 close all;
 clc;
 
@@ -62,26 +61,20 @@ function reachabilityMain()
 pause(0.1);
 hold off;
 
-% Retrieve actual points of the target
-[~, xPoints] = size(xVal);
-[~, yPoints] = size(yVal);
+% Retrieve actual number of points of the target
+[~, xPoints] = size(xVal); % no need for yPoints cause they are the same
 
 allFinalStatesVector = [];
 
 %% Call Kontoudis' algorithm
-% tic
 for i=1:xPoints
-    xfstate(1) = xVal(i);
-    tic % around 38sec per j iteration => around 1900 sec to finish all, aka 31.6 mins
-    for j=1:yPoints
-        uvec = [];
-        xfstate(2) = yVal(j); % instead of xfstate = [xVal(i) yVal(j)]';
-        [stateVector, time] = Kontoudis(initializations, x_save_fnt, xfstate);
-        allFinalStatesVector = [allFinalStatesVector; stateVector(end, 1:2)];
-    end
+    xfstate = [xVal(i) yVal(i)]';
+    tic
+    uvec = [];
+    [stateVector, time] = Kontoudis(initializations, x_save_fnt, xfstate);
+    allFinalStatesVector = [allFinalStatesVector; stateVector(end, 1:2)];
     toc
 end
-% toc
 
 %% Plots
 figure
@@ -94,13 +87,31 @@ function reachabilityMain()
 title('Reachable Set')
 grid on; hold off;
 
+figure
+hold on;
+plot(xVal,yVal, 'r'); hold on;
+plot(allFinalStatesVector(1:end,1),allFinalStatesVector(1:end,2),'-b'); hold on;
+plot(x0state(1),x0state(2),'.b'); hold on;
+% find closest point of the target to the initial point
+dist = sqrt((x0state(1) - allFinalStatesVector(:,1)).^2 + (x0state(2) - allFinalStatesVector(:,2)).^2);
+[~, indexOfMin] = min(dist);
+closestX = xVal(indexOfMin);
+closestY = yVal(indexOfMin);
+% actually plot
+plot(closestX, closestY, '-g'); hold on;
+line([x0state(1), closestX], [x0state(2), closestY], 'LineWidth', 2, 'Color', 'g');
+xlim([-20 40])
+ylim([-20 40])
+title('Optimal Trajectory to Reachable Set = Target')
+grid on; hold off;
+
 figure
 hold on;
 plot(xVal,yVal, 'r'); hold on;
 plot(x0state(1),x0state(2),'.b'); hold on;
 % find closest point of the target to the initial point
 dist = sqrt((x0state(1) - xVal(:,1))^2 + (x0state(2) - yVal(:,1))^2);
-[minDist, indexOfMin] = min(dist);
+[~, indexOfMin] = min(dist);
 closestX = xVal(indexOfMin);
 closestY = yVal(indexOfMin);
 % actually plot
@@ -108,7 +119,7 @@ function reachabilityMain()
 line([x0state(1), closestX], [x0state(2), closestY], 'LineWidth', 2, 'Color', 'g');
 xlim([-20 40])
 ylim([-20 40])
-title('Optimal Trajectory')
+title('Optimal Trajectory to Target = Reachable Set')
 grid on; hold off;
 
 % x1, x2 to show at the reachability DID happen; it reached xfstate
@@ -158,7 +169,6 @@ function reachabilityMain()
 xlabel('Time [s]');ylabel('Q*');
 grid on;
 hold off;
-% FUCK YALL, IT ASYMPTOTICALLY CONVERGES, BADABEEM BADABOOM BADABAM
 
 % u* from eq 15
 figure 
@@ -176,7 +186,7 @@ function reachabilityMain()
 % r is the radius of the circle
 % 0.01 is the angle step, bigger values will draw the circle faster but
 % you might notice imperfections (not very smooth)
-ang = linspace(0,2*pi,50); %50 % ang = 0:0.04:2*pi; % 158*158 points with that
+ang = linspace(0,2*pi,100);
 xp = r*cos(ang);
 yp = r*sin(ang);
 xVal = x+xp;

From b5f01c61afa8fa6933885a5196f00713a1d15500 Mon Sep 17 00:00:00 2001
From: Constantinos Costopoulos <dkns99@gmail.com>
Date: Tue, 19 Jul 2022 20:08:23 -0400
Subject: [PATCH 7/7] Add comms

---
 Vavmoudakis Actor Critic/actorCritic.m | 4 ++++
 Vavmoudakis Actor Critic/baby_fnt.m    | 5 ++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/Vavmoudakis Actor Critic/actorCritic.m b/Vavmoudakis Actor Critic/actorCritic.m
index 6479b8c..a797cf1 100644
--- a/Vavmoudakis Actor Critic/actorCritic.m	
+++ b/Vavmoudakis Actor Critic/actorCritic.m	
@@ -75,6 +75,10 @@
 ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state
 ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error 
 % or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2)
+% BASICALLY, looking at eq30, ud = Wa'*U(1:2) = Wa'(x(1:2)-xfstate) wants
+% to reach u* = inv(Quu)*Qux*U(1:2), that's why ea is defined like this.
+% ud is slowly getting to u*, aka
+% Wa'*U(1:2) -> inv(Quu)*Qux*U(1:2)
 
 
 % critic update
diff --git a/Vavmoudakis Actor Critic/baby_fnt.m b/Vavmoudakis Actor Critic/baby_fnt.m
index 6e89e85..cac2ee2 100644
--- a/Vavmoudakis Actor Critic/baby_fnt.m	
+++ b/Vavmoudakis Actor Critic/baby_fnt.m	
@@ -52,7 +52,10 @@
 ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state
 ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error 
 % or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2)
-
+% BASICALLY, looking at eq30, ud = Wa'*U(1:2) = Wa'(x(1:2)-xfstate) wants
+% to reach u* = inv(Quu)*Qux*U(1:2), that's why ea is defined like this.
+% ud is slowly getting to u*, aka
+% Wa'*U(1:2) -> inv(Quu)*Qux*U(1:2)
 
 % critic update
 dWc = -alphac*((sigma./(sigma'*sigma+1).^2)*ec'+(UkUfinal./((UkUfinal'*UkUfinal+1).^2)*ecfinal')); % eq 22