From b77ab00646c774a23b06ff97dc566de5f78c4217 Mon Sep 17 00:00:00 2001 From: Constantinos Costopoulos Date: Mon, 18 Jul 2022 01:17:38 -0400 Subject: [PATCH 1/7] Remove unneeded stuff from main.m --- Vavmoudakis Actor Critic/main.m | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Vavmoudakis Actor Critic/main.m b/Vavmoudakis Actor Critic/main.m index 6c7d268..e8c5fae 100644 --- a/Vavmoudakis Actor Critic/main.m +++ b/Vavmoudakis Actor Critic/main.m @@ -11,7 +11,8 @@ global T Tf % ODE paramters global alphaa alphac % GD parameters global amplitude percent % Noise parameters -global uvec uDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn sigma UkU UkUdelay +global uvec uDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn sigma + A = [-1 0;0 -2]; B = [0 1]'; [n,m]=size(B); % States and controls @@ -52,8 +53,6 @@ t_save_fnt = []; % time vec x_save_fnt = [x0state;Wc0;Wa0;p0;QStar0;uStar0]';%;xfstate]'; % build the initial cond state vec. This is x initially sigma_save_fnt = []; -UkU_save = []; -UkUdelay_save =[]; uDelayInterpFcn = interp2PWC(0,0,1); x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,0); @@ -70,9 +69,6 @@ t_save_fnt = [t_save_fnt;sol_fnt.x']; % save time x_save_fnt = [x_save_fnt;sol_fnt.y']; % save state sigma_save_fnt = [sigma_save_fnt sigma]; - UkU_save = [UkU_save [sqrt(UkU(1)); sqrt(UkU(4))]]; - UkUdelay_save =[UkUdelay_save [sqrt(UkUdelay(1)); sqrt(UkUdelay(4))]]; - uDelayInterpFcn = interp2PWC(uvec,0,i*T); % interpolate control input x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,i*T); From 24e386f377cdd018cbe323fc720691171ec5f241 Mon Sep 17 00:00:00 2001 From: Constantinos Costopoulos Date: Mon, 18 Jul 2022 01:36:47 -0400 Subject: [PATCH 2/7] Remove more unneeded stuff --- Vavmoudakis Actor Critic/interp2PWC.m | 6 +++--- Vavmoudakis Actor Critic/main.m | 4 +--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Vavmoudakis Actor Critic/interp2PWC.m b/Vavmoudakis Actor Critic/interp2PWC.m index e33f00b..43bc8af 100644 --- a/Vavmoudakis Actor Critic/interp2PWC.m +++ b/Vavmoudakis Actor Critic/interp2PWC.m @@ -2,8 +2,8 @@ % piecewise continuous function given the initial and final time provided function [f] = interp2PWC(y,xi,xf) -[m,n] = size(y); % get size of the input vec +[m,~] = size(y); % get size of the input vec x = linspace(xi,xf,m); %create x vector corresponding to y -[c,ia,ic] = unique(y,'stable'); % figure out who and where are the unique points +[c,ia,~] = unique(y,'stable'); % figure out who and where are the unique points f = mkpp([xi x(ia(2:end)') xf],c); % interpolate in a piecewise continuous fcn with corresponding time vec -end \ No newline at end of file +end diff --git a/Vavmoudakis Actor Critic/main.m b/Vavmoudakis Actor Critic/main.m index e8c5fae..593be81 100644 --- a/Vavmoudakis Actor Critic/main.m +++ b/Vavmoudakis Actor Critic/main.m @@ -11,7 +11,7 @@ global T Tf % ODE paramters global alphaa alphac % GD parameters global amplitude percent % Noise parameters -global uvec uDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn sigma +global uvec uDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn A = [-1 0;0 -2]; B = [0 1]'; @@ -52,7 +52,6 @@ t_save_fnt = []; % time vec x_save_fnt = [x0state;Wc0;Wa0;p0;QStar0;uStar0]';%;xfstate]'; % build the initial cond state vec. This is x initially -sigma_save_fnt = []; uDelayInterpFcn = interp2PWC(0,0,1); x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,0); @@ -68,7 +67,6 @@ t_save_fnt = [t_save_fnt;sol_fnt.x']; % save time x_save_fnt = [x_save_fnt;sol_fnt.y']; % save state - sigma_save_fnt = [sigma_save_fnt sigma]; uDelayInterpFcn = interp2PWC(uvec,0,i*T); % interpolate control input x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,i*T); From 939b4014cc322d0f5e630477bc91d8ff15e6bae0 Mon Sep 17 00:00:00 2001 From: Constantinos Costopoulos Date: Mon, 18 Jul 2022 01:37:02 -0400 Subject: [PATCH 3/7] Change ode23 to ode45 --- Vavmoudakis Actor Critic/main.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Vavmoudakis Actor Critic/main.m b/Vavmoudakis Actor Critic/main.m index 593be81..68667d5 100644 --- a/Vavmoudakis Actor Critic/main.m +++ b/Vavmoudakis Actor Critic/main.m @@ -62,7 +62,7 @@ for i=1:N options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01); % 'OutputFcn',@odeplot, tic; - sol_fnt = ode23(@baby_fnt,[(i-1)*T,i*T],x_save_fnt(end,:),options); + sol_fnt = ode45(@baby_fnt,[(i-1)*T,i*T],x_save_fnt(end,:),options); toc; t_save_fnt = [t_save_fnt;sol_fnt.x']; % save time From 2ea4396f7f8716a61c8441c56b1d49bd881debbf Mon Sep 17 00:00:00 2001 From: Constantinos Costopoulos Date: Mon, 18 Jul 2022 16:55:04 -0400 Subject: [PATCH 4/7] Add func prog implementation for Kontoudis' algo --- Vavmoudakis Actor Critic/Kontoudis.m | 27 +++++ Vavmoudakis Actor Critic/actorCritic.m | 97 ++++++++++++++++ Vavmoudakis Actor Critic/reachabilityMain.m | 116 ++++++++++++++++++++ 3 files changed, 240 insertions(+) create mode 100644 Vavmoudakis Actor Critic/Kontoudis.m create mode 100644 Vavmoudakis Actor Critic/actorCritic.m create mode 100644 Vavmoudakis Actor Critic/reachabilityMain.m diff --git a/Vavmoudakis Actor Critic/Kontoudis.m b/Vavmoudakis Actor Critic/Kontoudis.m new file mode 100644 index 0000000..7cadd3b --- /dev/null +++ b/Vavmoudakis Actor Critic/Kontoudis.m @@ -0,0 +1,27 @@ +function [currentState, time] = Kontoudis(initializations, currentState, xfstate)%, uvec) +global uvec + +%% Decrypt Init Vars +T = initializations{6}; +N = initializations{8}; + +time = []; + +%% Solve ODE +options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01); + +for i=1:N + tic; + sol_fnt = ode45(@(t,x) actorCritic(t,x,xfstate,initializations),[(i-1)*T,i*T],currentState(end,:),options); + toc; + + time = [time;sol_fnt.x']; % save time + currentState = [currentState;sol_fnt.y']; % save state + + uDelayInterpFcn = interp2PWC(uvec,0,i*T); % interpolate control input + x1DelayInterpFcn = interp2PWC(currentState(:,1),0,i*T); + x2DelayInterpFcn = interp2PWC(currentState(:,2),0,i*T); + delays = {uDelayInterpFcn; x1DelayInterpFcn; x2DelayInterpFcn}; + initializations(end) = {delays}; +end +end diff --git a/Vavmoudakis Actor Critic/actorCritic.m b/Vavmoudakis Actor Critic/actorCritic.m new file mode 100644 index 0000000..6479b8c --- /dev/null +++ b/Vavmoudakis Actor Critic/actorCritic.m @@ -0,0 +1,97 @@ +function [dotx] = actorCritic(t,x, xfstate, params)%, uvec) +global uvec + +%% Decompose Parameters Cell +A = params(1); +B = params(2); +M = params(3); +R = params(4); +Pt = params(5); +T = params(6); +Tf = params(7); +alphaa = params(9); +alphac = params(10); +amplitude = params(end-4); +percent = params(end-3); +ufinal = params(end-2); +Wcfinal = params(end-1); +delays = params(end); +% Note that every variable above is a struct, thus we get its value by +% using the {} operator or by using cell2mat +A = cell2mat(A); +B = cell2mat(B); +M = cell2mat(M); +R = cell2mat(R); +T = cell2mat(T); +Wcfinal = cell2mat(Wcfinal); +ufinal = cell2mat(ufinal); + +uDelayInterpFcn = delays{1, 1}(1); +x1DelayInterpFcn = delays{1, 1}(2); +x2DelayInterpFcn = delays{1, 1}(3); + +Wc = [x(3);x(4);x(5);x(6);x(7);x(8)]; %critic +Wa = [x(9);x(10)]; %actor +p = x(11); %integral + +UkUfinal = [xfstate(1)^2 ; xfstate(1)*xfstate(2); xfstate(1)*ufinal; xfstate(2)^2 ; xfstate(2)*ufinal; ufinal^2]; +Pfinal = 0.5*(xfstate)'*Pt{1}*(xfstate); % equation 19 + +% Update control +ud = Wa'*(x(1:2)-xfstate); % equation 17 + + +% State and control delays +uddelay = ppval(uDelayInterpFcn{1},t-T); +xdelay(1) = ppval(x1DelayInterpFcn{1},t-T); +xdelay(2) = ppval(x2DelayInterpFcn{1},t-T); + +%Augmented state and Kronecker products +U = [x(1:2)-xfstate;ud-ufinal]; % Augmented state +UkU = [U(1)^2 ; U(1)*U(2); U(1)*ud; U(2)^2 ; U(2)*ud; ud^2]; % U kron U +% This is not the exact UkronU (9x1 matrix), but we do it to lower the +% dimensionality. This way, Wc' would be 1x9, so the matrix multiplication +% can actually happen in ec. +UkUdelay = [xdelay(1)^2 ; xdelay(1)*xdelay(2); xdelay(1)*uddelay; xdelay(2)^2 ;... + xdelay(2)*uddelay; uddelay^2]; + +Qxx = x(3:5); % equations 14-16 +Quu = x(8); +Qxu = [x(6) x(7)]'; +Qux = Qxu'; + +sigma = UkU - UkUdelay; + +% integral reinforcement +dp = 0.5*((x(1:2)-xfstate)'*M*(x(1:2)-xfstate) -xdelay(1:2)*M*xdelay(1:2)' ... + +ud'*R*ud - uddelay'*R*uddelay); + +% mine +QStar = Wc'*UkU; +uStar = -inv(Quu)*Qux*U(1:2); + +% Approximation Errors +ec = p + Wc'*UkU - Wc'*UkUdelay; % Critic approximator error +ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state +ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error +% or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2) + + +% critic update +dWc = -alphac{1}*((sigma./(sigma'*sigma+1).^2)*ec'+(UkUfinal./((UkUfinal'*UkUfinal+1).^2)*ecfinal')); % eq 22 + +% actor update +dWa = -alphaa{1}*U(1:2)*ea'; % equation 23 + +% Persistent Excitation +if t<=(percent{1}/100)*Tf{1} + unew=(ud+amplitude{1}*exp(-0.009*t)*2*(sin(t)^2*cos(t)+sin(2*t)^2*cos(0.1*t)+... + sin(-1.2*t)^2*cos(0.5*t)+sin(t)^5+sin(1.12*t)^2+cos(2.4*t)*sin(2.4*t)^3)); +else + unew=ud; +end + +dx=A*[U(1);U(2)]+B*unew; +uvec = [uvec;unew]; +dotx = [dx;dWc;dWa;dp;QStar;uStar]; % augmented state +end diff --git a/Vavmoudakis Actor Critic/reachabilityMain.m b/Vavmoudakis Actor Critic/reachabilityMain.m new file mode 100644 index 0000000..e2b1664 --- /dev/null +++ b/Vavmoudakis Actor Critic/reachabilityMain.m @@ -0,0 +1,116 @@ +function reachabilityMain() +global uvec +% Clc all +close all; +clc; + +%% Dynamics and System Parameters +A = [-1 0;0 -2]; +B = [0 1]'; +[~,m] = size(B); % States and controls +M = diag([1,.1]); +R = .1; +Pt = diag([.5,.5]); % Ricatti at final time + +% ODEs parameters +T = 0.05; % Delay +Tf = 6; % Finite horizon in seconds +N = Tf/T; % Number of simulation rounds + +% Gradient descent parameters +alphac = 90; +alphaa = 1.5; + +% Noise paramaters +amplitude = .1; % changes the amplitude of the PE (kyriakos pe) +percent = 50; + +Wc0 = [rand(5,1);10*rand(1)]; % critic weights +Wa0 = .5*ones(2,1); % actor weights +ufinal = 0.001; +Wcfinal = 12*ones(6,1); + +% Initial condition of the augmented system +x0state = [32 -15]'; % initial state +xfstate = [-11.5 7.5]'; % final state + +p0 = x0state'*M*x0state; % integral RL, initil control 0 % from equation 8 + +QStar0 = rand(m); +uStar0 = rand(m); + +x_save_fnt = [x0state;Wc0;Wa0;p0;QStar0;uStar0]'; % build the initial cond state vec. This is x initially + +uDelayInterpFcn = interp2PWC(0,0,1); +x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,0); +x2DelayInterpFcn = interp2PWC(x_save_fnt(:,2),0,0); + +delays = {uDelayInterpFcn; x1DelayInterpFcn; x2DelayInterpFcn}; +initializations = {A; B; M; R; Pt; T; Tf; N; ... + alphaa; alphac; amplitude; ... + percent; ufinal; Wcfinal; delays}; + +%% Call Kontoudis' algorithm +uvec = []; % when the reachability part comes here, before every Kontoudis() + % call we will reset uvec. +[stateVector, time] = Kontoudis(initializations, x_save_fnt, xfstate);%), uvec); + +%% Plots +% x1, x2 to show at the reachability DID happen; it reached xfstate +figure +set(gca,'FontSize',26); hold on; +plot(time,stateVector(1:end-1,1),'LineWidth',2,'Color', 'b'); hold on; +plot(time,stateVector(1:end-1,2),'LineWidth',2,'Color', 'm'); hold on; +xlabel('Time [s]'); ylabel('States'); legend('x_1','x_2'); +grid on; hold off; + +figure +set(gca,'FontSize',26) +hold on; +for i=1:6 + plot(time,stateVector(1:end-1,i+2),'-','LineWidth',2) + hold on; +end +xlabel('Time [s]');ylabel('Critic Weights W_c'); +legend('Wc_1','Wc_2','Wc_3','Wc_4','Wc_5','Wc_6'); +grid on; +hold off; + +% mine +figure +set(gca,'FontSize',26) +hold on; +plot(time,stateVector(1:end-1,9),'-','LineWidth',2) +plot(time,stateVector(1:end-1,10),'-','LineWidth',2) +xlabel('Time [s]');ylabel('Actor Weights W_a'); +legend('Wa_1','Wa_2'); +grid on; +hold off; + +% For the Q = Value function, either plot equation 19 or Q* before eq 16 +% Equation 19 is a number +valueFunctionLastExpectedValue = xfstate'*Pt*xfstate; % integral RL, initil control 0 % from equation 8 +disp(valueFunctionLastExpectedValue) + +valueFunctionLastActualValue = [stateVector(end,1); stateVector(end,2)]'*Pt*[stateVector(end,1); stateVector(end,2)]; % integral RL, initil control 0 % from equation 8 +disp(valueFunctionLastActualValue) + +% Q* before eq 16 +figure +set(gca,'FontSize',26) +hold on; +plot(time,stateVector(1:end-1,12),'-','LineWidth',2) +xlabel('Time [s]');ylabel('Q*'); +grid on; +hold off; +% FUCK YALL, IT ASYMPTOTICALLY CONVERGES, BADABEEM BADABOOM BADABAM + +% u* from eq 15 +figure +set(gca,'FontSize',26) +hold on; +plot(time,stateVector(1:end-1, 13),'-','LineWidth',2) +xlabel('Time [s]');ylabel('u*'); +grid on; +hold off; +end From e866951d2aa40fb7d316c3a55086898a111189c4 Mon Sep 17 00:00:00 2001 From: Constantinos Costopoulos Date: Tue, 19 Jul 2022 17:00:26 -0400 Subject: [PATCH 5/7] Add initial implementation for reachability --- Vavmoudakis Actor Critic/Kontoudis.m | 4 +- Vavmoudakis Actor Critic/reachabilityMain.m | 74 ++++++++++++++++++++- 2 files changed, 73 insertions(+), 5 deletions(-) diff --git a/Vavmoudakis Actor Critic/Kontoudis.m b/Vavmoudakis Actor Critic/Kontoudis.m index 7cadd3b..d7a5a75 100644 --- a/Vavmoudakis Actor Critic/Kontoudis.m +++ b/Vavmoudakis Actor Critic/Kontoudis.m @@ -11,9 +11,9 @@ options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01); for i=1:N - tic; +% tic; sol_fnt = ode45(@(t,x) actorCritic(t,x,xfstate,initializations),[(i-1)*T,i*T],currentState(end,:),options); - toc; +% toc; time = [time;sol_fnt.x']; % save time currentState = [currentState;sol_fnt.y']; % save state diff --git a/Vavmoudakis Actor Critic/reachabilityMain.m b/Vavmoudakis Actor Critic/reachabilityMain.m index e2b1664..3016739 100644 --- a/Vavmoudakis Actor Critic/reachabilityMain.m +++ b/Vavmoudakis Actor Critic/reachabilityMain.m @@ -50,12 +50,67 @@ function reachabilityMain() alphaa; alphac; amplitude; ... percent; ufinal; Wcfinal; delays}; +%% Set target +[xVal, yVal] = circle(-11.5,7.5,4);%(0,0,8); +figure +hold on; +plot(xVal,yVal, 'r'); +plot(x0state(1),x0state(2),'.b') +xlim([-20 40]) +ylim([-20 40]) +title('Initial Point(Blue) and Target(Red)') +pause(0.1); +hold off; + +% Retrieve actual points of the target +[~, xPoints] = size(xVal); +[~, yPoints] = size(yVal); + +allFinalStatesVector = []; + %% Call Kontoudis' algorithm -uvec = []; % when the reachability part comes here, before every Kontoudis() - % call we will reset uvec. -[stateVector, time] = Kontoudis(initializations, x_save_fnt, xfstate);%), uvec); +% tic +for i=1:xPoints + xfstate(1) = xVal(i); + tic % around 38sec per j iteration => around 1900 sec to finish all, aka 31.6 mins + for j=1:yPoints + uvec = []; + xfstate(2) = yVal(j); % instead of xfstate = [xVal(i) yVal(j)]'; + [stateVector, time] = Kontoudis(initializations, x_save_fnt, xfstate); + allFinalStatesVector = [allFinalStatesVector; stateVector(end, 1:2)]; + end + toc +end +% toc %% Plots +figure +hold on; +plot(xVal,yVal, 'r'); hold on; % target +plot(x0state(1),x0state(2),'.b'); hold on; % initial State +plot(allFinalStatesVector(1:end,1),allFinalStatesVector(1:end,2),'-b'); hold on; +xlim([-20 40]) +ylim([-20 40]) +title('Reachable Set') +grid on; hold off; + +figure +hold on; +plot(xVal,yVal, 'r'); hold on; +plot(x0state(1),x0state(2),'.b'); hold on; +% find closest point of the target to the initial point +dist = sqrt((x0state(1) - xVal(:,1))^2 + (x0state(2) - yVal(:,1))^2); +[minDist, indexOfMin] = min(dist); +closestX = xVal(indexOfMin); +closestY = yVal(indexOfMin); +% actually plot +plot(closestX, closestY, '-g'); hold on; +line([x0state(1), closestX], [x0state(2), closestY], 'LineWidth', 2, 'Color', 'g'); +xlim([-20 40]) +ylim([-20 40]) +title('Optimal Trajectory') +grid on; hold off; + % x1, x2 to show at the reachability DID happen; it reached xfstate figure set(gca,'FontSize',26); hold on; @@ -114,3 +169,16 @@ function reachabilityMain() grid on; hold off; end + +function [xVal, yVal] = circle(x,y,r) +% Creates a target circle and returns its points +% x and y are the coordinates of the center of the circle +% r is the radius of the circle +% 0.01 is the angle step, bigger values will draw the circle faster but +% you might notice imperfections (not very smooth) +ang = linspace(0,2*pi,50); %50 % ang = 0:0.04:2*pi; % 158*158 points with that +xp = r*cos(ang); +yp = r*sin(ang); +xVal = x+xp; +yVal = y+yp; +end From ac6673061cde1765db37d5cbfe5a8e9955efaec2 Mon Sep 17 00:00:00 2001 From: Constantinos Costopoulos Date: Tue, 19 Jul 2022 17:47:47 -0400 Subject: [PATCH 6/7] Finalize reachability implementation --- Vavmoudakis Actor Critic/Kontoudis.m | 2 - Vavmoudakis Actor Critic/reachabilityMain.m | 46 +++++++++++++-------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/Vavmoudakis Actor Critic/Kontoudis.m b/Vavmoudakis Actor Critic/Kontoudis.m index d7a5a75..ad985a3 100644 --- a/Vavmoudakis Actor Critic/Kontoudis.m +++ b/Vavmoudakis Actor Critic/Kontoudis.m @@ -11,9 +11,7 @@ options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01); for i=1:N -% tic; sol_fnt = ode45(@(t,x) actorCritic(t,x,xfstate,initializations),[(i-1)*T,i*T],currentState(end,:),options); -% toc; time = [time;sol_fnt.x']; % save time currentState = [currentState;sol_fnt.y']; % save state diff --git a/Vavmoudakis Actor Critic/reachabilityMain.m b/Vavmoudakis Actor Critic/reachabilityMain.m index 3016739..6e0d012 100644 --- a/Vavmoudakis Actor Critic/reachabilityMain.m +++ b/Vavmoudakis Actor Critic/reachabilityMain.m @@ -1,6 +1,5 @@ function reachabilityMain() global uvec -% Clc all close all; clc; @@ -62,26 +61,20 @@ function reachabilityMain() pause(0.1); hold off; -% Retrieve actual points of the target -[~, xPoints] = size(xVal); -[~, yPoints] = size(yVal); +% Retrieve actual number of points of the target +[~, xPoints] = size(xVal); % no need for yPoints cause they are the same allFinalStatesVector = []; %% Call Kontoudis' algorithm -% tic for i=1:xPoints - xfstate(1) = xVal(i); - tic % around 38sec per j iteration => around 1900 sec to finish all, aka 31.6 mins - for j=1:yPoints - uvec = []; - xfstate(2) = yVal(j); % instead of xfstate = [xVal(i) yVal(j)]'; - [stateVector, time] = Kontoudis(initializations, x_save_fnt, xfstate); - allFinalStatesVector = [allFinalStatesVector; stateVector(end, 1:2)]; - end + xfstate = [xVal(i) yVal(i)]'; + tic + uvec = []; + [stateVector, time] = Kontoudis(initializations, x_save_fnt, xfstate); + allFinalStatesVector = [allFinalStatesVector; stateVector(end, 1:2)]; toc end -% toc %% Plots figure @@ -94,13 +87,31 @@ function reachabilityMain() title('Reachable Set') grid on; hold off; +figure +hold on; +plot(xVal,yVal, 'r'); hold on; +plot(allFinalStatesVector(1:end,1),allFinalStatesVector(1:end,2),'-b'); hold on; +plot(x0state(1),x0state(2),'.b'); hold on; +% find closest point of the target to the initial point +dist = sqrt((x0state(1) - allFinalStatesVector(:,1)).^2 + (x0state(2) - allFinalStatesVector(:,2)).^2); +[~, indexOfMin] = min(dist); +closestX = xVal(indexOfMin); +closestY = yVal(indexOfMin); +% actually plot +plot(closestX, closestY, '-g'); hold on; +line([x0state(1), closestX], [x0state(2), closestY], 'LineWidth', 2, 'Color', 'g'); +xlim([-20 40]) +ylim([-20 40]) +title('Optimal Trajectory to Reachable Set = Target') +grid on; hold off; + figure hold on; plot(xVal,yVal, 'r'); hold on; plot(x0state(1),x0state(2),'.b'); hold on; % find closest point of the target to the initial point dist = sqrt((x0state(1) - xVal(:,1))^2 + (x0state(2) - yVal(:,1))^2); -[minDist, indexOfMin] = min(dist); +[~, indexOfMin] = min(dist); closestX = xVal(indexOfMin); closestY = yVal(indexOfMin); % actually plot @@ -108,7 +119,7 @@ function reachabilityMain() line([x0state(1), closestX], [x0state(2), closestY], 'LineWidth', 2, 'Color', 'g'); xlim([-20 40]) ylim([-20 40]) -title('Optimal Trajectory') +title('Optimal Trajectory to Target = Reachable Set') grid on; hold off; % x1, x2 to show at the reachability DID happen; it reached xfstate @@ -158,7 +169,6 @@ function reachabilityMain() xlabel('Time [s]');ylabel('Q*'); grid on; hold off; -% FUCK YALL, IT ASYMPTOTICALLY CONVERGES, BADABEEM BADABOOM BADABAM % u* from eq 15 figure @@ -176,7 +186,7 @@ function reachabilityMain() % r is the radius of the circle % 0.01 is the angle step, bigger values will draw the circle faster but % you might notice imperfections (not very smooth) -ang = linspace(0,2*pi,50); %50 % ang = 0:0.04:2*pi; % 158*158 points with that +ang = linspace(0,2*pi,100); xp = r*cos(ang); yp = r*sin(ang); xVal = x+xp; From b5f01c61afa8fa6933885a5196f00713a1d15500 Mon Sep 17 00:00:00 2001 From: Constantinos Costopoulos Date: Tue, 19 Jul 2022 20:08:23 -0400 Subject: [PATCH 7/7] Add comms --- Vavmoudakis Actor Critic/actorCritic.m | 4 ++++ Vavmoudakis Actor Critic/baby_fnt.m | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Vavmoudakis Actor Critic/actorCritic.m b/Vavmoudakis Actor Critic/actorCritic.m index 6479b8c..a797cf1 100644 --- a/Vavmoudakis Actor Critic/actorCritic.m +++ b/Vavmoudakis Actor Critic/actorCritic.m @@ -75,6 +75,10 @@ ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error % or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2) +% BASICALLY, looking at eq30, ud = Wa'*U(1:2) = Wa'(x(1:2)-xfstate) wants +% to reach u* = inv(Quu)*Qux*U(1:2), that's why ea is defined like this. +% ud is slowly getting to u*, aka +% Wa'*U(1:2) -> inv(Quu)*Qux*U(1:2) % critic update diff --git a/Vavmoudakis Actor Critic/baby_fnt.m b/Vavmoudakis Actor Critic/baby_fnt.m index 6e89e85..cac2ee2 100644 --- a/Vavmoudakis Actor Critic/baby_fnt.m +++ b/Vavmoudakis Actor Critic/baby_fnt.m @@ -52,7 +52,10 @@ ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error % or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2) - +% BASICALLY, looking at eq30, ud = Wa'*U(1:2) = Wa'(x(1:2)-xfstate) wants +% to reach u* = inv(Quu)*Qux*U(1:2), that's why ea is defined like this. +% ud is slowly getting to u*, aka +% Wa'*U(1:2) -> inv(Quu)*Qux*U(1:2) % critic update dWc = -alphac*((sigma./(sigma'*sigma+1).^2)*ec'+(UkUfinal./((UkUfinal'*UkUfinal+1).^2)*ecfinal')); % eq 22