diff --git a/Vavmoudakis Actor Critic/Kontoudis.m b/Vavmoudakis Actor Critic/Kontoudis.m new file mode 100644 index 0000000..ad985a3 --- /dev/null +++ b/Vavmoudakis Actor Critic/Kontoudis.m @@ -0,0 +1,25 @@ +function [currentState, time] = Kontoudis(initializations, currentState, xfstate)%, uvec) +global uvec + +%% Decrypt Init Vars +T = initializations{6}; +N = initializations{8}; + +time = []; + +%% Solve ODE +options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01); + +for i=1:N + sol_fnt = ode45(@(t,x) actorCritic(t,x,xfstate,initializations),[(i-1)*T,i*T],currentState(end,:),options); + + time = [time;sol_fnt.x']; % save time + currentState = [currentState;sol_fnt.y']; % save state + + uDelayInterpFcn = interp2PWC(uvec,0,i*T); % interpolate control input + x1DelayInterpFcn = interp2PWC(currentState(:,1),0,i*T); + x2DelayInterpFcn = interp2PWC(currentState(:,2),0,i*T); + delays = {uDelayInterpFcn; x1DelayInterpFcn; x2DelayInterpFcn}; + initializations(end) = {delays}; +end +end diff --git a/Vavmoudakis Actor Critic/actorCritic.m b/Vavmoudakis Actor Critic/actorCritic.m new file mode 100644 index 0000000..a797cf1 --- /dev/null +++ b/Vavmoudakis Actor Critic/actorCritic.m @@ -0,0 +1,101 @@ +function [dotx] = actorCritic(t,x, xfstate, params)%, uvec) +global uvec + +%% Decompose Parameters Cell +A = params(1); +B = params(2); +M = params(3); +R = params(4); +Pt = params(5); +T = params(6); +Tf = params(7); +alphaa = params(9); +alphac = params(10); +amplitude = params(end-4); +percent = params(end-3); +ufinal = params(end-2); +Wcfinal = params(end-1); +delays = params(end); +% Note that every variable above is a struct, thus we get its value by +% using the {} operator or by using cell2mat +A = cell2mat(A); +B = cell2mat(B); +M = cell2mat(M); +R = cell2mat(R); +T = cell2mat(T); +Wcfinal = cell2mat(Wcfinal); +ufinal = cell2mat(ufinal); + +uDelayInterpFcn = delays{1, 1}(1); +x1DelayInterpFcn = delays{1, 1}(2); +x2DelayInterpFcn = delays{1, 1}(3); + +Wc = [x(3);x(4);x(5);x(6);x(7);x(8)]; %critic +Wa = [x(9);x(10)]; %actor +p = x(11); %integral + +UkUfinal = [xfstate(1)^2 ; xfstate(1)*xfstate(2); xfstate(1)*ufinal; xfstate(2)^2 ; xfstate(2)*ufinal; ufinal^2]; +Pfinal = 0.5*(xfstate)'*Pt{1}*(xfstate); % equation 19 + +% Update control +ud = Wa'*(x(1:2)-xfstate); % equation 17 + + +% State and control delays +uddelay = ppval(uDelayInterpFcn{1},t-T); +xdelay(1) = ppval(x1DelayInterpFcn{1},t-T); +xdelay(2) = ppval(x2DelayInterpFcn{1},t-T); + +%Augmented state and Kronecker products +U = [x(1:2)-xfstate;ud-ufinal]; % Augmented state +UkU = [U(1)^2 ; U(1)*U(2); U(1)*ud; U(2)^2 ; U(2)*ud; ud^2]; % U kron U +% This is not the exact UkronU (9x1 matrix), but we do it to lower the +% dimensionality. This way, Wc' would be 1x9, so the matrix multiplication +% can actually happen in ec. +UkUdelay = [xdelay(1)^2 ; xdelay(1)*xdelay(2); xdelay(1)*uddelay; xdelay(2)^2 ;... + xdelay(2)*uddelay; uddelay^2]; + +Qxx = x(3:5); % equations 14-16 +Quu = x(8); +Qxu = [x(6) x(7)]'; +Qux = Qxu'; + +sigma = UkU - UkUdelay; + +% integral reinforcement +dp = 0.5*((x(1:2)-xfstate)'*M*(x(1:2)-xfstate) -xdelay(1:2)*M*xdelay(1:2)' ... + +ud'*R*ud - uddelay'*R*uddelay); + +% mine +QStar = Wc'*UkU; +uStar = -inv(Quu)*Qux*U(1:2); + +% Approximation Errors +ec = p + Wc'*UkU - Wc'*UkUdelay; % Critic approximator error +ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state +ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error +% or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2) +% BASICALLY, looking at eq30, ud = Wa'*U(1:2) = Wa'(x(1:2)-xfstate) wants +% to reach u* = inv(Quu)*Qux*U(1:2), that's why ea is defined like this. +% ud is slowly getting to u*, aka +% Wa'*U(1:2) -> inv(Quu)*Qux*U(1:2) + + +% critic update +dWc = -alphac{1}*((sigma./(sigma'*sigma+1).^2)*ec'+(UkUfinal./((UkUfinal'*UkUfinal+1).^2)*ecfinal')); % eq 22 + +% actor update +dWa = -alphaa{1}*U(1:2)*ea'; % equation 23 + +% Persistent Excitation +if t<=(percent{1}/100)*Tf{1} + unew=(ud+amplitude{1}*exp(-0.009*t)*2*(sin(t)^2*cos(t)+sin(2*t)^2*cos(0.1*t)+... + sin(-1.2*t)^2*cos(0.5*t)+sin(t)^5+sin(1.12*t)^2+cos(2.4*t)*sin(2.4*t)^3)); +else + unew=ud; +end + +dx=A*[U(1);U(2)]+B*unew; +uvec = [uvec;unew]; +dotx = [dx;dWc;dWa;dp;QStar;uStar]; % augmented state +end diff --git a/Vavmoudakis Actor Critic/baby_fnt.m b/Vavmoudakis Actor Critic/baby_fnt.m index 6e89e85..cac2ee2 100644 --- a/Vavmoudakis Actor Critic/baby_fnt.m +++ b/Vavmoudakis Actor Critic/baby_fnt.m @@ -52,7 +52,10 @@ ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error % or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2) - +% BASICALLY, looking at eq30, ud = Wa'*U(1:2) = Wa'(x(1:2)-xfstate) wants +% to reach u* = inv(Quu)*Qux*U(1:2), that's why ea is defined like this. +% ud is slowly getting to u*, aka +% Wa'*U(1:2) -> inv(Quu)*Qux*U(1:2) % critic update dWc = -alphac*((sigma./(sigma'*sigma+1).^2)*ec'+(UkUfinal./((UkUfinal'*UkUfinal+1).^2)*ecfinal')); % eq 22 diff --git a/Vavmoudakis Actor Critic/interp2PWC.m b/Vavmoudakis Actor Critic/interp2PWC.m index e33f00b..43bc8af 100644 --- a/Vavmoudakis Actor Critic/interp2PWC.m +++ b/Vavmoudakis Actor Critic/interp2PWC.m @@ -2,8 +2,8 @@ % piecewise continuous function given the initial and final time provided function [f] = interp2PWC(y,xi,xf) -[m,n] = size(y); % get size of the input vec +[m,~] = size(y); % get size of the input vec x = linspace(xi,xf,m); %create x vector corresponding to y -[c,ia,ic] = unique(y,'stable'); % figure out who and where are the unique points +[c,ia,~] = unique(y,'stable'); % figure out who and where are the unique points f = mkpp([xi x(ia(2:end)') xf],c); % interpolate in a piecewise continuous fcn with corresponding time vec -end \ No newline at end of file +end diff --git a/Vavmoudakis Actor Critic/main.m b/Vavmoudakis Actor Critic/main.m index 6c7d268..68667d5 100644 --- a/Vavmoudakis Actor Critic/main.m +++ b/Vavmoudakis Actor Critic/main.m @@ -11,7 +11,8 @@ global T Tf % ODE paramters global alphaa alphac % GD parameters global amplitude percent % Noise parameters -global uvec uDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn sigma UkU UkUdelay +global uvec uDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn + A = [-1 0;0 -2]; B = [0 1]'; [n,m]=size(B); % States and controls @@ -51,9 +52,6 @@ t_save_fnt = []; % time vec x_save_fnt = [x0state;Wc0;Wa0;p0;QStar0;uStar0]';%;xfstate]'; % build the initial cond state vec. This is x initially -sigma_save_fnt = []; -UkU_save = []; -UkUdelay_save =[]; uDelayInterpFcn = interp2PWC(0,0,1); x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,0); @@ -64,15 +62,11 @@ for i=1:N options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01); % 'OutputFcn',@odeplot, tic; - sol_fnt = ode23(@baby_fnt,[(i-1)*T,i*T],x_save_fnt(end,:),options); + sol_fnt = ode45(@baby_fnt,[(i-1)*T,i*T],x_save_fnt(end,:),options); toc; t_save_fnt = [t_save_fnt;sol_fnt.x']; % save time x_save_fnt = [x_save_fnt;sol_fnt.y']; % save state - sigma_save_fnt = [sigma_save_fnt sigma]; - UkU_save = [UkU_save [sqrt(UkU(1)); sqrt(UkU(4))]]; - UkUdelay_save =[UkUdelay_save [sqrt(UkUdelay(1)); sqrt(UkUdelay(4))]]; - uDelayInterpFcn = interp2PWC(uvec,0,i*T); % interpolate control input x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,i*T); diff --git a/Vavmoudakis Actor Critic/reachabilityMain.m b/Vavmoudakis Actor Critic/reachabilityMain.m new file mode 100644 index 0000000..6e0d012 --- /dev/null +++ b/Vavmoudakis Actor Critic/reachabilityMain.m @@ -0,0 +1,194 @@ +function reachabilityMain() +global uvec +close all; +clc; + +%% Dynamics and System Parameters +A = [-1 0;0 -2]; +B = [0 1]'; +[~,m] = size(B); % States and controls +M = diag([1,.1]); +R = .1; +Pt = diag([.5,.5]); % Ricatti at final time + +% ODEs parameters +T = 0.05; % Delay +Tf = 6; % Finite horizon in seconds +N = Tf/T; % Number of simulation rounds + +% Gradient descent parameters +alphac = 90; +alphaa = 1.5; + +% Noise paramaters +amplitude = .1; % changes the amplitude of the PE (kyriakos pe) +percent = 50; + +Wc0 = [rand(5,1);10*rand(1)]; % critic weights +Wa0 = .5*ones(2,1); % actor weights +ufinal = 0.001; +Wcfinal = 12*ones(6,1); + +% Initial condition of the augmented system +x0state = [32 -15]'; % initial state +xfstate = [-11.5 7.5]'; % final state + +p0 = x0state'*M*x0state; % integral RL, initil control 0 % from equation 8 + +QStar0 = rand(m); +uStar0 = rand(m); + +x_save_fnt = [x0state;Wc0;Wa0;p0;QStar0;uStar0]'; % build the initial cond state vec. This is x initially + +uDelayInterpFcn = interp2PWC(0,0,1); +x1DelayInterpFcn = interp2PWC(x_save_fnt(:,1),0,0); +x2DelayInterpFcn = interp2PWC(x_save_fnt(:,2),0,0); + +delays = {uDelayInterpFcn; x1DelayInterpFcn; x2DelayInterpFcn}; +initializations = {A; B; M; R; Pt; T; Tf; N; ... + alphaa; alphac; amplitude; ... + percent; ufinal; Wcfinal; delays}; + +%% Set target +[xVal, yVal] = circle(-11.5,7.5,4);%(0,0,8); +figure +hold on; +plot(xVal,yVal, 'r'); +plot(x0state(1),x0state(2),'.b') +xlim([-20 40]) +ylim([-20 40]) +title('Initial Point(Blue) and Target(Red)') +pause(0.1); +hold off; + +% Retrieve actual number of points of the target +[~, xPoints] = size(xVal); % no need for yPoints cause they are the same + +allFinalStatesVector = []; + +%% Call Kontoudis' algorithm +for i=1:xPoints + xfstate = [xVal(i) yVal(i)]'; + tic + uvec = []; + [stateVector, time] = Kontoudis(initializations, x_save_fnt, xfstate); + allFinalStatesVector = [allFinalStatesVector; stateVector(end, 1:2)]; + toc +end + +%% Plots +figure +hold on; +plot(xVal,yVal, 'r'); hold on; % target +plot(x0state(1),x0state(2),'.b'); hold on; % initial State +plot(allFinalStatesVector(1:end,1),allFinalStatesVector(1:end,2),'-b'); hold on; +xlim([-20 40]) +ylim([-20 40]) +title('Reachable Set') +grid on; hold off; + +figure +hold on; +plot(xVal,yVal, 'r'); hold on; +plot(allFinalStatesVector(1:end,1),allFinalStatesVector(1:end,2),'-b'); hold on; +plot(x0state(1),x0state(2),'.b'); hold on; +% find closest point of the target to the initial point +dist = sqrt((x0state(1) - allFinalStatesVector(:,1)).^2 + (x0state(2) - allFinalStatesVector(:,2)).^2); +[~, indexOfMin] = min(dist); +closestX = xVal(indexOfMin); +closestY = yVal(indexOfMin); +% actually plot +plot(closestX, closestY, '-g'); hold on; +line([x0state(1), closestX], [x0state(2), closestY], 'LineWidth', 2, 'Color', 'g'); +xlim([-20 40]) +ylim([-20 40]) +title('Optimal Trajectory to Reachable Set = Target') +grid on; hold off; + +figure +hold on; +plot(xVal,yVal, 'r'); hold on; +plot(x0state(1),x0state(2),'.b'); hold on; +% find closest point of the target to the initial point +dist = sqrt((x0state(1) - xVal(:,1))^2 + (x0state(2) - yVal(:,1))^2); +[~, indexOfMin] = min(dist); +closestX = xVal(indexOfMin); +closestY = yVal(indexOfMin); +% actually plot +plot(closestX, closestY, '-g'); hold on; +line([x0state(1), closestX], [x0state(2), closestY], 'LineWidth', 2, 'Color', 'g'); +xlim([-20 40]) +ylim([-20 40]) +title('Optimal Trajectory to Target = Reachable Set') +grid on; hold off; + +% x1, x2 to show at the reachability DID happen; it reached xfstate +figure +set(gca,'FontSize',26); hold on; +plot(time,stateVector(1:end-1,1),'LineWidth',2,'Color', 'b'); hold on; +plot(time,stateVector(1:end-1,2),'LineWidth',2,'Color', 'm'); hold on; +xlabel('Time [s]'); ylabel('States'); legend('x_1','x_2'); +grid on; hold off; + +figure +set(gca,'FontSize',26) +hold on; +for i=1:6 + plot(time,stateVector(1:end-1,i+2),'-','LineWidth',2) + hold on; +end +xlabel('Time [s]');ylabel('Critic Weights W_c'); +legend('Wc_1','Wc_2','Wc_3','Wc_4','Wc_5','Wc_6'); +grid on; +hold off; + +% mine +figure +set(gca,'FontSize',26) +hold on; +plot(time,stateVector(1:end-1,9),'-','LineWidth',2) +plot(time,stateVector(1:end-1,10),'-','LineWidth',2) +xlabel('Time [s]');ylabel('Actor Weights W_a'); +legend('Wa_1','Wa_2'); +grid on; +hold off; + +% For the Q = Value function, either plot equation 19 or Q* before eq 16 +% Equation 19 is a number +valueFunctionLastExpectedValue = xfstate'*Pt*xfstate; % integral RL, initil control 0 % from equation 8 +disp(valueFunctionLastExpectedValue) + +valueFunctionLastActualValue = [stateVector(end,1); stateVector(end,2)]'*Pt*[stateVector(end,1); stateVector(end,2)]; % integral RL, initil control 0 % from equation 8 +disp(valueFunctionLastActualValue) + +% Q* before eq 16 +figure +set(gca,'FontSize',26) +hold on; +plot(time,stateVector(1:end-1,12),'-','LineWidth',2) +xlabel('Time [s]');ylabel('Q*'); +grid on; +hold off; + +% u* from eq 15 +figure +set(gca,'FontSize',26) +hold on; +plot(time,stateVector(1:end-1, 13),'-','LineWidth',2) +xlabel('Time [s]');ylabel('u*'); +grid on; +hold off; +end + +function [xVal, yVal] = circle(x,y,r) +% Creates a target circle and returns its points +% x and y are the coordinates of the center of the circle +% r is the radius of the circle +% 0.01 is the angle step, bigger values will draw the circle faster but +% you might notice imperfections (not very smooth) +ang = linspace(0,2*pi,100); +xp = r*cos(ang); +yp = r*sin(ang); +xVal = x+xp; +yVal = y+yp; +end