optimizationbook/collocation_method.tex at main · YinTat/optimizationbook · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
\selectlanguage{american}%

\section{Collocation Method}

From the definition of (\ref{eq:def_T_d_operator}), we note that
$\mathcal{T}_{\varphi}(x)$ depends only on $x(t)$ at $t=c_{j}$.
Therefore, we can only need to calculate $\mathcal{T}_{\varphi}(x)(t)$
at $t=c_{j}$. To simplify the notation, for any $x\in\mathcal{C}([0,T],\R^{d})$,
we define a corresponding matrix $[x]\in\R^{d\times D}$ by $[x]_{i,j}=x_{i}(c_{j})$.
For any $d\times D$ matrix $X$, we define $F(X,c)$ as an $d\times D$
matrix
\begin{equation}
F(X,c)_{i,j}=F(X_{*,j},c_{j})_{i}.\label{eq:bF}
\end{equation}
where $X_{*,j}$ is the $j$-th column of $X$. We also define $A_{\varphi}$
as a $D\times D$ matrix
\begin{equation}
(A_{\varphi})_{i,j}=\int_{0}^{c_{j}}\varphi_{i}(s)\d s.\label{eq:Aphi}
\end{equation}
Note that $A_{\phi}$ can be pre-computed. By inspecting the definition
of (\ref{eq:def_T_d_operator}), (\ref{eq:bF}) and (\ref{eq:Aphi}),
we have that
\[
[\mathcal{T}_{\varphi}(x)]=v\cdot1_{D}^{\top}+F([x],c)A_{\varphi}
\]
where $1_{D}$ is a column of all 1 vector of length $D$. Hence,
we can apply the map $\mathcal{T}_{\varphi}$ by simply multiply $F([x],c)$
by a pre-compute $D\times D$ matrix $A_{\varphi}$. For the basis
we consider here, each iteration takes only $\tilde{O}(dD)$ which
is nearly linear in the size of our representation of the solution.

\begin{algorithm2e}[H]

\caption{$\mathtt{CollocationMethod\}}$}

\SetAlgoLined

Input: $F,v,T,\text{\ensuremath{\varphi}},c$.

Let $N=\left\lceil \log\left(\frac{T}{\epsilon}\max_{s\in[0,T]}\left\Vert F(v,s)\right\Vert \right)\right\rceil $.
Let $(A_{\varphi})_{i,j}=\int_{0}^{c_{j}}\varphi_{i}(s)\d s.$

$X^{(0)}\leftarrow v\cdot1_{D}^{\top}.$

For $j=1,2,\cdots,N-1$:
\[
X^{(j)}\leftarrow v\cdot1_{D}^{\top}+F(X^{(j-1)},c)A_{\varphi}.
\]

$x^{(N)}(t)\leftarrow v+\int_{0}^{t}\sum_{i=1}^{D}F(X_{*,i}^{(N)},c_{i})\varphi_{i}(s)\mathrm{d}s$.

\Return $x^{(N)}$.

\end{algorithm2e}

We state our guarantee for a first-order ODE. It can be extended to
a $k$'th order ODE.
\begin{thm}
\label{thm:first_order_ode} Let $x^{*}(t)$ be the solution of an
$d$ dimensional ODE
\begin{align*}
x(0)=v,\frac{\d x(t)}{\d t}=F(x(t),t)\quad\text{for all \ensuremath{0\leq t\leq T}}.
\end{align*}
We are given a $D$ dimensional subspace $\mathcal{V}\subset\mathcal{C}([0,T],\R)$,
node points $\{c_{j}\}_{j=1}^{D}\subset[0,T]$ and a $\gamma_{\varphi}$
bounded basis $\{\varphi_{j}\}_{j=1}^{D}\subset\mathcal{V}$ (Definition
\ref{def:basis}). Let $L$ and $\epsilon>0$ be such that there exists
a function $q\in\mathcal{V}$ such that
\begin{align*}
\left\Vert q(t)-\frac{\d}{\d t}x^{*}(t)\right\Vert \leq\frac{\epsilon}{T},\forall t\in[0,T]
\end{align*}
and for any $y,z\in\R^{d}$,
\begin{align*}
\|F(y,t)-F(z,t)\|\leq L\|y-z\|,\forall t\in[0,T].
\end{align*}
Assume $\gamma_{\varphi}LT\leq1/2$. Then Algorithm $\textsc{CollocationMethod}$
outputs a function $x^{(N)}\in\mathcal{V}$ such that
\begin{align*}
\max_{t\in[0,T]}\|x^{(N)}(t)-x^{*}(t)\|\leq20\gamma_{\varphi}\epsilon
\end{align*}
using $O\left(D\log\left(\frac{T}{\epsilon}\max_{s\in[0,T]}\left\Vert F(v,s)\right\Vert \right)\right)$
evaluations of $F$.
\end{thm}


\subsection{Proof for first-order ODE}

First, we bound the Lipschitz constant of the map $\T_{\varphi}$.
Unlike in the Picard-Lindelöf theorem, we are not able to get an improved
bound of the Lipschitz constant by composing many copies of $\T_{\varphi}$.
Fortunately, the Lipschitz constant of the map $\T_{\varphi}$ can
be bounded.
\begin{lem}
\label{lem:Lipschitz_Tphi} Given any norm $\|\cdot\|$ on $\R^{d}$.
Let $L$ be the Lipschitz constant of $F$ in $x$, i.e.
\[
\|F(x,s)-F(y,s)\|\leq L\|x-y\|\quad\text{for all }x,y\in\R^{d},s\in[0,T].
\]
Then, the Lipschitz constant of $\T_{\varphi}$ in this norm is upper
bounded by $\gamma_{\varphi}LT$.
\end{lem}

\begin{proof}
For any $0\leq t\leq T$,
\begin{align*}
\|\T_{\varphi}(x)(t)-\T_{\varphi}(y)(t)\|= & ~\left\Vert \int_{0}^{t}\sum_{j=1}^{D}F(x(c_{j}),c_{j})\varphi_{j}(s)\d s-\int_{0}^{t}\sum_{j=1}^{D}F(y(c_{j}),c_{j})\varphi_{j}(s)\d s\right\Vert \\
\leq & ~\sum_{j=1}^{D}\left|\int_{0}^{t}\varphi_{j}(s)\d s\right|\cdot\max_{t\in[0,T]}\|F(x(t),t)-F(y(t),t)\|\\
\leq & ~\gamma_{\varphi}LT\cdot\max_{t\in[0,T]}\|x(t)-y(t)\|\\
\leq & ~\gamma_{\varphi}LT\|x-y\|.
\end{align*}
where the third step follows using $\sum_{j=1}^{D}|\int_{0}^{t}\varphi_{j}(s)\d s|\leq\gamma_{\varphi}T$
for all $0\leq t\leq T$.
\end{proof}
For the rest of the proof, let $x_{\varphi}^{*}$ denote the fixed
point of $\T_{\varphi}$, i.e., $\T_{\varphi}(x_{\varphi}^{*})=x_{\varphi}^{*}$.
The Banach fixed point theorem and Lemma \ref{lem:Lipschitz_Tphi}
imply that $x_{\varphi}^{*}$ exists and is unique if $T\leq\frac{1}{L\gamma_{\varphi}}$.

Let $x^{*}$ denote the solution of the ODE, i.e., the fixed point
of $\T$, with $\T(x^{*})=x^{*}$. Let $x^{(0)}$ denote the initial
solution given by $x^{(0)}(t)=v$ and $x^{(N)}$ denote the solution
obtained by applying operator $\T_{\varphi}$ for $N$ times. Note
that $x^{(N)}(t)$ is the output of \textsc{CollocationMethod}.

Let $q\in\mathcal{V}$ denote an approximation of $\frac{\d}{\d t}x^{*}$
such that
\begin{align*}
\left\Vert q(t)-\frac{\d}{\d t}x^{*}(t)\right\Vert \leq & ~\frac{\epsilon}{T}.\forall t\in[0,T]
\end{align*}

The next lemma summarizes how these objects are related and will allow
us to prove the main guarantee for first-order ODEs.
\begin{lem}
\label{lem:helper} %\tat{Check at the end if we always use the first three lemma in order.}
Let $L^{(j)}$ be the Lipschitz constant of the map $\T_{\varphi}^{\circ j}$.
Assume that $L^{(N)}\leq1/2$. Then, we have
\begin{eqnarray}
\|x^{(N)}-x^{*}\| & \leq & L^{(N)}\|x^{(0)}-x^{*}\|+2\|x_{\varphi}^{*}-x^{*}\|,\label{cla:ode_1_claim_1}\\
\|x_{\varphi}^{*}-x^{*}\| & \leq & 2\cdot\|\T_{\varphi}^{\circ N}(x^{*})-x^{*}\|,\label{cla:ode_1_claim_2}\\
\|x^{*}-\T_{\varphi}^{\circ N}(x^{*})\| & \leq & \sum_{i=0}^{N-1}L^{(i)}\cdot\|x^{*}-\T_{\varphi}(x^{*})\|,\label{cla:ode_1_claim_3}\\
\|x^{*}-\T_{\varphi}(x^{*})\| & \leq & 2\gamma_{\varphi}\cdot\epsilon.\label{cla:ode_1_claim_4}
\end{eqnarray}
\end{lem}

\begin{proof}
We prove the claims in order. For the first claim,
\begin{align*}
\|x^{(N)}-x^{*}\|\leq & ~\|x^{(N)}-x_{\varphi}^{*}\|+\|x_{\varphi}^{*}-x^{*}\| & \text{~by~triangle~inequality}\\
= & ~\|\T_{\varphi}^{\circ N}(x^{(0)})-\T_{\varphi}^{\circ N}(x_{\varphi}^{*})\|+\|x_{\varphi}^{*}-x^{*}\|\\
\leq & ~L^{(N)}\|x^{(0)}-x_{\varphi}^{*}\|+\|x_{\varphi}^{*}-x^{*}\|\\
\leq & ~L^{(N)}\|x^{(0)}-x^{*}\|+L^{(N)}\|x^{*}-x_{\varphi}^{*}\|+\|x_{\varphi}^{*}-x^{*}\|\\
\leq & ~L^{(N)}\|x^{(0)}-x^{*}\|+2\|x_{\varphi}^{*}-x^{*}\|
\end{align*}
where the last step follows by $L^{(N)}\leq1$.

For the second claim,
\begin{align*}
\|x_{\varphi}^{*}-x^{*}\|= & ~\|\T_{\varphi}^{\circ N}(x_{\varphi}^{*})-x^{*}\| & \text{~by~}x_{\varphi}^{*}=\T_{\varphi}^{\circ N}(x_{\varphi}^{*})\\
\leq & ~\|\T_{\varphi}^{\circ N}(x_{\varphi}^{*})-\T_{\varphi}^{\circ N}(x^{*})\|+\|\T_{\varphi}^{\circ N}(x^{*})-x^{*}\| & \text{~by~triangle~inequality}\\
\leq & ~L^{(N)}\cdot\|x_{\varphi}^{*}-x^{*}\|+\|\T_{\varphi}^{\circ N}(x^{*})-x^{*}\| & \text{~by the definition of \ensuremath{L^{(N)}}}\\
\leq & ~\frac{1}{2}\|x_{\varphi}^{*}-x^{*}\|+\|\T_{\varphi}^{\circ N}(x^{*})-x^{*}\| & \text{~by~}L^{(N)}\leq1/2
\end{align*}
For the third claim,
\begin{align*}
\|x^{*}-\T_{\varphi}^{\circ N}(x^{*})\|\leq & ~\sum_{i=0}^{N-1}\|\T_{\varphi}^{\circ i}(x^{*})-\T_{\varphi}^{\circ(i+1)}(x^{*})\|\\
\leq & ~\sum_{i=0}^{N-1}L^{(i)}\cdot\|x^{*}-\T_{\varphi}(x^{*})\|.
\end{align*}
For the last claim,
\begin{align*}
~\|x^{*}(t)-\T_{\varphi}(x^{*})(t)\|= & ~\|\T(x^{*})(t)-\T_{\varphi}(x^{*})(t)\|\\
%=
\end{align*}
where the first step uses ${\cal T}(x^{*})=x^{*}$, the second step
follows by the definition of ${\cal T}$ and ${\cal T}_{\varphi}$,
the third step follows by $x^{*}(t)$ is the solution of ODE, the
fourth step follows by triangle inequality, the second last step uses
$q\in\mathcal{V}$, and the last step follows from the assumption
$\|\frac{\d}{\d t}x^{*}-q\|\leq\frac{\epsilon}{T}$ and the definition
of $\gamma_{\varphi}$.
\end{proof}
Now, we are ready to prove Theorem~\ref{thm:first_order_ode}.
\begin{proof}
Using Lemma \ref{lem:helper}, we have
\begin{align*}
\|x^{(N)}-x^{*}\|\leq & ~L^{(N)}\|x^{(0)}-x^{*}\|+2\|x_{\varphi}^{*}-x^{*}\| & \text{~by~Eq.~(\ref{cla:ode_1_claim_1})}\\
\leq & ~L^{(N)}\|x^{(0)}-x^{*}\|+4\|\T_{\varphi}^{\circ N}(x^{*})-x^{*}\| & \text{~by~Eq.~(\ref{cla:ode_1_claim_2})}\\
\leq & ~L^{(N)}\|x^{(0)}-x^{*}\|+4\sum_{i=0}^{N-1}L^{(i)}\cdot\|x^{*}-\T_{\varphi}(x^{*})\| & \text{~by~Eq.~(\ref{cla:ode_1_claim_3})}\\
\leq & ~L^{(N)}\|x^{(0)}-x^{*}\|+8\sum_{i=0}^{N-1}L^{(i)}\cdot\gamma_{\varphi}\cdot\epsilon. & \text{~by~Eq.~(\ref{cla:ode_1_claim_4})}
\end{align*}
Using the assumption that $\gamma_{\varphi}LT\leq\frac{1}{2}$, Lemma
\ref{lem:Lipschitz_Tphi} shows that $L^{(1)}\leq\frac{1}{2}$ and
hence $L^{(j)}\leq\frac{1}{2^{j}}$. Therefore, we have
\begin{equation}
\|x^{(N)}-x^{*}\|\leq\frac{1}{2^{N}}\|x^{(0)}-x^{*}\|+16\gamma_{\varphi}\cdot\epsilon=\frac{1}{2^{N}}\|x^{*}-x^{*}(0)\|+16\gamma_{\varphi}\cdot\epsilon\label{eq:first_order_error}
\end{equation}
To bound $\|x^{*}-x^{*}(0)\|$, for any $0\leq t\leq T$
\[
x^{*}(t)=x^{*}(0)+\int_{0}^{t}F(x^{*}(s),s)\d s.
\]
Hence, we have that
\begin{align*}
\|x^{*}(t)-x^{*}(0)\| & \leq\left\Vert \int_{0}^{T}F(x^{*}(0),s)\d s\right\Vert +\left\Vert \int_{0}^{t}\left(F(x^{*}(s),s)-F(x^{*}(0),s)\right)\d s\right\Vert \\
 & \leq\left\Vert \int_{0}^{T}F(x^{*}(0),s)\d s\right\Vert +L\int_{0}^{t}\|x^{*}(s)-x^{*}(0)\|\d s.
\end{align*}
Solving this integral inequality (see Lemma~\ref{lem:exp_ODE}),
we have that
\[
\|x^{*}(t)-x^{*}(0)\|\leq e^{Lt}\left\Vert \int_{0}^{T}F(x^{*}(0),s)\d s\right\Vert .
\]
Now, we use $LT\leq\frac{1}{2}$ and get
\[
\|x^{*}(t)-x^{*}(0)\|\leq2\left\Vert \int_{0}^{T}F(x^{*}(0),s)\d s\right\Vert .
\]
Picking $N=\left\lceil \log_{2}\left(\frac{T}{\epsilon}\max_{s\in[0,T]}\left\Vert F(x^{*}(0),s)\right\Vert \right)\right\rceil $,
(\ref{eq:first_order_error}) shows that the error is less than $20\gamma_{\varphi}\epsilon$.
\end{proof}
\begin{lem}
\label{lem:exp_ODE} Given a continuous function $v(t)$ and positive
scalars $\beta,\gamma$ such that
\[
0\leq v(t)\leq\beta+\gamma\int_{0}^{t}v(s)\d s.
\]
We have that $v(t)\leq\beta e^{\gamma t}$ for all $t\geq0$.
\end{lem}

\begin{xca}
Prove the above lemma.
\end{xca}


\subsection{}\selectlanguage{english}%