diff --git a/ODE1.py b/ODE1.py
index 80698d3..60a3a31 100644
--- a/ODE1.py
+++ b/ODE1.py
@@ -48,6 +48,8 @@ def train_step(model, optimizer, x):
 
 bar = tqdm()
 keyboard.on_press_key("q", press_key_exit)
+
+
 while True:
     if ready_to_exit:
         break
diff --git a/ODE2_test.py b/ODE2_test.py
index 08cbcb0..6e43a2a 100644
--- a/ODE2_test.py
+++ b/ODE2_test.py
@@ -1,3 +1,5 @@
+from ODE_wrapper import WrapperODE
+
 from tqdm import tqdm
 import numpy as np
 import tensorflow as tf
@@ -9,44 +11,48 @@
 from scipy.integrate import solve_ivp
 tf.keras.backend.set_floatx('float64')
 
-
 ready_to_exit = False
 
 
 def press_key_exit(_q):
     global ready_to_exit
-    ready_to_exit=True
+    ready_to_exit = True
+
+
+def setup_ode(model):
+    ode = WrapperODE(model)
+    func = lambda x: ode.d(x, 2) - 6*tf.math.square(ode.y(x))
+    ode.set_function(func)
+    ode.set_start_values(np.asarray([1, -2]))
+    return ode
+
 
+def train_step(model, optimizer, ode, x):
 
-def train_step(model, optimizer, x):
-    N=x.shape[0]
     with tf.GradientTape(persistent=True) as g:
         g.watch(x)
-        with tf.GradientTape() as gg:
-            gg.watch(x)
-            y=model(x)
-            dy = gg.gradient(y, x)
-        ddy=g.gradient(dy, x)
-        residual=ddy-6*tf.math.square(y)
-        loss = tf.math.square(residual)
-        start_loss = (tf.math.square(y[0]-1)+tf.math.square(dy[0]+2))*N
-        start_loss=tf.concat([start_loss, tf.zeros(N-1,dtype='float64')], 0)
-        total_loss = loss+start_loss
+
+        [common_loss, start_loss] = ode.loss(x)
+        total_loss = common_loss + start_loss
+
         gradients = g.gradient(total_loss, model.trainable_variables)
         optimizer.apply_gradients(zip(gradients, model.trainable_variables))
-    return float(tf.math.reduce_mean(loss)),float(tf.math.reduce_mean(start_loss))
+    return float(tf.math.reduce_mean(common_loss)), float(tf.math.reduce_mean(start_loss))
 
 
 N = 500
-T=2
+T = 2
 gx = np.linspace(1,T,N)
 
+
 model = keras.models.Sequential()
 model.add(layers.InputLayer(input_shape=(1,)))
 for k in range(5):
     model.add(layers.Dense(50, activation='elu'))
 model.add(layers.Dense(1, use_bias = False))
 
+ode = setup_ode(model)
+
 lr_schedule = keras.optimizers.schedules.ExponentialDecay(0.001,decay_steps=100,decay_rate=0.99)
 
 optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
@@ -59,7 +65,7 @@ def train_step(model, optimizer, x):
     if ready_to_exit:
         break
     grid_tf_run=tf.expand_dims(gx,axis=1)
-    loss = train_step(model, optimizer, grid_tf_run)
+    loss = train_step(model, optimizer, ode, grid_tf_run)
     if (sum(loss)<min_loss):
         min_loss=sum(loss)
         best_model.set_weights(model.get_weights()) 
@@ -68,13 +74,13 @@ def train_step(model, optimizer, x):
 
 #solve by Runge kutta
 def diff(t, z):
-    return [z[1],6*z[0]*z[0]]
+    return [z[1], 6*z[0]*z[0]]
 sol = solve_ivp(diff,[1, T], [1,-2],dense_output=True)
 z = sol.sol(gx)
 y = best_model(gx)
 plt.plot(gx,np.squeeze(y))
 plt.plot(gx, z[0].T)
 plt.plot(gx, 1.0/gx**2)
-plt.title('Solving equation $y\'\'=y^2$')
+plt.title('Solving equation $y\'\'=6y^2$')
 plt.legend(['Tensorflow','Runge-Kutta','Theoretical'])
-plt.show()
+plt.show()
\ No newline at end of file
diff --git a/ODE_wrapper.py b/ODE_wrapper.py
new file mode 100644
index 0000000..17689e7
--- /dev/null
+++ b/ODE_wrapper.py
@@ -0,0 +1,81 @@
+import tensorflow as tf
+import numpy as np
+
+class WrapperODE(object):
+    """docstring"""
+
+    def __init__(self, model):
+        """Constructor"""
+        self.__model = model
+        self.__function = None
+        self.__start_values = None
+        self.__derivatives = []
+        self.__keep_derivatives = False
+
+
+    def set_function(self, function):
+        self.__function = function
+
+
+    def set_start_values(self, start_values):
+        self.__start_values = start_values
+
+
+    def residual(self, x):
+        return self.__function(x)
+
+
+    def d(self, x, degree = 1):
+
+        if (len(self.__derivatives) > degree):
+            return self.__derivatives[degree]
+
+        if (degree == 0):
+            y = self.__model(x)
+            self.__add_to_derivitives(y)
+            return y
+
+        with tf.GradientTape() as g:
+            g.watch(x)
+            y = self.d(x, degree - 1)
+            dy = g.gradient(y, x)
+        self.__add_to_derivitives(dy)
+        return dy
+
+
+    def y(self, x):
+        return self.d(x, 0)
+
+
+    def loss(self, x):
+        self.__set_keep_derivatives(True)
+        res = [self.__common_loss(x), self.__start_loss(x)]
+        self.__set_keep_derivatives(False)
+        return res
+
+
+    def __common_loss(self, x):
+        return tf.math.square(self.residual(x))
+
+
+    def __start_loss(self, x):
+        n = x.shape[0]
+        values_number = self.__start_values.size
+
+        res = np.zeros(values_number)
+        for i in range(values_number):
+            res += tf.math.square(self.d(x, i)[0] - self.__start_values[i])
+        res *= n
+        res = tf.concat([res, tf.zeros(n - 1, dtype='float64')], 0)
+        return res
+
+
+    def __set_keep_derivatives(self, on):
+        self.__keep_derivatives = on
+        if (not on):
+            self.__derivatives = []
+
+
+    def __add_to_derivitives(self, dy):
+        if (self.__keep_derivatives):
+            self.__derivatives.insert(len(self.__derivatives), dy)
\ No newline at end of file
diff --git a/PDE.py b/PDE.py
index c73a586..9500760 100644
--- a/PDE.py
+++ b/PDE.py
@@ -1,3 +1,5 @@
+from PDE_wrapper import WrapperPDE
+
 from tqdm import tqdm
 import numpy as np
 import tensorflow as tf
@@ -15,24 +17,40 @@ def press_key_exit(_q):
     ready_to_exit=True
 
 
-def train_step(model, optimizer, features, init_cond, init_mask):
-    N=int(tf.math.reduce_sum(init_mask))
+def setup_init_cond():
+    init_cond = np.zeros((N, N))
+    init_cond[:, -1] = x ** 2
+    init_cond = tf.convert_to_tensor(np.ndarray.flatten(init_cond))
+    return init_cond
+
+
+def setup_init_mask():
+    init_mask = np.zeros((N, N))
+    init_mask[:, -1] = 1
+    init_mask = tf.convert_to_tensor(np.ndarray.flatten(init_mask))
+    return init_mask
+
+
+def setup_pde(model):
+    pde = WrapperPDE(model)
+    func = lambda features: tf.math.square(features) - [1.0, -2.0] * features * pde.d(features)
+    pde.set_function(func)
+    return pde
+
+def train_step(model, optimizer, pde, features, init_cond, init_mask):
+
+    pde.set_init_cond(init_cond)
+    pde.set_init_mask(init_mask)
+
     with tf.GradientTape(persistent=True) as tape:
         tape.watch(features)
-        predictions = model(features)
-        dy = tape.gradient(predictions, features)
-        coeff=tf.convert_to_tensor([1.0,-2.0],dtype=np.float64)
-        cy=tf.math.multiply(features,coeff)
-        lhs=tf.math.reduce_sum(tf.math.multiply(dy,cy), axis=1)
-        rhs=tf.math.reduce_sum(tf.math.square(features),axis=1)
-        loss = tf.math.square(lhs - rhs)
-        kkk = tf.math.square(tf.squeeze(predictions) - init_cond)
-        start_loss = tf.multiply(kkk, init_mask)
-        start_loss = tf.expand_dims(start_loss, axis=1)*N
-        total_loss = loss+start_loss
+
+        [common_loss, start_loss] = pde.loss(features)
+        total_loss = common_loss + start_loss
+
         gradients = tape.gradient(total_loss, model.trainable_variables)
         optimizer.apply_gradients(zip(gradients, model.trainable_variables))
-    return float(tf.math.reduce_mean(loss)),float(tf.math.reduce_mean(start_loss))
+    return float(tf.math.reduce_mean(common_loss)), float(tf.math.reduce_mean(start_loss))
 
 
 N = 100
@@ -40,12 +58,9 @@ def train_step(model, optimizer, features, init_cond, init_mask):
 func = lambda x,y: x*x/2 - y*y/4 + y*x*x/2 + 1/4
 ground_truth = func(x[:,None], x[None,:])
 gx,gy = np.meshgrid(x,x)
-init_cond=np.zeros((N,N))
-init_cond[:,-1]=x**2
-init_cond = tf.convert_to_tensor(np.ndarray.flatten(init_cond))
-init_mask=np.zeros((N,N))
-init_mask[:,-1]=1
-init_mask = tf.convert_to_tensor(np.ndarray.flatten(init_mask))
+init_cond = setup_init_cond()
+init_mask = setup_init_mask()
+
 grid_tf = np.vstack((np.ndarray.flatten(gy),np.ndarray.flatten(gx))).transpose()
 grid_tf = tf.convert_to_tensor(grid_tf)
 
@@ -57,6 +72,9 @@ def train_step(model, optimizer, features, init_cond, init_mask):
 optimizer = tf.keras.optimizers.Adam()
 initial_cond = 0
 
+pde = setup_pde(model)
+pde.set_optimization_mode(True)
+
 bar = tqdm()
 keyboard.on_press_key("q", press_key_exit)
 while True:
@@ -66,13 +84,12 @@ def train_step(model, optimizer, features, init_cond, init_mask):
     grid_tf_run=tf.gather(grid_tf, indicies,axis=0)
     init_cond_run=tf.gather(init_cond, indicies)
     init_mask_run=tf.gather(init_mask, indicies)
-    loss = train_step(model, optimizer, grid_tf_run, init_cond_run, init_mask_run)
+    loss = train_step(model, optimizer, pde, grid_tf_run, init_cond_run, init_mask_run)
     bar.set_description("Loss: {:.5f} {:.5f}".format(*loss))
     bar.update(1)
 
 
 
-
 with tf.GradientTape() as tape:
         tape.watch(grid_tf)
         val = model(grid_tf)
@@ -87,7 +104,7 @@ def train_step(model, optimizer, features, init_cond, init_mask):
 dx=np.reshape(grad[:,0],(N,N))
 dy=np.reshape(grad[:,1],(N,N))
 err=dx*xm-2*dy*ym-xm*xm-ym*ym
-print("Mean error of differential equation: (analytical diff){}".format(np.mean(err)))
+print("Mean error of differential equation: (analytical diff) {}".format(np.mean(err)))
 err=res[0]*xm-2*res[1]*ym-xm*xm-ym*ym
 print("Mean error of differential equation (numerical diff): {}".format(np.mean(err)))
 err=np.abs(val-ground_truth)
@@ -103,6 +120,3 @@ def train_step(model, optimizer, features, init_cond, init_mask):
 ax.set_title('Surface plot')
 plt.show()
 
-
-
-
diff --git a/PDE_wrapper.py b/PDE_wrapper.py
new file mode 100644
index 0000000..8c8eb40
--- /dev/null
+++ b/PDE_wrapper.py
@@ -0,0 +1,72 @@
+import tensorflow as tf
+import numpy as np
+
+class WrapperPDE(object):
+    """docstring"""
+
+    def __init__(self, model):
+        """Constructor"""
+        self.__model = model
+        self.__function = None
+        self.__init_cond = None
+        self.__init_mask = None
+        # This flag is imposed due to probable accuracy losses
+        # while working on a large number of iterations.
+        self.__optimization_mode = False
+
+
+    def set_function(self, function):
+        self.__function = function
+
+
+    def set_init_cond(self, init_cond):
+        self.__init_cond = init_cond
+
+
+    def set_init_mask(self, init_mask):
+        self.__init_mask = init_mask
+
+
+    def set_optimization_mode(self, optimization_mode):
+        self.__optimization_mode = optimization_mode
+
+
+    def residual(self, features):
+        return self.__function(features)
+
+
+    def d(self, features, degree = 1):
+        if (degree == 0):
+            u = self.u(features)
+            return u
+
+        with tf.GradientTape() as g:
+            g.watch(features)
+            u = self.d(features, degree - 1)
+            du = g.gradient(u, features)
+        return du
+
+
+    def u(self, features):
+        return self.__model(features)
+
+
+    def loss(self, features):
+        res = [self.__common_loss(features), self.__start_loss(features)]
+        return res
+
+
+    def __common_loss(self, features):
+        residual = self.residual(features)
+        if (not self.__optimization_mode):
+            residual = tf.math.reduce_sum(residual, axis=1)
+        return tf.math.square(residual)
+
+
+    def __start_loss(self, features):
+        N = int(tf.math.reduce_sum(self.__init_mask))
+        predictions = self.__model(features)
+        diff = tf.math.square(tf.squeeze(predictions) - self.__init_cond)
+        start_loss = tf.multiply(diff, self.__init_mask)
+        start_loss = tf.expand_dims(start_loss, axis=1)*N
+        return start_loss
\ No newline at end of file