Hi, when I run a modified `exp_inverse.py` example to fold a cloth, there seems to be a memory leak: checking with `htop`, the memory used by the `exp_inverse.py` process keeps increasing with every epoch, and the process is eventually killed when the run is long. Here is our code:
```python
import torch
import arcsim
import gc
import time
import json
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

now = datetime.now()
timestamp = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')

steps = 30
epochs = 10
node_number = 0
handles = [25, 60, 30, 54]
losses = []
param_g = torch.zeros([steps, 12], dtype=torch.float64, requires_grad=True)

default_dir = 'results/' + time.ctime()
os.mkdir(default_dir)
out_path = default_dir + '/default_out'
os.mkdir(out_path)

with open('conf/rigidcloth/drag/drag.json', 'r') as f:
    config = json.load(f)

def save_config(config, file):
    with open(file, 'w') as f:
        json.dump(config, f)

save_config(config, out_path + '/conf.json')

torch.set_num_threads(16)
scalev = 1

def reset_sim(sim, epoch):
    if epoch < 20:
        arcsim.init_physics(out_path + '/conf.json', out_path + '/out%d' % epoch, False)
    else:
        arcsim.init_physics(out_path + '/conf.json', out_path + '/out', False)

def get_target_mesh():
    sim = arcsim.get_sim()
    arcsim.init_physics('conf/rigidcloth/fold_targets/target1.json', out_path + '/target', False)
    global node_number
    node_number = len(sim.cloths[0].mesh.nodes)
    ref = [sim.cloths[0].mesh.nodes[i].x.numpy() for i in range(node_number)]
    ref = torch.from_numpy(np.vstack(ref))
    return ref

def get_loss(sim, ref):
    reg = torch.norm(param_g, p=2) * 0.001
    loss = 0
    for i in range(ref.shape[0]):
        loss += torch.norm(ref[i] - sim.cloths[0].mesh.nodes[i].x) ** 2
    loss /= node_number
    loss += reg
    return loss

def run_sim(steps, sim, ref):
    # sim.obstacles[2].curr_state_mesh.dummy_node.x = param_g[1]
    print("step")
    for step in range(steps):
        print(step)
        # Add the learned velocity increments to the handle nodes, then step.
        for i in range(len(handles)):
            inc_v = param_g[step, 3 * i:3 * i + 3]
            sim.cloths[0].mesh.nodes[handles[i]].v += inc_v
            del inc_v
        arcsim.sim_step()
    loss = get_loss(sim, ref)
    return loss

@profile  # injected by the memory profiler used to inspect this script
def do_train(cur_step, optimizer, scheduler, sim):
    epoch = 0
    ref = get_target_mesh()
    print(ref)
    while True:
        reset_sim(sim, epoch)
        st = time.time()
        loss = run_sim(steps, sim, ref)
        en0 = time.time()
        optimizer.zero_grad()
        loss.backward()
        en1 = time.time()
        print("=======================================")
        # 'f' is the log file opened in the main block below
        f.write('epoch {}: loss={}\n'.format(epoch, loss.data))
        print('epoch {}: loss={}\n'.format(epoch, loss.data))
        print('forward time={}'.format(en0 - st))
        print('backward time={}'.format(en1 - en0))
        optimizer.step()
        # scheduler.step(epoch)
        losses.append(loss)
        if epoch >= epochs:
            break
        epoch = epoch + 1
        # break

def visualize_loss(losses, dir_name):
    plt.plot(losses)
    plt.title('losses')
    plt.xlabel('epochs')
    plt.ylabel('losses')
    plt.savefig(dir_name + '/' + 'loss.jpg')

with open(out_path + ('/log%s.txt' % timestamp), 'w', buffering=1) as f:
    tot_step = 1
    sim = arcsim.get_sim()
    # reset_sim(sim)
    lr = 10
    momentum = 0.4
    f.write('lr={} momentum={}\n'.format(lr, momentum))
    optimizer = torch.optim.SGD([{'params': param_g, 'lr': lr}], momentum=momentum)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 10, 2, eta_min=0.0001)
    for cur_step in range(tot_step):
        do_train(cur_step, optimizer, scheduler, sim)
    visualize_loss(losses, default_dir)
    print("done")
```

Also, I used a memory profiler to inspect the code and found that `arcsim.sim_step()` takes most of the memory without releasing it.
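To turn the `htop` observation into per-epoch numbers, the resident set size can be logged from inside the training loop. A minimal sketch, assuming the `psutil` package is installed (the helper `log_rss` is hypothetical, not part of arcsim):

```python
import os
import psutil

def log_rss(tag):
    # Resident set size (RSS) of the current process, in MiB.
    rss = psutil.Process(os.getpid()).memory_info().rss / 2**20
    print('{}: RSS = {:.1f} MiB'.format(tag, rss))
```

Calling `log_rss('epoch %d' % epoch)` right after `optimizer.step()` shows exactly how much memory each epoch retains.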
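One Python-side detail worth ruling out (an observation about the script above, not a claim about arcsim internals): `losses.append(loss)` stores the graph-attached loss tensor, and any live reference to such a tensor keeps its entire autograd graph reachable, so per-epoch history accumulates even if the simulator frees everything correctly. A minimal sketch of the pattern and the detached variant:

```python
import torch

losses = []
param = torch.zeros(3, requires_grad=True)

for epoch in range(10):
    loss = (param ** 2).sum()   # builds a fresh autograd graph each epoch
    loss.backward()
    # losses.append(loss)       # would keep every epoch's graph reachable
    losses.append(loss.item())  # stores a plain float; the graph can be freed
```

`visualize_loss` only needs the scalar values, so storing floats there loses nothing.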