Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
1,623 changes: 1,623 additions & 0 deletions code/YOUR_PROCESSED_DATASET_COLLECTIONS_FOR_ERC_PATH/iemocap/window/test.json

Large diffs are not rendered by default.

5,163 changes: 5,163 additions & 0 deletions code/YOUR_PROCESSED_DATASET_COLLECTIONS_FOR_ERC_PATH/iemocap/window/train.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Binary file added code/__pycache__/main_new.cpython-311.pyc
Binary file not shown.
12 changes: 6 additions & 6 deletions code/data_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def process_dataset(dataset, window=110, speaker_task='True', demons='False', pr
content_task_dict = {}
speaker_task_dict = {}
sentence_dict = {}
data = pickle.load(open(f'YOUR_DATASET_COLLECTIONS_FOR_ERC_PATH/{dataset}/{dataset}.pkl','rb'))
data = pickle.load(open(f'../original_data/{dataset}/{dataset}.pkl','rb'))

# 不同的数据集有不同的speaker_label的处理方式
#Different datasets have different ways of handling speaker_label
Expand Down Expand Up @@ -142,7 +142,7 @@ def process_dataset(dataset, window=110, speaker_task='True', demons='False', pr
# dataset_list = ['train', 'test', 'valid']
if predictions == 'False':
if speaker_task == 'True_mixed':
data_path = f'YOUR_PROCESSED_DATASET_COLLECTIONS_FOR_ERC_PATH/{dataset}/speaker_window'
data_path = f'../original_data/{dataset}/speaker_window'
os.makedirs(data_path, exist_ok=True)
with open(f'{data_path}/train.json', 'w') as f_train:
for train_id in new_train_id:
Expand All @@ -166,7 +166,7 @@ def process_dataset(dataset, window=110, speaker_task='True', demons='False', pr
f_valid.write(json.dumps({'input':f'{speaker_task_dict[valid_id]}','target':f'{speaker_target_dict[valid_id]}'}, ensure_ascii=False)+ '\n')

elif speaker_task == 'True':
data_path_speaker = f'YOUR_PROCESSED_DATASET_COLLECTIONS_FOR_ERC_PATH{dataset}/speaker'
data_path_speaker = f'../original_data{dataset}/speaker'
os.makedirs(data_path_speaker, exist_ok=True)
with open(f'{data_path_speaker}/train.json', 'w') as f_train:
for train_id in new_train_id:
Expand All @@ -183,7 +183,7 @@ def process_dataset(dataset, window=110, speaker_task='True', demons='False', pr
if valid_id in speaker_task_dict:
f_valid.write(json.dumps({'input':f'{speaker_task_dict[valid_id]}','target':f'{speaker_target_dict[valid_id]}'}, ensure_ascii=False)+ '\n')

data_path_window = f'YOUR_PROCESSED_DATASET_COLLECTIONS_FOR_ERC_PATH/{dataset}/window'
data_path_window = f'../original_data/{dataset}/window'
os.makedirs(data_path_window, exist_ok=True)
with open(f'{data_path_window}/train.json', 'w') as f_train:
for train_id in new_train_id:
Expand All @@ -205,7 +205,7 @@ def process_dataset(dataset, window=110, speaker_task='True', demons='False', pr


elif speaker_task == 'None' and demons == 'False':
data_path = f'YOUR_PROCESSED_DATASET_COLLECTIONS_FOR_ERC_PATH/{dataset}/window'
data_path = f'../original_data/{dataset}/window'
os.makedirs(data_path, exist_ok=True)

with open(f'{data_path}/train.json', 'w') as f_train:
Expand All @@ -221,7 +221,7 @@ def process_dataset(dataset, window=110, speaker_task='True', demons='False', pr
f_valid.write(json.dumps({'input':f'{content_task_dict[valid_id]}','target':f'{content_target_dict[valid_id]}'}, ensure_ascii=False)+ '\n')

elif speaker_task == 'None' and demons == 'True':
data_path = f'YOUR_PROCESSED_DATASET_COLLECTIONS_FOR_ERC_PATH/{dataset}/demon'
data_path = f'../original_data/{dataset}/demon'
os.makedirs(data_path, exist_ok=True)

with open(f'{data_path}/train.json', 'w') as f_train:
Expand Down
2 changes: 1 addition & 1 deletion code/data_process_plain.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def process_dataset(dataset):
content_task_dict = {}
speaker_task_dict = {}
sentence_dict = {}
data = pickle.load(open(f'YOUR_DATASET_COLLECTIONS_FOR_ERC_PATH/{dataset}/{dataset}.pkl','rb'))
data = pickle.load(open(f'../original_data/{dataset}/{dataset}.pkl','rb'))

# 不同的数据集有不同的speaker_label的处理方式
#Different datasets have different ways of handling speaker_label
Expand Down
Binary file added code/data_utils/__pycache__/__init__.cpython-310.pyc
Binary file not shown.
Binary file not shown.
4 changes: 2 additions & 2 deletions code/data_utils/deepspeed_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"gradient_clipping": 1.0,
"steps_per_print": 100,
"fp16": {
"enabled": false,
"enabled": true,
"auto_cast": "auto",
"loss_scale": 0,
"initial_scale_power": 16,
Expand All @@ -18,7 +18,7 @@
"opt_level": "O2"
},
"bfloat16": {
"enabled": true
"enabled": false
},
"zero_optimization": {
"stage": 2,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"Total": 6738415616,
"Trainable": 6738415616
}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"Total": 6738415616,
"Trainable": 6738415616
}

Large diffs are not rendered by default.

69 changes: 45 additions & 24 deletions code/main_new.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,19 +756,30 @@ def _get_input_dict(batch):
else:
if "token_type_ids" in eval_batch:
token_type_ids = eval_batch.pop("token_type_ids")
outputs = model.generate(

# Build generation kwargs based on whether we're using beam search or sampling
gen_kwargs = {
**eval_batch,
num_beams=args.num_beams,
top_k=args.top_k,
top_p=args.top_p,
early_stopping=True,
# max_length=max_length_this_batch + args.max_length,
max_length=args.max_length,
length_penalty=2.0,
repetition_penalty=1.0,
num_return_sequences=1
# stopping_criteria=StoppingCriteriaList([stop_criteria]
)
"max_length": args.max_length,
"num_return_sequences": 1,
}

# Use beam search (deterministic) to avoid inf/nan issues with sampling
if args.num_beams > 1:
gen_kwargs.update({
"num_beams": args.num_beams,
"early_stopping": True,
"length_penalty": 2.0,
"repetition_penalty": 1.0,
"do_sample": False,
})
else:
# Greedy decoding (no sampling to avoid inf/nan)
gen_kwargs.update({
"do_sample": False,
})

outputs = model.generate(**gen_kwargs)
outputs[outputs[:, :] < 0] = tokenizer.pad_token_id
all_outputs.extend(outputs)
eval_inputs_iter = [tokenizer.decode(e_id, skip_special_tokens=True, clean_up_tokenization_spaces=True) for e_id in eval_inputs_iter]
Expand Down Expand Up @@ -872,19 +883,29 @@ def _get_input_dict(batch):
else:
if "token_type_ids" in eval_batch:
token_type_ids = eval_batch.pop("token_type_ids")
outputs = model.generate(
# Build generation kwargs based on whether we're using beam search or sampling
gen_kwargs = {
**eval_batch,
num_beams=args.num_beams,
top_k=args.top_k,
top_p=args.top_p,
early_stopping=True,
# max_length=max_length_this_batch + args.max_length,
max_length=args.max_length,
length_penalty=2.0,
repetition_penalty=1.0,
num_return_sequences=1
# stopping_criteria=StoppingCriteriaList([stop_criteria])
)
"max_length": args.max_length,
"num_return_sequences": 1,
}

# Use beam search (deterministic) to avoid inf/nan issues with sampling
if args.num_beams > 1:
gen_kwargs.update({
"num_beams": args.num_beams,
"early_stopping": True,
"length_penalty": 2.0,
"repetition_penalty": 1.0,
"do_sample": False,
})
else:
# Greedy decoding (no sampling to avoid inf/nan)
gen_kwargs.update({
"do_sample": False,
})

outputs = model.generate(**gen_kwargs)
outputs[outputs[:, :] < 0] = tokenizer.pad_token_id
all_outputs.extend(outputs)
eval_inputs_iter = [tokenizer.decode(e_id, skip_special_tokens=True, clean_up_tokenization_spaces=True) for e_id in eval_inputs_iter]
Expand Down
14 changes: 14 additions & 0 deletions code/run_instructerc.sbatch
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# SLURM batch script: runs the InstructERC pipeline (train_and_inference_Uni.sh)
# on one GPU from the project's code directory, inside the project virtualenv.
#
# NOTE(review): the --output/--error paths point into ../logs relative to the
# submission directory; that directory presumably has to exist before `sbatch`
# is invoked -- confirm against the cluster's SLURM behavior.
#SBATCH --job-name=instructerc_llm
#SBATCH --output=../logs/instructerc_%j.out
#SBATCH --error=../logs/instructerc_%j.err
#SBATCH --gres=gpu:1
#SBATCH --time=12:00:00
#SBATCH --mem=32G
echo "Job started at $(date)"
echo "Running on host: $(hostname)"
echo "Job ID: $SLURM_JOB_ID"

# Abort early if the project directory or the virtualenv is unavailable;
# otherwise the pipeline would run from the wrong directory or with the
# wrong Python interpreter.
cd /local/scratch/yhu383/InstructERC/code || exit 1
source ../venv/bin/activate || exit 1

bash train_and_inference_Uni.sh
status=$?

echo "Job finished at $(date)"

# Propagate the pipeline's exit status so SLURM marks a failed run as failed
# (a trailing echo would otherwise make the job always exit 0).
exit "$status"
29 changes: 14 additions & 15 deletions code/train_and_inference_Uni.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
source YOUR CONDA ENVS
source YOUR DOCKER



# The shell parameters that control the main process
Expand All @@ -16,15 +15,15 @@ MODEL_NAME='LLaMA2'
# select the experiment's model
# Experiments_setting='test'
# Experiments_setting='zero_shot'
# Experiments_setting='few_shot'
Experiments_setting='lora'
Experiments_setting='few_shot'
# Experiments_setting='lora'
# Experiments_setting='all_parameters'

# select the dataset
# dataset='test'
# dataset='iemocap'
dataset='iemocap'
# dataset='meld'
dataset='EmoryNLP'
# dataset='EmoryNLP'

# select the historical window for dataset
# LLaMA's context length of 1024 is enough for almost all datasets, except for iemocap.
Expand Down Expand Up @@ -61,13 +60,13 @@ echo "domain_base: ${domain_base}"
emotion_prediction='False'
echo "emotion_prediction: ${emotion_prediction}"

# data_percent=1.0 # 1
data_percent=1.0 # 1
# data_percent=0.5 # 1/2
# data_percent=0.25 # 1/4
# data_percent=0.125 # 1/8
# data_percent=0.0625 # 1/16
# data_percent=0.03125 # 1/32
data_percent=0.015625 # 1/64
# data_percent=0.015625 # 1/64
echo "data_percent: ${data_percent}"


Expand Down Expand Up @@ -148,7 +147,7 @@ then
MODEL_PATH='LLaMA MODELPATH'
elif [ ${MODEL_NAME} = 'LLaMA2' ]
then
MODEL_PATH='LLaMA2 MODELPATH'
MODEL_PATH='../../models/llama2-7b'
elif [ ${MODEL_NAME} = 'Bloom-560m' ]
then
MODEL_PATH='Bloom-560m MODELPATH'
Expand Down Expand Up @@ -207,7 +206,7 @@ then
--output_dir ./experiments/${MODEL_NAME}/${Experiments_setting}/${dataset}/${speaker_task} \
--max_length ${MAX_LENGTH} \
--batch_size ${BS} \
--deepspeed_config ./code/data_utils/deepspeed_config.json \
--deepspeed_config ./data_utils/deepspeed_config.json \
--gradient_accumulation_steps ${accumulations} \
--eval_batch_size 8 \
--num_train_epochs 6 \
Expand Down Expand Up @@ -244,7 +243,7 @@ then
# --output_dir ${Speaker_Model_output_dir} \
# --max_length ${MAX_LENGTH} \
# --batch_size ${BS} \
# --deepspeed_config ./code/data_utils/deepspeed_config.json \
# --deepspeed_config ./data_utils/deepspeed_config.json \
# --gradient_accumulation_steps ${accumulations} \
# --eval_batch_size 8 \
# --num_train_epochs 3 \
Expand All @@ -270,7 +269,7 @@ then
--output_dir ${Content_Model_output_dir} \
--max_length ${MAX_LENGTH} \
--batch_size ${BS} \
--deepspeed_config ./code/data_utils/deepspeed_config.json \
--deepspeed_config ./data_utils/deepspeed_config.json \
--gradient_accumulation_steps ${accumulations} \
--eval_batch_size 16 \
--num_train_epochs 15 \
Expand All @@ -293,7 +292,7 @@ then
--output_dir ./experiments/${MODEL_NAME}/${Experiments_setting}/${dataset}/demon \
--max_length ${MAX_LENGTH} \
--batch_size ${BS} \
--deepspeed_config ./code/data_utils/deepspeed_config.json \
--deepspeed_config ./data_utils/deepspeed_config.json \
--gradient_accumulation_steps ${accumulations} \
--eval_batch_size 8 \
--num_train_epochs 8 \
Expand All @@ -313,7 +312,7 @@ then
--output_dir ./experiments/${MODEL_NAME}/${Experiments_setting}/${dataset}/window_${historical_window}/LR_${LR}_BS_${BS}_per_${data_percent} \
--max_length ${MAX_LENGTH} \
--batch_size ${BS} \
--deepspeed_config ./code/data_utils/deepspeed_config.json \
--deepspeed_config ./data_utils/deepspeed_config.json \
--gradient_accumulation_steps ${accumulations} \
--eval_batch_size 8 \
--num_train_epochs 6 \
Expand Down Expand Up @@ -343,7 +342,7 @@ then
--output_dir ./experiments/${MODEL_NAME}/${Experiments_setting}/${dataset}/Predict/speaker/window_${historical_window} \
--max_length ${MAX_LENGTH} \
--batch_size ${BS} \
--deepspeed_config ./code/data_utils/deepspeed_config.json \
--deepspeed_config ./data_utils/deepspeed_config.json \
--gradient_accumulation_steps ${accumulations} \
--eval_batch_size 8 \
--num_train_epochs 15 \
Expand Down
33 changes: 17 additions & 16 deletions envs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ gradio_client==0.2.7
grpcio==1.54.2
h11==0.14.0
hjson==3.1.0
hope==3.6.4
hope==0.7.3
httpcore==0.17.2
httpx==0.24.1
huggingface-hub==0.14.1
Expand Down Expand Up @@ -123,7 +123,7 @@ mdtex2html==1.2.0
mdurl==0.1.2
mistune==2.0.5
mpmath==1.3.0
mt-tritonclient==1.0.4
# mt-tritonclient==1.0.4 # Package not found - consider using 'tritonclient' instead
multidict==6.0.4
multiprocess==0.70.14
multivolumefile==0.2.3
Expand All @@ -142,19 +142,20 @@ nodeenv==1.8.0
notebook==6.5.4
notebook_shim==0.2.3
numpy==1.24.3
nvidia-cublas-cu11==11.10.3.66
nvidia-cuda-cupti-cu11==11.7.101
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cudnn-cu11==8.5.0.96
nvidia-cufft-cu11==10.9.0.58
nvidia-curand-cu11==10.2.10.91
nvidia-cusolver-cu11==11.4.0.1
nvidia-cusparse-cu11==11.7.4.91
nvidia-nccl-cu11==2.14.3
nvidia-nvtx-cu11==11.7.91
# NVIDIA CUDA packages - these are installed automatically by PyTorch
# nvidia-cublas-cu11==11.10.3.66
# nvidia-cuda-cupti-cu11==11.7.101
# nvidia-cuda-nvrtc-cu11==11.7.99
# nvidia-cuda-runtime-cu11==11.7.99
# nvidia-cudnn-cu11==8.5.0.96
# nvidia-cufft-cu11==10.9.0.58
# nvidia-curand-cu11==10.2.10.91
# nvidia-cusolver-cu11==11.4.0.1
# nvidia-cusparse-cu11==11.7.4.91
# nvidia-nccl-cu11==2.14.3
# nvidia-nvtx-cu11==11.7.91
oauthlib==3.2.2
octo-rpc==0.4.6
# octo-rpc==0.4.6 # Package not found in PyPI
onnxruntime==1.14.1
orjson==3.9.1
packaging==23.1
Expand All @@ -178,7 +179,7 @@ preshed==3.0.8
prettytable==3.7.0
prometheus-client==0.16.0
prompt-toolkit==3.0.38
promptsource==0.2.3
# promptsource==0.2.3 # Package not found in PyPI
protobuf==3.20.3
psutil==5.9.5
ptyprocess==0.7.0
Expand All @@ -205,7 +206,7 @@ pyppmd==1.0.0
pyrsistent==0.19.3
pyserini==0.21.0
pytest==7.3.1
python-cat==0.0.10
# python-cat==0.0.10 # Package not found in PyPI
python-dateutil==2.8.2
python-json-logger==2.0.7
python-multipart==0.0.6
Expand Down
Loading