{"id": "math_25280", "message": "[{"role": "user", "content": [{"type": "image", "image": "/mnt/lustre/share_data/luquanfeng/qianyunhang/MM-EUREKA/MMPR/images/geoqa_plus/images/11874.png"}, {"type": "text", "text": "You should first thinks about the reasoning process in the mind and then provides the user with the answer. Your answer must be in latex format and wrapped in $...$.The reasoning process and answer are enclosed within and tags, respectively, i.e., Since $1+1=2$, so the answer is $2$. $2$ , which means your output should start with and end with .\nQuestion:\nAs shown in the figure, AD is the median of △ABC, and E is the midpoint of AD. The area of △ABE is 2, then the area of △ABC is ( )."}]}]", "answer": "$8$"}
def preprocess_data(data, input_template=None, input_key="input", label_key="answer", apply_chat_template=None) -> tuple[str, str]:
    """Turn one dataset record into a (prompt, label) pair.

    Args:
        data: A single dataset record (mapping).
        input_template: Optional format string applied to the prompt when no
            chat template is used, e.g. "Question: {}".
        input_key: Key in ``data`` holding the raw input (a plain string or an
            already-structured chat message list).
        label_key: Key in ``data`` holding the label; ``None`` yields an empty
            label (used for Reinforced Fine-tuning, where no label is needed).
        apply_chat_template: Optional tokenizer ``apply_chat_template``
            callable; when given, it renders the chat into the final prompt.

    Returns:
        A ``(prompt, label)`` tuple of strings (label may be any value stored
        under ``label_key``).
    """
    if apply_chat_template:
        chat = data[input_key]
        # BUG FIX: the original called json.dumps() first, which always yields
        # a str — so a structured chat (list of role/content messages) was
        # stringified and wrapped as one user turn, and a plain string was
        # double-encoded with surrounding quotes. Only wrap genuine strings.
        if isinstance(chat, str):
            chat = [{"role": "user", "content": chat}]
        prompt = apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    else:
        raw = data[input_key]
        # Keep plain strings as-is; serialize structured inputs so they can be
        # interpolated into the template / used as a textual prompt.
        prompt = raw if isinstance(raw, str) else json.dumps(raw)
        if input_template:
            prompt = input_template.format(prompt)
    # for Reinforced Fine-tuning
    label = "" if label_key is None else data[label_key]
    return prompt, label
{"id": "math_25280", "message": "[{"role": "user", "content": [{"type": "image", "image": "/mnt/lustre/share_data/luquanfeng/qianyunhang/MM-EUREKA/MMPR/images/geoqa_plus/images/11874.png"}, {"type": "text", "text": "You should first thinks about the reasoning process in the mind and then provides the user with the answer. Your answer must be in latex format and wrapped in $...$.The reasoning process and answer are enclosed within and tags, respectively, i.e., Since$1+1=2$ , so the answer is $2$ . $2$ , which means your output should start with and end with .\nQuestion:\nAs shown in the figure, AD is the median of △ABC, and E is the midpoint of AD. The area of △ABE is 2, then the area of △ABC is ( )."}]}]", "answer": "$8$"}
def preprocess_data(data, input_template=None, input_key="input", label_key="answer", apply_chat_template=None) -> str:
if apply_chat_template:
chat = json.dumps(data[input_key])
if isinstance(chat, str):
chat = [{"role": "user", "content": chat}]
prompt = apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
else:
prompt = json.dumps(data[input_key])
if input_template:
prompt = input_template.format(prompt)