Skip to content

Commit 7919ac9

Browse files
committed
Add new papers
1 parent a041e70 commit 7919ac9

12 files changed

Lines changed: 400 additions & 12 deletions

README.md

Lines changed: 146 additions & 12 deletions
Large diffs are not rendered by default.

data/papers_3d_object_design.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@
1414
github: ""
1515
website: ""
1616

17+
- title: "Learning Object Placement Programs for Indoor Scene Synthesis with Iterative Self Training"
18+
authors: "Adrian Chang, Kai Wang, Yuanbo Li, Manolis Savva, Angel X. Chang, Daniel Ritchie"
19+
venue: "arXiv 2025"
20+
links:
21+
paper: "https://arxiv.org/abs/2503.04496"
22+
github: ""
23+
website: ""
24+
1725
- title: "HSM: Hierarchical Scene Motifs for Multi-Scale Indoor Scene Generation"
1826
authors: "Hou In Derek Pun, Hou In Ivan Tam, Austin T. Wang, Xiaoliang Huo, Angel X. Chang, Manolis Savva"
1927
venue: "arXiv 2025"
@@ -62,6 +70,14 @@
6270
github: "https://github.com/richard-guyunqi/BlenderGym-Open"
6371
website: "https://blendergym.github.io/"
6472

73+
- title: "CAD-Editor: A Locate-then-Infill Framework with Automated Training Data Synthesis for Text-Based CAD Editing"
74+
authors: "Yu Yuan, Shizhao Sun, Qi Liu, Jiang Bian"
75+
venue: "ICML 2025"
76+
links:
77+
paper: "https://arxiv.org/abs/2502.03997"
78+
github: "https://github.com/microsoft/CAD-Editor"
79+
website: "https://cad-editor.github.io/"
80+
6581
- title: "IR3D-Bench: Evaluating Vision-Language Model Scene Understanding as Agentic Inverse Rendering"
6682
authors: "Parker Liu, Chenxin Li, Zhengxin Li, Yipeng Wu, Wuyang Li, Zhiqin Yang, Zhenyuan Zhang, Yunlong Lin, Sirui Han, Brandon Y. Feng"
6783
venue: "NeurIPS 2025 Datasets and Benchmarks Track"
@@ -94,6 +110,14 @@
94110
github: "https://github.com/THUDM/SceneGenAgent"
95111
website: ""
96112

113+
- title: "CADTalk: An Algorithm and Benchmark for Semantic Commenting of CAD Programs"
114+
authors: "Haocheng Yuan, Jing Xu, Hao Pan, Adrien Bousseau, Niloy J. Mitra, Changjian Li"
115+
venue: "CVPR 2024"
116+
links:
117+
paper: "https://openaccess.thecvf.com/content/CVPR2024/html/Yuan_CADTalk_An_Algorithm_and_Benchmark_for_Semantic_Commenting_of_CAD_CVPR_2024_paper.html"
118+
github: "https://github.com/YYYYYHC/CADTalk"
119+
website: "https://enigma-li.github.io/CADTalk/"
120+
97121
- title: "SceneCraft: An LLM Agent for Synthesizing 3D Scene as Blender Code"
98122
authors: "Ziniu Hu, Ahmet Iscen, Aashi Jain, Thomas Kipf, Yisong Yue, David A. Ross, Cordelia Schmid, Alireza Fathi"
99123
venue: "ICML 2024"
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
- title: "Plot2Code: A Comprehensive Benchmark for Evaluating Multi-modal Large Language Models in Code Generation from Scientific Plots"
2+
authors: "Chengyue Wu, Yixiao Ge, Qiushan Guo, Jiahao Wang, Zhixuan Liang, Zeyu Lu, Ying Shan, Ping Luo"
3+
venue: "arXiv 2024"
4+
links:
5+
paper: "https://arxiv.org/abs/2405.07990"
6+
github: "https://github.com/TencentARC/Plot2Code"
7+
website: "https://huggingface.co/datasets/TencentARC/Plot2Code"
8+
9+
- title: "Chart-CoCa: Self-Improving Chart Understanding of Vision LMs via Code-Driven Synthesis and Candidate-Conditioned Answering"
10+
authors: "Gongyao Jiang, Qiong Luo"
11+
venue: "CIKM 2025"
12+
links:
13+
paper: "https://arxiv.org/abs/2508.11975"
14+
github: ""
15+
website: ""

data/papers_automated_data_science.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,19 @@
1+
- title: "DeepAnalyze: Agentic Large Language Models for Autonomous Data Science"
2+
authors: "Shaolei Zhang, Ju Fan, Meihao Fan, Guoliang Li, Xiaoyong Du"
3+
venue: "arXiv 2025"
4+
links:
5+
paper: "https://arxiv.org/abs/2510.16872"
6+
github: "https://github.com/ruc-datalab/DeepAnalyze"
7+
website: "https://ruc-deepanalyze.github.io/"
8+
9+
- title: "WebDS: An End-to-End Benchmark for Web-based Data Science"
10+
authors: "Ethan Hsu, Hong Meng Yam, Ines Bouissou, Aaron Murali John, Raj Thota, Josh Koe, Vivek Sarath Putta, G K Dharesan, Alexander Spangher, Shikhar Murty, Tenghao Huang, Christopher D. Manning"
11+
venue: "arXiv 2025"
12+
links:
13+
paper: "https://arxiv.org/abs/2508.01222"
14+
github: ""
15+
website: ""
16+
117
- title: "AutoMind: Adaptive Knowledgeable Agent for Automated Data Science"
218
authors: "Yixin Ou, Yujie Luo, Jingsheng Zheng, Lanning Wei, Zhuoyun Yu, Shuofei Qiao, Jintian Zhang, Da Zheng, Yuren Mao, Yunjun Gao, Huajun Chen, Ningyu Zhang"
319
venue: "arXiv 2025"

data/papers_benchmarks.yaml

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,14 @@
2222
github: "https://github.com/amazon-science/SWE-PolyBench"
2323
website: "https://amazon-science.github.io/SWE-PolyBench/"
2424

25+
- title: "MORSE-500: A Programmatically Controllable Video Benchmark to Stress-Test Multimodal Reasoning"
26+
authors: "Zikui Cai, Andrew Wang, Anirudh Satheesh, Ankit Nakhawa, Hyunwoo Jae, Keenan Powell, Minghui Liu, Neel Jay, Sungbin Oh, Xiyao Wang, Yongyuan Liang, Tom Goldstein, Furong Huang"
27+
venue: "arXiv 2025"
28+
links:
29+
paper: "https://arxiv.org/abs/2506.05523"
30+
github: "https://github.com/morse-benchmark/morse-500"
31+
website: "https://morse-500.github.io/"
32+
2533
- title: "SWE-SQL: Illuminating LLM Pathways to Solve User SQL Issues in Real-World Applications"
2634
authors: "Jinyang Li, Xiaolong Li, Ge Qu, Per Jacobsson, Bowen Qin, Binyuan Hui, Shuzheng Si, Nan Huo, Xiaohan Xu, Yue Zhang, Ziwei Tang, Yuanshuai Li, Florensia Widjaja, Xintong Zhu, Feige Zhou, Yongfeng Huang, Yannis Papakonstantinou, Fatma Ozcan, Chenhao Ma, Reynold Cheng"
2735
venue: "arXiv 2025"
@@ -46,6 +54,30 @@
4654
github: "https://github.com/swe-perf/swe-perf"
4755
website: "https://swe-perf.github.io"
4856

57+
- title: "MetaGen: A DSL, Database, and Benchmark for VLM-Assisted Metamaterial Generation"
58+
authors: "Liane Makatura, Benjamin Jones, Siyuan Bian, Wojciech Matusik"
59+
venue: "arXiv 2025"
60+
links:
61+
paper: "https://arxiv.org/abs/2508.17568"
62+
github: ""
63+
website: ""
64+
65+
- title: "WebDS: An End-to-End Benchmark for Web-based Data Science"
66+
authors: "Ethan Hsu, Hong Meng Yam, Ines Bouissou, Aaron Murali John, Raj Thota, Josh Koe, Vivek Sarath Putta, G K Dharesan, Alexander Spangher, Shikhar Murty, Tenghao Huang, Christopher D. Manning"
67+
venue: "arXiv 2025"
68+
links:
69+
paper: "https://arxiv.org/abs/2508.01222"
70+
github: ""
71+
website: ""
72+
73+
- title: "Plot2Code: A Comprehensive Benchmark for Evaluating Multi-modal Large Language Models in Code Generation from Scientific Plots"
74+
authors: "Chengyue Wu, Yixiao Ge, Qiushan Guo, Jiahao Wang, Zhixuan Liang, Zeyu Lu, Ying Shan, Ping Luo"
75+
venue: "arXiv 2024"
76+
links:
77+
paper: "https://arxiv.org/abs/2405.07990"
78+
github: "https://github.com/TencentARC/Plot2Code"
79+
website: "https://huggingface.co/datasets/TencentARC/Plot2Code"
80+
4981
- title: "SWE-rebench: An Automated Pipeline for Task Collection and Decontaminated Evaluation of Software Engineering Agents"
5082
authors: "Ibragim Badertdinov, Alexander Golubev, Maksim Nekrashevich, Anton Shevtsov, Simon Karasik, Andrei Andriushchenko, Maria Trofimova, Daria Litvintseva, Boris Yangel"
5183
venue: "arXiv 2025"
@@ -70,6 +102,14 @@
70102
github: "https://github.com/Centre-for-Software-Excellence/SWE-Effi"
71103
website: "https://centre-for-software-excellence.github.io/SWE-Effi/"
72104

105+
- title: "PReview: A Benchmark Dataset for Pull Request Outcomes and Quality Analysis"
106+
authors: "Anonymous Authors"
107+
venue: "2025"
108+
links:
109+
paper: "https://openreview.net/forum?id=cdwp8BXTVV"
110+
github: ""
111+
website: ""
112+
73113
- title: "SEC-bench: Automated Benchmarking of LLM Agents on Real-World Software Security Tasks"
74114
authors: "Hwiwon Lee, Ziqi Zhang, Hanxiao Lu, Lingming Zhang"
75115
venue: "arXiv 2025"
@@ -118,6 +158,14 @@
118158
github: "https://github.com/LiuHengyu321/IR3D-Bench"
119159
website: "https://ir3d-bench.github.io/"
120160

161+
- title: "BlenderGym: Benchmarking Foundational Model Systems for Graphics Editing"
162+
authors: "Yunqi Gu, Ian Huang, Jihyeon Je, Guandao Yang, Leonidas Guibas"
163+
venue: "CVPR 2025"
164+
links:
165+
paper: "https://openaccess.thecvf.com/content/CVPR2025/html/Gu_BlenderGym_Benchmarking_Foundational_Model_Systems_for_Graphics_Editing_CVPR_2025_paper.html"
166+
github: "https://github.com/richard-guyunqi/BlenderGym-Open"
167+
website: "https://blendergym.github.io/"
168+
121169
- title: "SyncMind: Measuring Agent Out-of-Sync Recovery in Collaborative Software Engineering"
122170
authors: "Xuehang Guo, Xingyao Wang, Yangyi Chen, Sha Li, Chi Han, Manling Li, Heng Ji"
123171
venue: "ICML 2025"
@@ -150,6 +198,14 @@
150198
github: "https://github.com/uiuc-kang-lab/cve-bench"
151199
website: ""
152200

201+
- title: "WebMMU: A Benchmark for Multimodal Multilingual Website Understanding and Code Generation"
202+
authors: "Rabiul Awal, Mahsa Massoud, Aarash Feizi, Zichao Li, Suyuchen Wang, Christopher Pal, Aishwarya Agrawal, David Vazquez, Siva Reddy, Juan A. Rodriguez, Perouz Taslakian, Spandana Gella, Sai Rajeswar"
203+
venue: "EMNLP 2025"
204+
links:
205+
paper: "https://arxiv.org/abs/2508.16763"
206+
github: ""
207+
website: "https://webmmu-paper.github.io/"
208+
153209
- title: "CVE-Bench: Benchmarking LLM-based Software Engineering Agent’s Ability to Repair Real-World CVE Vulnerabilities"
154210
authors: "Peiran Wang, Xiaogeng Liu, Chaowei Xiao"
155211
venue: "NAACL 2025"
@@ -166,6 +222,22 @@
166222
github: "https://github.com/FSoft-AI4Code/RepoExec"
167223
website: "https://fsoft-ai4code.github.io/repoexec/"
168224

225+
- title: "FEA-Bench: A Benchmark for Evaluating Repository-Level Code Generation for Feature Implementation"
226+
authors: "Wei Li, Xin Zhang, Zhongxin Guo, Shaoguang Mao, Wen Luo, Guangyue Peng, Yangyu Huang, Houfeng Wang, Scarlett Li"
227+
venue: "ACL 2025"
228+
links:
229+
paper: "https://arxiv.org/abs/2503.06680"
230+
github: "https://github.com/microsoft/FEA-Bench"
231+
website: "https://gmago-leway.github.io/fea-bench.github.io/"
232+
233+
- title: "ProjectEval: A Benchmark for Programming Agents Automated Evaluation on Project-Level Code Generation"
234+
authors: "Kaiyuan Liu, Youcheng Pan, Yang Xiang, Daojing He, Jing Li, Yexing Du, Tianrun Gao"
235+
venue: "ACL 2025 Findings"
236+
links:
237+
paper: "https://aclanthology.org/2025.findings-acl.1036/"
238+
github: "https://github.com/RyanLoil/ProjectEval/"
239+
website: ""
240+
169241
- title: "EnvBench: A Benchmark for Automated Environment Setup"
170242
authors: "Aleksandra Eliseeva, Alexander Kovrigin, Ilia Kholkin, Egor Bogomolov, Yaroslav Zharov"
171243
venue: "ICLR 2025 Workshop"
@@ -189,3 +261,19 @@
189261
paper: "https://arxiv.org/abs/2310.06770"
190262
github: "https://github.com/SWE-bench/SWE-bench"
191263
website: "https://www.swebench.com/"
264+
265+
- title: "CADTalk: An Algorithm and Benchmark for Semantic Commenting of CAD Programs"
266+
authors: "Haocheng Yuan, Jing Xu, Hao Pan, Adrien Bousseau, Niloy J. Mitra, Changjian Li"
267+
venue: "CVPR 2024"
268+
links:
269+
paper: "https://openaccess.thecvf.com/content/CVPR2024/html/Yuan_CADTalk_An_Algorithm_and_Benchmark_for_Semantic_Commenting_of_CAD_CVPR_2024_paper.html"
270+
github: "https://github.com/YYYYYHC/CADTalk"
271+
website: "https://enigma-li.github.io/CADTalk/"
272+
273+
- title: "DA-Code: Agent Data Science Code Generation Benchmark for Large Language Models"
274+
authors: "Yiming Huang, Jianwen Luo, Yan Yu, Yitong Zhang, Fangyu Lei, Yifan Wei, Shizhu He, Lifu Huang, Xiao Liu, Jun Zhao, Kang Liu"
275+
venue: "EMNLP 2024"
276+
links:
277+
paper: "https://aclanthology.org/2024.emnlp-main.748/"
278+
github: "https://github.com/yiyihum/da-code"
279+
website: "https://da-code-bench.github.io/"

data/papers_code_executing_embodied.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,19 @@
1+
- title: "MORSE-500: A Programmatically Controllable Video Benchmark to Stress-Test Multimodal Reasoning"
2+
authors: "Zikui Cai, Andrew Wang, Anirudh Satheesh, Ankit Nakhawa, Hyunwoo Jae, Keenan Powell, Minghui Liu, Neel Jay, Sungbin Oh, Xiyao Wang, Yongyuan Liang, Tom Goldstein, Furong Huang"
3+
venue: "arXiv 2025"
4+
links:
5+
paper: "https://arxiv.org/abs/2506.05523"
6+
github: "https://github.com/morse-benchmark/morse-500"
7+
website: "https://morse-500.github.io/"
8+
9+
- title: "Chain-of-Modality: Learning Manipulation Programs from Multimodal Human Videos with Vision-Language-Models"
10+
authors: "Chen Wang, Fei Xia, Wenhao Yu, Tingnan Zhang, Ruohan Zhang, C. Karen Liu, Li Fei-Fei, Jie Tan, Jacky Liang"
11+
venue: "ICRA 2025"
12+
links:
13+
paper: "https://arxiv.org/abs/2504.13351"
14+
github: ""
15+
website: "https://chain-of-modality.github.io/"
16+
117
- title: "Visual Agentic AI for Spatial Reasoning with a Dynamic API"
218
authors: "Damiano Marsili, Rohun Agrawal, Yisong Yue, Georgia Gkioxari"
319
venue: "CVPR 2025"
@@ -38,6 +54,14 @@
3854
github: "https://github.com/haotang1995/WorldCoder"
3955
website: "https://haotang1995.github.io/projects/worldcoder"
4056

57+
- title: "RoboCodeX: Multimodal Code Generation for Robotic Behavior Synthesis"
58+
authors: "Yao Mu, Junting Chen, Qinglong Zhang, Shoufa Chen, Qiaojun Yu, Chongjian Ge, Runjian Chen, Zhixuan Liang, Mengkang Hu, Chaofan Tao, Peize Sun, Haibao Yu, Chao Yang, Wenqi Shao, Wenhai Wang, Jifeng Dai, Yu Qiao, Mingyu Ding, Ping Luo"
59+
venue: "ICML 2024"
60+
links:
61+
paper: "https://dl.acm.org/doi/10.5555/3692070.3693552"
62+
github: "https://github.com/RoboCodeX-source/RoboCodeX_code"
63+
website: "https://sites.google.com/view/robocodexplus"
64+
4165
- title: "RoboScript: Code Generation for Free-Form Manipulation Tasks across Real and Simulation"
4266
authors: "Junting Chen, Yao Mu, Qiaojun Yu, Tianming Wei, Silang Wu, Zhecheng Yuan, Zhixuan Liang, Chao Yang, Kaipeng Zhang, Wenqi Shao, Yu Qiao, Huazhe Xu, Mingyu Ding, Ping Luo"
4367
venue: "arXiv 2024"
@@ -69,3 +93,11 @@
6993
paper: "https://arxiv.org/abs/2209.07753"
7094
github: "https://github.com/google-research/google-research/tree/master/code_as_policies"
7195
website: "https://code-as-policies.github.io/"
96+
97+
- title: "Instruct2Act: Mapping Multi-modality Instructions to Robotic Actions with Large Language Model"
98+
authors: "Siyuan Huang, Zhengkai Jiang, Hao Dong, Yu Qiao, Peng Gao, Hongsheng Li"
99+
venue: "arXiv 2023"
100+
links:
101+
paper: "https://arxiv.org/abs/2305.11176"
102+
github: "https://github.com/OpenGVLab/Instruct2Act"
103+
website: ""

data/papers_code_executing_web.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,19 @@
1+
- title: "SkillWeaver: Web Agents can Self-Improve by Discovering and Honing Skills"
2+
authors: "Boyuan Zheng, Michael Y. Fatemi, Xiaolong Jin, Zora Zhiruo Wang, Apurva Gandhi, Yueqi Song, Yu Gu, Jayanth Srinivasa, Gaowen Liu, Graham Neubig, Yu Su"
3+
venue: "arXiv 2025"
4+
links:
5+
paper: "https://arxiv.org/abs/2504.07079"
6+
github: "https://github.com/OSU-NLP-Group/SkillWeaver"
7+
website: "https://osu-nlp-group.github.io/SkillWeaver/"
8+
9+
- title: "WebDS: An End-to-End Benchmark for Web-based Data Science"
10+
authors: "Ethan Hsu, Hong Meng Yam, Ines Bouissou, Aaron Murali John, Raj Thota, Josh Koe, Vivek Sarath Putta, G K Dharesan, Alexander Spangher, Shikhar Murty, Tenghao Huang, Christopher D. Manning"
11+
venue: "arXiv 2025"
12+
links:
13+
paper: "https://arxiv.org/abs/2508.01222"
14+
github: ""
15+
website: ""
16+
117
- title: "Tree-of-Code: A Self-Growing Tree Framework for End-to-End Code Generation and Execution in Complex Tasks"
218
authors: "Ziyi Ni, Yifan Li, Ning Yang, Dou Shen, Pin Lyu, Daxiang Dong"
319
venue: "ACL 2025 Findings"

data/papers_code_generation.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
- title: "Improving Cursor Tab with online RL"
2+
authors: "Jacob Jackson, Phillip Kravtsov, Shomil Jain"
3+
venue: "2025"
4+
links:
5+
paper: "https://cursor.com/blog/tab-rl"
6+
github: ""
7+
website: ""
8+
19
- title: "EvoAgentX: An Automated Framework for Evolving Agentic Workflows"
210
authors: "Yingxu Wang, Siwei Liu, Jinyuan Fang, Zaiqiao Meng"
311
venue: "arXiv 2025"
@@ -102,6 +110,14 @@
102110
github: "https://github.com/OpenBMB/ChatDev/tree/macnet"
103111
website: ""
104112

113+
- title: "Commit0: Library Generation from Scratch"
114+
authors: "Wenting Zhao, Nan Jiang, Celine Lee, Justin T Chiu, Claire Cardie, Matthias Gallé, Alexander M Rush"
115+
venue: "ICLR 2025"
116+
links:
117+
paper: "https://openreview.net/forum?id=MMwaQEVsAg"
118+
github: "https://github.com/commit-0/commit0"
119+
website: "https://commit-0.github.io/"
120+
105121
- title: "AgentVerse: Facilitating Multi-Agent Collaboration and Exploring Emergent Behaviors"
106122
authors: "Weize Chen, Yusheng Su, Jingwei Zuo, Cheng Yang, Chenfei Yuan, Chi-Min Chan, Heyang Yu, Yaxi Lu, Yi-Hsin Hung, Chen Qian, Yujia Qin, Xin Cong, Ruobing Xie, Zhiyuan Liu, Maosong Sun, Jie Zhou"
107123
venue: "ICLR 2024"

data/papers_data_synthesis.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,11 @@
6161
paper: "https://openreview.net/forum?id=qnE2m3pIAb"
6262
github: ""
6363
website: ""
64+
65+
- title: "AgentGen: Enhancing Planning Abilities for Large Language Model based Agent via Environment and Task Generation"
66+
authors: "Mengkang Hu, Pu Zhao, Can Xu, Qingfeng Sun, Jianguang Lou, Qingwei Lin, Ping Luo, Saravan Rajmohan"
67+
venue: "KDD 2025"
68+
links:
69+
paper: "https://dl.acm.org/doi/abs/10.1145/3690624.3709321"
70+
github: "https://github.com/Aaron617/AgentGen"
71+
website: "https://agent-gen.github.io/"

data/papers_empirical_studies.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
- title: "Measuring the Impact of Early-2025 AI on Experienced Open-Source Developer Productivity"
2+
authors: "Joel Becker, Nate Rush, Elizabeth Barnes, David Rein"
3+
venue: "arXiv 2025"
4+
links:
5+
paper: "https://arxiv.org/abs/2507.09089"
6+
github: ""
7+
website: ""

0 commit comments

Comments
 (0)