Add latest papers on benchmarks, data synthesis, issue localization and resolution, security and SQL engineering, automated data science, products, and surveys

Zhaoyang-Chu · Zhaoyang-Chu · commit 3117c996da38 · 2025-10-18T13:08:33.000Z
diff --git a/README.md b/README.md
diff --git a/data/papers_automated_data_science.yaml b/data/papers_automated_data_science.yaml
@@ -0,0 +1,7 @@
+- title: "AutoMind: Adaptive Knowledgeable Agent for Automated Data Science"
+  authors: "Yixin Ou, Yujie Luo, Jingsheng Zheng, Lanning Wei, Zhuoyun Yu, Shuofei Qiao, Jintian Zhang, Da Zheng, Yuren Mao, Yunjun Gao, Huajun Chen, Ningyu Zhang"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2506.10974"
+    github: "https://github.com/innovatingAI/AutoMind"
+    website: "https://innovatingai.github.io/"
diff --git a/data/papers_benchmarks.yaml b/data/papers_benchmarks.yaml
@@ -38,13 +38,29 @@
     github: "https://github.com/multi-swe-bench/multi-swe-bench"
     website: "https://multi-swe-bench.github.io/"
 
-- title: "SWE-bench Goes Live!"
-  authors: "Linghao Zhang, Shilin He, Chaoyun Zhang, Yu Kang, Bowen Li, Chengxing Xie, Junhao Wang, Maoquan Wang, Yufan Huang, Shengyu Fu, Elsie Nallipogu, Qingwei Lin, Yingnong Dang, Saravan Rajmohan, Dongmei Zhang"
+- title: "SWE-Effi: Re-Evaluating Software AI Agent System Effectiveness Under Resource Constraints"
+  authors: "Zhiyu Fan, Kirill Vasilevski, Dayi Lin, Boyuan Chen, Yihao Chen, Zhiqing Zhong, Jie M. Zhang, Pinjia He, Ahmed E. Hassan"
   venue: "arXiv 2025"
   links:
-    paper: "https://arxiv.org/abs/2505.23419"
-    github: "https://github.com/microsoft/SWE-bench-Live"
-    website: "https://swe-bench-live.github.io/"
+    paper: "https://arxiv.org/abs/2509.09853"
+    github: "https://github.com/Centre-for-Software-Excellence/SWE-Effi"
+    website: "https://centre-for-software-excellence.github.io/SWE-Effi/"
+
+- title: "SEC-bench: Automated Benchmarking of LLM Agents on Real-World Software Security Tasks"
+  authors: "Hwiwon Lee, Ziqi Zhang, Hanxiao Lu, Lingming Zhang"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2506.11791"
+    github: "https://github.com/SEC-bench/SEC-bench"
+    website: "https://sec-bench.github.io/"
+
+- title: "SecureAgentBench: Benchmarking Secure Code Generation under Realistic Vulnerability Scenarios"
+  authors: "Junkai Chen, Huihui Huang, Yunbo Lyu, Junwen An, Jieke Shi, Chengran Yang, Ting Zhang, Haoye Tian, Yikun Li, Zhenhao Li, Xin Zhou, Xing Hu, David Lo"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2509.22097"
+    github: "https://github.com/iCSawyer/SecureAgentBench"
+    website: ""
 
 - title: "SWE-MERA: A Dynamic Benchmark for Agenticly Evaluating Large Language Models on Software Engineering Tasks"
   authors: "Pavel Adamenko, Mikhail Ivanov, Aidar Valeev, Rodion Levichev, Pavel Zadorozhny, Ivan Lopatin, Dmitry Babayev, Alena Fenogenova, Valentin Malykh"
@@ -54,6 +70,22 @@
     github: "https://github.com/MERA-Evaluation/repotest"
     website: "https://mera-evaluation.github.io/demo-swe-mera/"
 
+- title: "Auto-SWE-Bench: A Framework for the Scalable Generation of Software Engineering Benchmark from Open-Source Repositories"
+  authors: "Anonymous Authors"
+  venue: "2025"
+  links:
+    paper: "https://openreview.net/forum?id=Gxw1EDSm9S"
+    github: ""
+    website: ""
+
+- title: "SWE-bench Goes Live!"
+  authors: "Linghao Zhang, Shilin He, Chaoyun Zhang, Yu Kang, Bowen Li, Chengxing Xie, Junhao Wang, Maoquan Wang, Yufan Huang, Shengyu Fu, Elsie Nallipogu, Qingwei Lin, Yingnong Dang, Saravan Rajmohan, Dongmei Zhang"
+  venue: "NeurIPS 2025 Datasets & Benchmarks Track"
+  links:
+    paper: "https://arxiv.org/abs/2505.23419"
+    github: "https://github.com/microsoft/SWE-bench-Live"
+    website: "https://swe-bench-live.github.io/"
+
 - title: "SWE-Lancer: Can Frontier LLMs Earn $1 Million from Real-World Freelance Software Engineering?"
   authors: "Samuel Miserendino, Michele Wang, Tejal Patwardhan, Johannes Heidecke"
   venue: "ICML 2025"
diff --git a/data/papers_code_generation.yaml b/data/papers_code_generation.yaml
diff --git a/data/papers_data_synthesis.yaml b/data/papers_data_synthesis.yaml
@@ -1,7 +1,31 @@
+- title: "SWE-Mirror: Scaling Issue-Resolving Datasets by Mirroring Issues Across Repositories"
+  authors: "Junhao Wang, Daoguang Zan, Shulin Xin, Siyao Liu, Yurong Wu, Kai Shen"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2509.08724"
+    github: ""
+    website: ""
+
 - title: "SPICE: An Automated SWE-Bench Labeling Pipeline for Issue Clarity, Test Coverage, and Effort Estimation"
   authors: "Gustavo A. Oliva, Gopi Krishnan Rajbahadur, Aaditya Bhatia, Haoxiang Zhang, Yihao Chen, Zhilong Chen, Arthur Leung, Dayi Lin, Boyuan Chen, Ahmed E. Hassan"
   venue: "arXiv 2025"
   links:
     paper: "https://arxiv.org/abs/2507.09108"
     github: ""
-    website: ""
+    website: ""
+
+- title: "RepoForge: Training a SOTA Fast-thinking SWE Agent with an End-to-End Data Curation Pipeline Synergizing SFT and RL at Scale"
+  authors: "Zhilong Chen, Chengzong Zhao, Boyuan Chen, Dayi Lin, Yihao Chen, Arthur Leung, Gopi Krishnan Rajbahadur, Gustavo Oliva, Haoxiang Zhang, Aadi Bhatia, Kim Kisub, Kirill Vasilevski, Youssef Esseddiq, Yanruo Yang, Ahmed Hassan"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2508.01550"
+    github: ""
+    website: "https://centre-for-software-excellence.github.io/docs/blog/repoforge"
+
+- title: "MCTS-Refined CoT: High-Quality Fine-Tuning Data for LLM-Based Repository Issue Resolution"
+  authors: "Yibo Wang, Zhihao Peng, Ying Wang, Zhao Wei, Hai Yu, Zhiliang Zhu"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2506.12728"
+    github: ""
+    website: "https://mcts-refine.github.io/"
diff --git a/data/papers_issue_localization.yaml b/data/papers_issue_localization.yaml
@@ -0,0 +1,7 @@
+- title: "Issue Localization via LLM-Driven Iterative Code Graph Searching"
+  authors: "Zhonghao Jiang, Xiaoxue Ren, Meng Yan, Wei Jiang, Yong Li, Zhongxin Liu"
+  venue: "ASE 2025"
+  links:
+    paper: "https://arxiv.org/abs/2503.22424"
+    github: "https://github.com/ZhonghaoJiang/CoSIL"
+    website: ""
diff --git a/data/papers_issue_resolution.yaml b/data/papers_issue_resolution.yaml
@@ -6,6 +6,46 @@
     github: "https://github.com/EuniAI/Prometheus"
     website: "https://euni.ai/"
 
+- title: "Terminal-Bench: A Benchmark for AI Agents in Terminal Environments"
+  authors: "The Terminal-Bench Team"
+  venue: "2025"
+  links:
+    paper: ""
+    github: "https://github.com/laude-institute/terminal-bench"
+    website: "https://www.tbench.ai/"
+
+- title: "SWE-Bench Pro: Can AI Agents Solve Long-Horizon Software Engineering Tasks?"
+  authors: "Xiang Deng, Jeff Da, Edwin Pan, Yannis Yiming He, Charles Ide, Kanak Garg, Niklas Lauffer, Andrew Park, Nitin Pasari, Chetan Rane, Karmini Sampath, Maya Krishnan, Srivatsa Kundurthy, Sean Hendryx, Zifan Wang, Chen Bo Calvin Zhang, Noah Jacobson, Bing Liu, Brad Kenstler"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2509.16941"
+    github: "https://github.com/scaleapi/SWE-bench_Pro-os"
+    website: "https://scale.com/leaderboard/swe_bench_pro_public"
+
+- title: "SWE-PolyBench: A multi-language benchmark for repository level evaluation of coding agents"
+  authors: "Muhammad Shihab Rashid, Christian Bock, Yuan Zhuang, Alexander Buchholz, Tim Esler, Simon Valentin, Luca Franceschi, Martin Wistuba, Prabhu Teja Sivaprasad, Woo Jung Kim, Anoop Deoras, Giovanni Zappella, Laurent Callot"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2504.08703"
+    github: "https://github.com/amazon-science/SWE-PolyBench"
+    website: "https://amazon-science.github.io/SWE-PolyBench/"
+
+- title: "Multi-SWE-bench: A Multilingual Benchmark for Issue Resolving"
+  authors: "Daoguang Zan, Zhirong Huang, Wei Liu, Hanwu Chen, Linhao Zhang, Shulin Xin, Lu Chen, Qi Liu, Xiaojian Zhong, Aoyan Li, Siyao Liu, Yongsheng Xiao, Liangqiang Chen, Yuyu Zhang, Jing Su, Tianyu Liu, Rui Long, Kai Shen, Liang Xiang"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2504.02605"
+    github: "https://github.com/multi-swe-bench/multi-swe-bench"
+    website: "https://multi-swe-bench.github.io/"
+
+- title: "SWE-Effi: Re-Evaluating Software AI Agent System Effectiveness Under Resource Constraints"
+  authors: "Zhiyu Fan, Kirill Vasilevski, Dayi Lin, Boyuan Chen, Yihao Chen, Zhiqing Zhong, Jie M. Zhang, Pinjia He, Ahmed E. Hassan"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2509.09853"
+    github: "https://github.com/Centre-for-Software-Excellence/SWE-Effi"
+    website: "https://centre-for-software-excellence.github.io/SWE-Effi/"
+
 - title: "Saving SWE-Bench: A Benchmark Mutation Approach for Realistic Agent Evaluation"
   authors: "Spandan Garg, Ben Steenhoek, Yufan Huang"
   venue: "arXiv 2025"
@@ -14,6 +54,22 @@
     github: ""
     website: ""
 
+- title: "SWE-MERA: A Dynamic Benchmark for Agenticly Evaluating Large Language Models on Software Engineering Tasks"
+  authors: "Pavel Adamenko, Mikhail Ivanov, Aidar Valeev, Rodion Levichev, Pavel Zadorozhny, Ivan Lopatin, Dmitry Babayev, Alena Fenogenova, Valentin Malykh"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2507.11059"
+    github: "https://github.com/MERA-Evaluation/repotest"
+    website: "https://mera-evaluation.github.io/demo-swe-mera/"
+
+- title: "Auto-SWE-Bench: A Framework for the Scalable Generation of Software Engineering Benchmark from Open-Source Repositories"
+  authors: "Anonymous Authors"
+  venue: "2025"
+  links:
+    paper: "https://openreview.net/forum?id=Gxw1EDSm9S"
+    github: ""
+    website: ""
+
 - title: "SWE-RL: Advancing LLM Reasoning via Reinforcement Learning on Open Software Evolution"
   authors: "Yuxiang Wei, Olivier Duchenne, Jade Copet, Quentin Carbonneaux, Lingming Zhang, Daniel Fried, Gabriel Synnaeve, Rishabh Singh, Sida I. Wang"
   venue: "NeurIPS 2025"
@@ -22,6 +78,14 @@
     github: "https://github.com/facebookresearch/swe-rl"
     website: ""
 
+- title: "SWE-bench Goes Live!"
+  authors: "Linghao Zhang, Shilin He, Chaoyun Zhang, Yu Kang, Bowen Li, Chengxing Xie, Junhao Wang, Maoquan Wang, Yufan Huang, Shengyu Fu, Elsie Nallipogu, Qingwei Lin, Yingnong Dang, Saravan Rajmohan, Dongmei Zhang"
+  venue: "NeurIPS 2025 Datasets & Benchmarks Track"
+  links:
+    paper: "https://arxiv.org/abs/2505.23419"
+    github: "https://github.com/microsoft/SWE-bench-Live"
+    website: "https://swe-bench-live.github.io/"
+
 - title: "Training Software Engineering Agents and Verifiers with SWE-Gym"
   authors: "Jiayi Pan, Xingyao Wang, Graham Neubig, Navdeep Jaitly, Heng Ji, Alane Suhr, Yizhe Zhang"
   venue: "ICML 2025"  
@@ -30,6 +94,14 @@
     github: "https://github.com/SWE-Gym/SWE-Gym"
     website: ""
 
+- title: "SWE-Lancer: Can Frontier LLMs Earn $1 Million from Real-World Freelance Software Engineering?"
+  authors: "Samuel Miserendino, Michele Wang, Tejal Patwardhan, Johannes Heidecke"
+  venue: "ICML 2025"
+  links:
+    paper: "https://arxiv.org/abs/2502.12115"
+    github: "https://github.com/openai/frontier-evals/tree/main/project/swelancer"
+    website: "https://openai.com/index/swe-lancer/"
+
 - title: "SWE-GPT: A Process-Centric Language Model for Automated Software Improvement"
   authors: "Yingwei Ma, Rongyu Cao, Yongchang Cao, Yue Zhang, Jue Chen, Yibo Liu, Yuchen Liu, Binhua Li, Fei Huang, Yongbin Li"
   venue: "ISSTA 2025"
@@ -62,6 +134,14 @@
     github: "https://github.com/OpenAutoCoder/Agentless"
     website: ""
 
+- title: "OmniGIRL: A Multilingual and Multimodal Benchmark for GitHub Issue Resolution"
+  authors: "Lianghong Guo, Wei Tao, Runhan Jiang, Yanlin Wang, Jiachi Chen, Xilin Liu, Yuchi Ma, Mingzhi Mao, Hongyu Zhang, Zibin Zheng"
+  venue: "ISSTA 2025"
+  links:
+    paper: "https://arxiv.org/abs/2505.04606"
+    github: "https://github.com/DeepSoftwareAnalytics/OmniGIRL"
+    website: "https://deepsoftwareanalytics.github.io/omnigirl_leaderboard.html"
+
 - title: "AutoCodeRover: Autonomous Program Improvement"
   authors: "Yuntong Zhang, Haifeng Ruan, Zhiyu Fan, Abhik Roychoudhury"
   venue: "ISSTA 2024"
@@ -77,3 +157,11 @@
     paper: "https://arxiv.org/abs/2405.15793"
     github: "https://github.com/SWE-agent/SWE-agent"
     website: "https://swe-agent.com/"
+
+- title: "SWE-bench: Can Language Models Resolve Real-World GitHub Issues?"
+  authors: "Carlos E. Jimenez, John Yang, Alexander Wettig, Shunyu Yao, Kexin Pei, Ofir Press, Karthik Narasimhan"
+  venue: "ICLR 2024"
+  links:
+    paper: "https://arxiv.org/abs/2310.06770"
+    github: "https://github.com/SWE-bench/SWE-bench"
+    website: "https://www.swebench.com/"
diff --git a/data/papers_products.yaml b/data/papers_products.yaml
@@ -14,6 +14,14 @@
     github: "https://github.com/openai/codex"
     website: "https://openai.com/codex/"
 
+- title: "Lovable"
+  authors: "Lovable"
+  venue: "2024"
+  links:
+    paper: ""
+    github: ""
+    website: "https://lovable.dev/"
+
 - title: "Devin"
   authors: "Cognition AI"
   venue: "2024"
@@ -54,6 +62,14 @@
     github: "https://github.com/SWE-agent/SWE-agent"
     website: "https://swe-agent.com/"
 
+- title: "Open Lovable"
+  authors: "Firecrawl"
+  venue: "2025"
+  links:
+    paper: ""
+    github: "https://github.com/firecrawl/open-lovable"
+    website: "https://open-lovable.com/"
+
 - title: "PR-Agent & Qodo Merge"
   authors: "Qodo"
   venue: "2024"
@@ -134,3 +150,10 @@
     github: ""
     website: "https://www.essential.com/"
 
+- title: "Anything"
+  authors: "Anything"
+  venue: "2025"
+  links:
+    paper: ""
+    github: ""
+    website: "https://www.createanything.com/"
diff --git a/data/papers_security_engineering.yaml b/data/papers_security_engineering.yaml
@@ -0,0 +1,31 @@
+- title: "SEC-bench: Automated Benchmarking of LLM Agents on Real-World Software Security Tasks"
+  authors: "Hwiwon Lee, Ziqi Zhang, Hanxiao Lu, Lingming Zhang"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2506.11791"
+    github: "https://github.com/SEC-bench/SEC-bench"
+    website: "https://sec-bench.github.io/"
+
+- title: "SecureAgentBench: Benchmarking Secure Code Generation under Realistic Vulnerability Scenarios"
+  authors: "Junkai Chen, Huihui Huang, Yunbo Lyu, Junwen An, Jieke Shi, Chengran Yang, Ting Zhang, Haoye Tian, Yikun Li, Zhenhao Li, Xin Zhou, Xing Hu, David Lo"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2509.22097"
+    github: "https://github.com/iCSawyer/SecureAgentBench"
+    website: ""
+
+- title: "CVE-Bench: A Benchmark for AI Agents’ Ability to Exploit Real-World Web Application Vulnerabilities"
+  authors: "Yuxuan Zhu, Antony Kellermann, Dylan Bowman, Philip Li, Akul Gupta, Adarsh Danda, Richard Fang, Conner Jensen, Eric Ihli, Jason Benn, Jet Geronimo, Avi Dhir, Sudhit Rao, Kaicheng Yu, Twm Stone, Daniel Kang"
+  venue: "ICML 2025"
+  links:
+    paper: "https://arxiv.org/abs/2503.17332"
+    github: "https://github.com/uiuc-kang-lab/cve-bench"
+    website: ""
+
+- title: "CVE-Bench: Benchmarking LLM-based Software Engineering Agent’s Ability to Repair Real-World CVE Vulnerabilities"
+  authors: "Peiran Wang, Xiaogeng Liu, Chaowei Xiao"
+  venue: "NAACL 2025"
+  links:
+    paper: "https://aclanthology.org/2025.naacl-long.212/"
+    github: ""
+    website: ""
diff --git a/data/papers_sql_engineering.yaml b/data/papers_sql_engineering.yaml
@@ -0,0 +1,7 @@
+- title: "SWE-SQL: Illuminating LLM Pathways to Solve User SQL Issues in Real-World Applications"
+  authors: "Jinyang Li, Xiaolong Li, Ge Qu, Per Jacobsson, Bowen Qin, Binyuan Hui, Shuzheng Si, Nan Huo, Xiaohan Xu, Yue Zhang, Ziwei Tang, Yuanshuai Li, Florensia Widjaja, Xintong Zhu, Feige Zhou, Yongfeng Huang, Yannis Papakonstantinou, Fatma Ozcan, Chenhao Ma, Reynold Cheng"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2506.18951"
+    github: "https://github.com/bird-bench/BIRD-CRITIC-1"
+    website: "https://bird-critic.github.io/"
diff --git a/data/papers_surveys.yaml b/data/papers_surveys.yaml
@@ -1,7 +1,31 @@
+- title: "A Comprehensive Survey on Benchmarks and Solutions in Software Engineering of LLM-Empowered Agentic System"
+  authors: "Jiale Guo, Suizhi Huang, Mei Li, Dong Huang, Xingsheng Chen, Regina Zhang, Zhijiang Guo, Han Yu, Siu-Ming Yiu, Christian Jensen, Pietro Lio, Kwok-Yan Lam"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2510.09721"
+    github: "https://github.com/lisaGuojl/LLM-Agent-SE-Survey"
+    website: ""
+
+- title: "How Does LLM Reasoning Work for Code? A Survey and a Call to Action"
+  authors: "Ira Ceka, Saurabh Pujar, Irene Manotas, Gail Kaiser, Baishakhi Ray, Shyam Ramji"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2506.13932"
+    github: ""
+    website: ""
+
 - title: "The Rise of AI Teammates in Software Engineering (SE) 3.0: How Autonomous Coding Agents Are Reshaping Software Engineering"
   authors: "Hao Li, Haoxiang Zhang, Ahmed E. Hassan"
   venue: "arXiv 2025"
   links:
     paper: "https://arxiv.org/abs/2507.15003"
     github: "https://github.com/SAILResearch/AI_Teammates_in_SE3"
     website: ""
+
+- title: "Agentic Software Engineering: Foundational Pillars and a Research Roadmap"
+  authors: "Ahmed E. Hassan, Hao Li, Dayi Lin, Bram Adams, Tse-Hsun Chen, Yutaro Kashiwa, Dong Qiu"
+  venue: "arXiv 2025"
+  links:
+    paper: "https://arxiv.org/abs/2509.06216"
+    github: ""
+    website: ""