|
6 | 6 | github: "https://github.com/EuniAI/Prometheus" |
7 | 7 | website: "https://euni.ai/" |
8 | 8 |
|
| 9 | +- title: "Unified Software Engineering Agent as AI Software Engineer" |
| 10 | + authors: "Leonhard Applis, Yuntong Zhang, Shanchao Liang, Nan Jiang, Lin Tan, Abhik Roychoudhury" |
| 11 | + venue: "ICSE 2026" |
| 12 | + links: |
| 13 | + paper: "https://arxiv.org/abs/2506.14683" |
| 14 | + github: "" |
| 15 | + website: "" |
| 16 | + |
9 | 17 | - title: "Beyond Final Code: A Process-Oriented Error Analysis of Software Development Agents in Real-World GitHub Scenarios" |
10 | 18 | authors: "Zhi Chen, Wei Ma, Lingxiao Jiang" |
11 | 19 | venue: "ICSE 2026" |
|
24 | 32 | website: "" |
25 | 33 | tags: "Empirical Study" |
26 | 34 |
|
| 35 | +- title: "Confucius Code Agent: Scalable Agent Scaffolding for Real-World Codebases" |
| 36 | + authors: "Zhaodong Wang, Zhenting Qi, Sherman Wong, Nathan Hu, Samuel Lin, Jun Ge, Erwin Gao, Wenlin Chen, Yilun Du, Minlan Yu, Ying Zhang" |
| 37 | + venue: "arXiv 2025/12" |
| 38 | + links: |
| 39 | + paper: "https://arxiv.org/abs/2512.10398" |
| 40 | + github: "" |
| 41 | + website: "" |
| 42 | + |
27 | 43 | - title: "Is Vibe Coding Safe? Benchmarking Vulnerability of Agent-Generated Code in Real-World Tasks" |
28 | 44 | authors: "Songwen Zhao, Danqing Wang, Kexun Zhang, Jiaxuan Luo, Zhuo Li, Lei Li" |
29 | 45 | venue: "arXiv 2025/12" |
|
48 | 64 | github: "https://github.com/OpenHands/software-agent-sdk" |
49 | 65 | website: "" |
50 | 66 |
|
| 67 | +- title: "CodeClash: Benchmarking Goal-Oriented Software Engineering" |
| 68 | + authors: "John Yang, Kilian Lieret, Joyce Yang, Carlos E. Jimenez, Ofir Press, Ludwig Schmidt, Diyi Yang" |
| 69 | + venue: "arXiv 2025/11" |
| 70 | + links: |
| 71 | + paper: "https://arxiv.org/abs/2511.00839" |
| 72 | + github: "" |
| 73 | + website: "https://codeclash.ai/" |
| 74 | + |
| 75 | +- title: "InfCode: Adversarial Iterative Refinement of Tests and Patches for Reliable Software Issue Resolution" |
| 76 | + authors: "KeFan Li, Mengfei Wang, Hengzhi Zhang, Zhichao Li, Yuan Yuan, Mu Li, Xiang Gao, Hailong Sun, Chunming Hu, Weifeng Lv" |
| 77 | + venue: "arXiv 2025/11" |
| 78 | + links: |
| 79 | + paper: "https://arxiv.org/abs/2511.16004" |
| 80 | + github: "" |
| 81 | + website: "" |
| 82 | + |
| 83 | +- title: "Agent READMEs: An Empirical Study of Context Files for Agentic Coding" |
| 84 | + authors: "Worawalan Chatlatanagulchai, Hao Li, Yutaro Kashiwa, Brittany Reid, Kundjanasith Thonglek, Pattara Leelaprute, Arnon Rungsawang, Bundit Manaskasemsak, Bram Adams, Ahmed E. Hassan, Hajimu Iida" |
| 85 | + venue: "arXiv 2025/11" |
| 86 | + links: |
| 87 | + paper: "https://arxiv.org/abs/2511.12884" |
| 88 | + github: "" |
| 89 | + website: "" |
| 90 | + |
51 | 91 | - title: "Understanding Code Agent Behaviour: An Empirical Study of Success and Failure Trajectories" |
52 | 92 | authors: "Oorja Majgaonkar, Zhiwei Fei, Xiang Li, Federica Sarro, He Ye" |
53 | 93 | venue: "arXiv 2025/11" |
|
114 | 154 | github: "" |
115 | 155 | website: "" |
116 | 156 |
|
| 157 | +- title: "Process-Level Trajectory Evaluation for Environment Configuration in Software Engineering Agents" |
| 158 | + authors: "Jiayi Kuang, Yinghui Li, Xin Zhang, Yangning Li, Di Yin, Xing Sun, Ying Shen, Philip S. Yu" |
| 159 | + venue: "arXiv 2025/10" |
| 160 | + links: |
| 161 | + paper: "https://arxiv.org/abs/2510.25694" |
| 162 | + github: "" |
| 163 | + website: "" |
| 164 | + |
117 | 165 | - title: "BugPilot: Complex Bug Generation for Efficient Learning of SWE Skills" |
118 | 166 | authors: "Atharv Sonwane, Isadora White, Hyunji Lee, Matheus Pereira, Lucas Caccia, Minseon Kim, Zhengyan Shi, Chinmay Singh, Alessandro Sordoni, Marc-Alexandre Côté, Xingdi Yuan" |
119 | 167 | venue: "arXiv 2025/10" |
|
188 | 236 | github: "https://github.com/QuantaAlpha/GitTaskBench" |
189 | 237 | website: "https://gittaskbench.github.io/" |
190 | 238 |
|
| 239 | +- title: "RepoForge: Training a SOTA Fast-thinking SWE Agent with an End-to-End Data Curation Pipeline Synergizing SFT and RL at Scale" |
| 240 | + authors: "Zhilong Chen, Chengzong Zhao, Boyuan Chen, Dayi Lin, Yihao Chen, Arthur Leung, Gopi Krishnan Rajbahadur, Gustavo A. Oliva, Haoxiang Zhang, Aaditya Bhatia, Chong Chun Yong, Ahmed E. Hassan" |
| 241 | + venue: "arXiv 2025/08" |
| 242 | + links: |
| 243 | + paper: "https://arxiv.org/abs/2508.01550" |
| 244 | + github: "" |
| 245 | + website: "" |
| 246 | + |
| 247 | +- title: "Understanding Software Engineering Agents Through the Lens of Traceability: An Empirical Study" |
| 248 | + authors: "Ira Ceka, Saurabh Pujar, Shyam Ramji, Luca Buratti, Gail Kaiser, Baishakhi Ray" |
| 249 | + venue: "arXiv 2025/06" |
| 250 | + links: |
| 251 | + paper: "https://arxiv.org/abs/2506.08311" |
| 252 | + github: "" |
| 253 | + website: "" |
| 254 | + |
191 | 255 | - title: "SWE-Bench Pro: Can AI Agents Solve Long-Horizon Software Engineering Tasks?" |
192 | 256 | authors: "Xiang Deng, Jeff Da, Edwin Pan, Yannis Yiming He, Charles Ide, Kanak Garg, Niklas Lauffer, Andrew Park, Nitin Pasari, Chetan Rane, Karmini Sampath, Maya Krishnan, Srivatsa Kundurthy, Sean Hendryx, Zifan Wang, Chen Bo Calvin Zhang, Noah Jacobson, Bing Liu, Brad Kenstler" |
193 | 257 | venue: "arXiv 2025" |
|
268 | 332 | github: "https://github.com/yingweima2022/SWE-Reasoner" |
269 | 333 | website: "" |
270 | 334 |
|
| 335 | +- title: "AutoCodeSherpa: Symbolic Explanations in AI Coding Agents" |
| 336 | + authors: "Sungmin Kang, Haifeng Ruan, Abhik Roychoudhury" |
| 337 | + venue: "arXiv 2025/07" |
| 338 | + links: |
| 339 | + paper: "https://arxiv.org/abs/2507.22414" |
| 340 | + github: "" |
| 341 | + website: "" |
| 342 | + |
271 | 343 | - title: "Satori-SWE: Evolutionary Test-Time Scaling for Sample-Efficient Software Engineering" |
272 | 344 | authors: "Guangtao Zeng, Maohao Shen, Delin Chen, Zhenting Qi, Subhro Das, Dan Gutfreund, David Cox, Gregory Wornell, Wei Lu, Zhang-Wei Hong, Chuang Gan" |
273 | 345 | venue: "arXiv 2025/05" |
|
276 | 348 | github: "https://github.com/satori-reasoning/Satori-SWE" |
277 | 349 | website: "https://satori-reasoning.github.io/" |
278 | 350 |
|
| 351 | +- title: "CrashFixer: A crash resolution agent for the Linux kernel" |
| 352 | + authors: "Alex Mathai, Chenxi Huang, Suwei Ma, Jihwan Kim, Hailie Mitchell, Aleksandr Nogikh, Petros Maniatis, Franjo Ivančić, Junfeng Yang, Baishakhi Ray" |
| 353 | + venue: "arXiv 2025/04" |
| 354 | + links: |
| 355 | + paper: "https://arxiv.org/abs/2504.20412" |
| 356 | + github: "" |
| 357 | + website: "" |
| 358 | + |
279 | 359 | - title: "DARS: Dynamic Action Re-Sampling to Enhance Coding Agent Performance by Adaptive Tree Traversal" |
280 | 360 | authors: "Vaibhav Aggarwal, Ojasv Kamal, Abhinav Japesh, Zhijing Jin, Bernhard Schölkopf" |
281 | 361 | venue: "arXiv 2025/03" |
|
500 | 580 | github: "" |
501 | 581 | website: "" |
502 | 582 |
|
| 583 | +# - title: "Large Language Model Critics for Execution-Free Evaluation of Code Changes" |
| 584 | +# authors: "Aashish Yadavally, Hoan Nguyen, Laurent Callot, Gauthier Guinet" |
| 585 | +# venue: "arXiv 2025/01" |
| 586 | +# links: |
| 587 | +# paper: "https://arxiv.org/abs/2501.16655" |
| 588 | +# github: "" |
| 589 | +# website: "" |
| 590 | + |
503 | 591 | - title: "Can Agents Fix Agent Issues?" |
504 | 592 | authors: "Alfin Wijaya Rahardja, Junwei Liu, Weitong Chen, Zhenpeng Chen, Yiling Lou" |
505 | 593 | venue: "NeurIPS 2025" |
|
580 | 668 | github: "https://github.com/Hambaobao/SWE-Flow" |
581 | 669 | website: "" |
582 | 670 |
|
| 671 | +- title: "Nemotron-CORTEXA: Enhancing LLM Agents for Software Engineering Tasks via Improved Localization and Solution Diversity" |
| 672 | + authors: "Atefeh Sohrabizadeh, Jialin Song, Mingjie Liu, Rajarshi Roy, Chankyu Lee, Jonathan Raiman, Bryan Catanzaro" |
| 673 | + venue: "ICML 2025" |
| 674 | + links: |
| 675 | + paper: "https://openreview.net/forum?id=k6p8UKRdH7" |
| 676 | + github: "" |
| 677 | + website: "" |
| 678 | + |
583 | 679 | - title: "Guided Search Strategies in Non-Serializable Environments with Applications to Software Engineering Agents" |
584 | 680 | authors: "Karina Zainullina, Alexander Golubev, Maria Trofimova, Sergei Polezhaev, Ibragim Badertdinov, Daria Litvintseva, Simon Karasik, Filipp Fisin, Sergei Skvortsov, Maksim Nekrashevich, Anton Shevtsov, Boris Yangel" |
585 | 681 | venue: "ICML 2025" |
|
652 | 748 | github: "" |
653 | 749 | website: "" |
654 | 750 |
|
| 751 | +- title: "Understanding Software Engineering Agents: A Study of Thought-Action-Result Trajectories" |
| 752 | + authors: "Islem Bouzenia, Michael Pradel" |
| 753 | + venue: "ASE 2025" |
| 754 | + links: |
| 755 | + paper: "https://arxiv.org/abs/2506.18824" |
| 756 | + github: "" |
| 757 | + website: "" |
| 758 | + |
655 | 759 | - title: "SPICE: An Automated SWE-Bench Labeling Pipeline for Issue Clarity, Test Coverage, and Effort Estimation" |
656 | 760 | authors: "Gustavo A. Oliva, Gopi Krishnan Rajbahadur, Aaditya Bhatia, Haoxiang Zhang, Yihao Chen, Zhilong Chen, Arthur Leung, Dayi Lin, Boyuan Chen, Ahmed E. Hassan" |
657 | 761 | venue: "ASE 2025" |
|
692 | 796 | github: "https://github.com/InternLM/SWE-Fixer" |
693 | 797 | website: "" |
694 | 798 |
|
| 799 | +- title: "SynFix: Dependency-Aware Program Repair via RelationGraph Analysis" |
| 800 | + authors: "Xunzhu Tang, Jiechao Gao, Jin Xu, Tiezhu Sun, Yewei Song, Saad Ezzini, Wendkûuni C. Ouédraogo, Jacques Klein, Tegawendé F. Bissyandé" |
| 801 | + venue: "ACL 2025 Findings" |
| 802 | + links: |
| 803 | + paper: "https://aclanthology.org/2025.findings-acl.252/" |
| 804 | + github: "" |
| 805 | + website: "" |
| 806 | + |
695 | 807 | - title: "UniDebugger: Hierarchical Multi-Agent Framework for Unified Software Debugging" |
696 | 808 | authors: "Cheryl Lee, Chunqiu Steven Xia, Longji Yang, Jen-tse Huang, Zhouruixing Zhu, Lingming Zhang, Michael R. Lyu" |
697 | 809 | venue: "EMNLP 2025" |
|
732 | 844 | github: "" |
733 | 845 | website: "" |
734 | 846 |
|
| 847 | +- title: "Agentic Program Repair from Test Failures at Scale: A Neuro-symbolic approach with static analysis and test execution feedback" |
| 848 | + authors: "Chandra Maddila, Adam Tait, Claire Chang, Daniel Cheng, Nauman Ahmad, Vijayaraghavan Murali, Marshall Roch, Arnaud Avondet, Aaron Meltzer, Victor Montalvao, Michael Hopko, Chris Waterson, Parth Thakkar, Renuka Fernandez, Kristian Kristensen, Sivan Barzily, Sherry Chen, Rui Abreu, Nachiappan Nagappan, Payam Shodjai, Killian Murphy, James Everingham, Aparna Ramani, Peter C. Rigby" |
| 849 | + venue: "TSE 2025" |
| 850 | + links: |
| 851 | + paper: "https://arxiv.org/abs/2507.18755" |
| 852 | + github: "" |
| 853 | + website: "" |
| 854 | + |
735 | 855 | - title: "AutoCodeRover: Autonomous Program Improvement" |
736 | 856 | authors: "Yuntong Zhang, Haifeng Ruan, Zhiyu Fan, Abhik Roychoudhury" |
737 | 857 | venue: "ISSTA 2024" |
|
772 | 892 | github: "https://github.com/SWE-bench/SWE-bench" |
773 | 893 | website: "https://www.swebench.com/" |
774 | 894 |
|
775 | | -- title: "CodeR: Issue Resolving with Multi-Agent and Task Graphs" |
776 | | - authors: "Dong Chen, Shaoxin Lin, Muhan Zeng, Daoguang Zan, Jian-Gang Wang, Anton Cheshkov, Jun Sun, Hao Yu, Guoliang Dong, Artem Aliev, Jie Wang, Xiao Cheng, Guangtai Liang, Yuchi Ma, Pan Bian, Tao Xie, Qianxiang Wang" |
777 | | - venue: "arXiv 2024" |
778 | | - links: |
779 | | - paper: "https://arxiv.org/abs/2406.01304" |
780 | | - github: "https://github.com/NL2Code/CodeR" |
781 | | - website: "" |
782 | | - |
783 | | -- title: "MarsCode Agent: AI-native Automated Bug Fixing" |
784 | | - authors: "Yizhou Liu, Pengfei Gao, Xinchen Wang, Jie Liu, Yexuan Shi, Zhao Zhang, Chao Peng" |
785 | | - venue: "arXiv 2025" |
786 | | - links: |
787 | | - paper: "https://arxiv.org/abs/2409.00899" |
788 | | - github: "" |
789 | | - website: "https://se-research.bytedance.com/" |
| 895 | +# - title: "CodeR: Issue Resolving with Multi-Agent and Task Graphs" |
| 896 | +# authors: "Dong Chen, Shaoxin Lin, Muhan Zeng, Daoguang Zan, Jian-Gang Wang, Anton Cheshkov, Jun Sun, Hao Yu, Guoliang Dong, Artem Aliev, Jie Wang, Xiao Cheng, Guangtai Liang, Yuchi Ma, Pan Bian, Tao Xie, Qianxiang Wang" |
| 897 | +# venue: "arXiv 2024" |
| 898 | +# links: |
| 899 | +# paper: "https://arxiv.org/abs/2406.01304" |
| 900 | +# github: "https://github.com/NL2Code/CodeR" |
| 901 | +# website: "" |
| 902 | + |
| 903 | +# - title: "MarsCode Agent: AI-native Automated Bug Fixing" |
| 904 | +# authors: "Yizhou Liu, Pengfei Gao, Xinchen Wang, Jie Liu, Yexuan Shi, Zhao Zhang, Chao Peng" |
| 905 | +# venue: "arXiv 2025" |
| 906 | +# links: |
| 907 | +# paper: "https://arxiv.org/abs/2409.00899" |
| 908 | +# github: "" |
| 909 | +# website: "https://se-research.bytedance.com/" |
0 commit comments