URL: https://arxiv.org/pdf/2605.08678
%PDF-1.7
%����
1 0 obj
<< /Metadata 3 0 R /Names 4 0 R /OpenAction 5 0 R /Outlines 6 0 R /PageMode /UseOutlines /Pages 7 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Author (Bohan Lyu; Yucheng Yang; Siqiao Huang; Jiaru Zhang; Qixin Xu; Xinghan Li; Xinyang Han; Yicheng Zhang; Huaqing Zhang; Runhan Huang; Kaicheng Yang; Zitao Chen; Wentao Guo; Junlin Yang; Xinyue Ai; Wenhao Chai; Yadi Cao; Ziran Yang; Kun Wang; Dapeng Jiang; Huan-ang Gao; Shange Tang; Chengshuai Shi; Simon S. Du; Max Simchowitz; Jiantao Jiao; Dawn Song; Chi Jin) /Creator (arXiv GenPDF \(tex2pdf:a6404ea\)) /DOI (https://doi.org/10.48550/arXiv.2605.08678) /License (http://creativecommons.org/licenses/by/4.0/) /PTEX.Fullbanner (This is pdfTeX, Version 3.141592653-2.6-1.40.28 \(TeX Live 2025\) kpathsea version 6.4.1) /Producer (pikepdf 8.15.1) /Title (MLS-Bench: A Holistic and Rigorous Assessment of AI Systems on Building Better AI) /Trapped /False /arXivID (https://arxiv.org/abs/2605.08678v2) >>
endobj
3 0 obj
<< /Subtype /XML /Type /Metadata /Length 2330 >>
stream
endstream
endobj
4 0 obj
<< /Dests 8 0 R >>
endobj
5 0 obj
<< /D [ 9 0 R /Fit ] /S /GoTo >>
endobj
6 0 obj
<< /Count 12 /First 10 0 R /Last 11 0 R /Type /Outlines >>
endobj
7 0 obj
<< /Count 65 /Kids [ 12 0 R 13 0 R ] /Type /Pages >>
endobj
8 0 obj
<< /Kids [ 14 0 R 15 0 R ] /Limits [ (Doc-Start) (table.caption.8) ] >>
endobj
9 0 obj
<< /Annots [ 16 0 R 17 0 R 18 0 R 19 0 R 20 0 R 21 0 R 22 0 R 23 0 R 24 0 R 25 0 R 26 0 R 27 0 R 28 0 R 29 0 R 30 0 R 31 0 R 32 0 R 33 0 R 34 0 R 35 0 R 36 0 R 37 0 R 38 0 R 39 0 R 40 0 R 41 0 R 42 0 R 43 0 R 44 0 R 45 0 R 46 0 R 47 0 R 48 0 R 49 0 R 50 0 R 51 0 R 52 0 R 53 0 R 54 0 R 55 0 R 56 0 R 57 0 R 58 0 R ] /Contents [ 59 0 R 60 0 R 61 0 R 62 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 63 0 R /Resources 64 0 R /Type /Page >>
endobj
10 0 obj
<< /A 65 0 R /Next 66 0 R /Parent 6 0 R /Title 67 0 R >>
endobj
11 0 obj
<< /A 68 0 R /Count -6 /First 69 0 R /Last 70 0 R /Parent 6 0 R /Prev 71 0 R /Title 72 0 R >>
endobj
12 0 obj
<< /Count 36 /Kids [ 63 0 R 73 0 R 74 0 R 75 0 R 76 0 R 77 0 R ] /Parent 7 0 R /Type /Pages >>
endobj
13 0 obj
<< /Count 29 /Kids [ 78 0 R 79 0 R 80 0 R 81 0 R 82 0 R ] /Parent 7 0 R /Type /Pages >>
endobj
14 0 obj
<< /Kids [ 83 0 R 84 0 R 85 0 R 86 0 R 87 0 R 88 0 R ] /Limits [ (Doc-Start) (lstnumber.-30.43) ] >>
endobj
15 0 obj
<< /Kids [ 89 0 R 90 0 R 91 0 R ] /Limits [ (lstnumber.-30.44) (table.caption.8) ] >>
endobj
16 0 obj
<< /A << /S /URI /Type /Action /URI (https://mls-bench.com) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 356.483 252.89 463.581 263.794 ] /Subtype /Link /Type /Annot >>
endobj
17 0 obj
<< /A << /D (cite.bai2023qwen) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 359.491 164.03 366.464 172.608 ] /Subtype /Link /Type /Annot >>
endobj
18 0 obj
<< /A << /D (cite.brown2020language) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 369.929 163.911 376.903 172.877 ] /Subtype /Link /Type /Annot >>
endobj
19 0 obj
<< /A << /D (cite.chiang2024chatbot) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 380.368 164.03 392.323 172.877 ] /Subtype /Link /Type /Annot >>
endobj
20 0 obj
<< /A << /D (cite.openai2023gpt4) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 395.788 164.03 407.743 172.877 ] /Subtype /Link /Type /Annot >>
endobj
21 0 obj
<< /A << /D (cite.team2023gemini) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 411.208 163.911 423.163 172.877 ] /Subtype /Link /Type /Annot >>
endobj
22 0 obj
<< /A << /D (cite.kimiteam2025kimi15) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 426.628 163.911 438.583 172.877 ] /Subtype /Link /Type /Annot >>
endobj
23 0 obj
<< /A << /D (cite.touvron2023llama2) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 442.048 163.911 454.003 172.877 ] /Subtype /Link /Type /Annot >>
endobj
24 0 obj
<< /A << /D (cite.anthropic2025claude4systemcard) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 258.076 153.221 265.05 161.968 ] /Subtype /Link /Type /Annot >>
endobj
25 0 obj
<< /A << /D (cite.comanici2025gemini) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 268.611 153.221 280.566 161.968 ] /Subtype /Link /Type /Annot >>
endobj
26 0 obj
<< /A << /D (cite.deepseekai2025r1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 284.127 153.121 296.082 161.968 ] /Subtype /Link /Type /Annot >>
endobj
27 0 obj
<< /A << /D (cite.mialon2023gaia) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 299.643 153.121 311.598 161.968 ] /Subtype /Link /Type /Annot >>
endobj
28 0 obj
<< /A << /D (cite.openai2025o3o4systemcard) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 315.159 153.121 327.114 161.968 ] /Subtype /Link /Type /Annot >>
endobj
29 0 obj
<< /A << /D (cite.schick2023toolformer) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 330.674 153.121 342.63 161.968 ] /Subtype /Link /Type /Annot >>
endobj
30 0 obj
<< /A << /D (cite.kimiteam2025kimik2) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 346.19 153.001 358.146 161.968 ] /Subtype /Link /Type /Annot >>
endobj
31 0 obj
<< /A << /D (cite.an2025qwen3) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 361.706 153.121 378.643 161.968 ] /Subtype /Link /Type /Annot >>
endobj
32 0 obj
<< /A << /D (cite.yao2023react) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 382.203 153.121 399.14 161.968 ] /Subtype /Link /Type /Annot >>
endobj
33 0 obj
<< /A << /D (cite.zhou2023webarena) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 402.701 153.121 419.637 161.968 ] /Subtype /Link /Type /Annot >>
endobj
34 0 obj
<< /A << /D (cite.chen2026sweci) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 159.037 142.212 170.993 151.059 ] /Subtype /Link /Type /Annot >>
endobj
35 0 obj
<< /A << /D (cite.jimenez2023swebench) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 173.939 142.312 185.895 151.059 ] /Subtype /Link /Type /Annot >>
endobj
36 0 obj
<< /A << /D (cite.liang2026swe) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 188.841 142.212 200.797 151.059 ] /Subtype /Link /Type /Annot >>
endobj
37 0 obj
<< /A << /D (cite.mundler2024swtbench) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 203.744 142.212 215.699 151.059 ] /Subtype /Link /Type /Annot >>
endobj
38 0 obj
<< /A << /D (cite.schmidgall2025agentlab) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 281.315 142.212 293.27 151.059 ] /Subtype /Link /Type /Annot >>
endobj
39 0 obj
<< /A << /D (cite.shao2025drtulu) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 296.217 142.212 308.172 151.059 ] /Subtype /Link /Type /Annot >>
endobj
40 0 obj
<< /A << /D (cite.tang2025airesearcher) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 311.119 142.092 323.074 151.059 ] /Subtype /Link /Type /Annot >>
endobj
41 0 obj
<< /A << /D (cite.wei2025browsecomp) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 326.021 142.092 342.957 151.059 ] /Subtype /Link /Type /Annot >>
endobj
42 0 obj
<< /A << /D (cite.chen2025seed) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 491.836 142.312 503.791 151.059 ] /Subtype /Link /Type /Annot >>
endobj
43 0 obj
<< /A << /D (cite.hubert2025olympiad) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 107.004 131.303 118.959 140.15 ] /Subtype /Link /Type /Annot >>
endobj
44 0 obj
<< /A << /D (cite.lin2025goedelprover) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 121.617 131.303 133.572 140.15 ] /Subtype /Link /Type /Annot >>
endobj
45 0 obj
<< /A << /D (cite.lin2025goedel) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 136.231 131.303 148.186 140.15 ] /Subtype /Link /Type /Annot >>
endobj
46 0 obj
<< /A << /D (cite.wang2025kimina) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 150.844 131.303 167.781 140.15 ] /Subtype /Link /Type /Annot >>
endobj
47 0 obj
<< /A << /D (cite.mang2025frontiercs) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 223.629 120.394 235.584 129.24 ] /Subtype /Link /Type /Annot >>
endobj
48 0 obj
<< /A << /D (cite.novikov2025alphaevolve) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 238.586 120.274 250.541 129.24 ] /Subtype /Link /Type /Annot >>
endobj
49 0 obj
<< /A << /D (cite.openevolve) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 253.543 120.394 265.498 129.24 ] /Subtype /Link /Type /Annot >>
endobj
50 0 obj
<< /A << /D (cite.wang2025thetaevolve) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 268.5 120.394 285.436 129.24 ] /Subtype /Link /Type /Annot >>
endobj
51 0 obj
<< /A << /D (cite.yuksekgonul2026learning) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 288.438 120.394 305.374 129.24 ] /Subtype /Link /Type /Annot >>
endobj
52 0 obj
<< /A << /D (cite.chan2024mlebench) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 231.934 109.584 243.889 118.331 ] /Subtype /Link /Type /Annot >>
endobj
53 0 obj
<< /A << /D (cite.zhang2024mleagent) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 246.693 109.485 258.648 118.331 ] /Subtype /Link /Type /Annot >>
endobj
54 0 obj
<< /A << /D (cite.huang2023mlagentbench) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 261.452 109.485 273.407 118.331 ] /Subtype /Link /Type /Annot >>
endobj
55 0 obj
<< /A << /D (cite.chen2026autolab) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 276.21 109.485 288.166 118.331 ] /Subtype /Link /Type /Annot >>
endobj
56 0 obj
<< /A << /D (cite.qiang2025mledojo) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 290.969 109.485 302.924 118.331 ] /Subtype /Link /Type /Annot >>
endobj
57 0 obj
<< /A << /D (cite.yang2025reinforcement) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 305.728 109.485 322.665 118.331 ] /Subtype /Link /Type /Annot >>
endobj
58 0 obj
<< /A << /S /URI /URI (https://arxiv.org/abs/2605.08678v2) >> /BS << /W 0 >> /NM (fitz-L0) /Rect [ 12 218.79999 32 573.2 ] /Subtype /Link >>
endobj
59 0 obj
<< /Length 10 /Filter /FlateDecode >>
stream
x�+��|
endstream
endobj
60 0 obj
<< /Filter /FlateDecode /Length 4065 >>
stream
xڝ[Y�۶~���[�sF6D�b;��ا˸����S��%�q����^���5z" ��]��"��,�yFF�_�Qx�]P�,I��Ƌl����$^�����ߟ�x�싯�^P1�����"�r!U�/�n?-�]���Y��՚S�|�Z��,��JӴ&s_�~�^�a�U��ɲ���i�����}W��wo_-dQ�XSi;JϿs=n�6�����=_t�ܘ���m�מ�w8��_rIn���;����$����mgyB��v�X�d�q#�h�