URL: https://arxiv.org/pdf/2604.06268
%PDF-1.7
%����
1 0 obj
<< /Metadata 3 0 R /Names 4 0 R /OpenAction 5 0 R /Outlines 6 0 R /PageMode /UseOutlines /Pages 7 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Author (Zihan Wang; Chi Gui; Xing Jin; Qineng Wang; Licheng Liu; Kangrui Wang; Shiqi Chen; Linjie Li; Zhengyuan Yang; Pingyue Zhang; Yiping Lu; Jiajun Wu; Li Fei-Fei; Lijuan Wang; Yejin Choi; Manling Li) /Creator (arXiv GenPDF \(tex2pdf:a6404ea\)) /DOI (https://doi.org/10.48550/arXiv.2604.06268) /License (http://creativecommons.org/licenses/by/4.0/) /PTEX.Fullbanner (This is pdfTeX, Version 3.141592653-2.6-1.40.28 \(TeX Live 2025\) kpathsea version 6.4.1) /Producer (pikepdf 8.15.1) /Title (RAGEN-2: Reasoning Collapse in Agentic RL) /Trapped /False /arXivID (https://arxiv.org/abs/2604.06268v1) >>
endobj
3 0 obj
<< /Subtype /XML /Type /Metadata /Length 1947 >>
stream
endstream
endobj
4 0 obj
<< /Dests 8 0 R >>
endobj
5 0 obj
<< /D [ 9 0 R /Fit ] /S /GoTo >>
endobj
6 0 obj
<< /Count 23 /First 10 0 R /Last 11 0 R /Type /Outlines >>
endobj
7 0 obj
<< /Count 44 /Kids [ 12 0 R 13 0 R ] /Type /Pages >>
endobj
8 0 obj
<< /Kids [ 14 0 R 15 0 R ] /Limits [ (Doc-Start) (theorem.N.1) ] >>
endobj
9 0 obj
<< /Annots [ 16 0 R 17 0 R 18 0 R 19 0 R 20 0 R 21 0 R 22 0 R 23 0 R 24 0 R 25 0 R 26 0 R ] /Contents [ 27 0 R 28 0 R 29 0 R 30 0 R ] /Group 31 0 R /MediaBox [ 0 0 595.276 841.89 ] /Parent 32 0 R /Resources 33 0 R /Type /Page >>
endobj
10 0 obj
<< /A 34 0 R /Next 35 0 R /Parent 6 0 R /Title 36 0 R >>
endobj
11 0 obj
<< /A 37 0 R /Parent 6 0 R /Prev 38 0 R /Title 39 0 R >>
endobj
12 0 obj
<< /Count 36 /Kids [ 32 0 R 40 0 R 41 0 R 42 0 R 43 0 R 44 0 R ] /Parent 7 0 R /Type /Pages >>
endobj
13 0 obj
<< /Count 8 /Kids [ 45 0 R 46 0 R ] /Parent 7 0 R /Type /Pages >>
endobj
14 0 obj
<< /Kids [ 47 0 R 48 0 R 49 0 R 50 0 R 51 0 R 52 0 R ] /Limits [ (Doc-Start) (theorem.J.1) ] >>
endobj
15 0 obj
<< /Kids [ 53 0 R ] /Limits [ (theorem.L.1) (theorem.N.1) ] >>
endobj
16 0 obj
<< /A << /S /URI /Type /Action /URI (https://ragen-ai.github.io/v2/) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 208.472 575.66 383.566 587.529 ] /Subtype /Link /Type /Annot >>
endobj
17 0 obj
<< /A << /D (cite.qi2025defeatingtraininginferencemismatchfp16) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 511.157 221.277 524.058 231.064 ] /Subtype /Link /Type /Annot >>
endobj
18 0 obj
<< /A << /D (cite.zhang2026precisiontraininginferencemismatchoptimization) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 69.87 207.929 82.771 217.515 ] /Subtype /Link /Type /Annot >>
endobj
19 0 obj
<< /A << /D (cite.yu2025dapo) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 86.233 207.728 99.135 217.515 ] /Subtype /Link /Type /Annot >>
endobj
20 0 obj
<< /A << /D (cite.schulman2017proximalpolicyoptimizationalgorithms) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 156.771 194.38 169.672 203.965 ] /Subtype /Link /Type /Annot >>
endobj
21 0 obj
<< /A << /D (cite.ouyang2022traininglanguagemodelsfollow) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 173.134 194.178 186.036 203.965 ] /Subtype /Link /Type /Annot >>
endobj
22 0 obj
<< /A << /D (cite.xu2025epoentropyregularizedpolicyoptimization) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 189.498 194.178 202.4 203.965 ] /Subtype /Link /Type /Annot >>
endobj
23 0 obj
<< /A << /D (cite.yu2025dapo) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 339.763 146.756 352.664 156.543 ] /Subtype /Link /Type /Annot >>
endobj
24 0 obj
<< /A << /D (cite.xu2025epoentropyregularizedpolicyoptimization) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 357.201 146.756 370.102 156.543 ] /Subtype /Link /Type /Annot >>
endobj
25 0 obj
<< /A << /D (figure.caption.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 288.641 116.816 296.082 129.777 ] /Subtype /Link /Type /Annot >>
endobj
26 0 obj
<< /A << /S /URI /URI (https://arxiv.org/abs/2604.06268v1) >> /BS << /W 0 >> /NM (fitz-L0) /Rect [ 12 251.52503 32 590.365 ] /Subtype /Link >>
endobj
27 0 obj
<< /Length 10 /Filter /FlateDecode >>
stream
x�+��|
endstream
endobj
28 0 obj
<< /Filter /FlateDecode /Length 3613 >>
stream
x��Z[��F~�_�G�9X�U-�iY6�d�l��&����l�X��[�խ�G�@ξ�I��V��U_]d�#=�`���/�xdd�3��ũ���������(7>j���1�����َG��/^�aS���3V��e��}�
��$����R%qj��X��Y��o��t�Hf0[¢X'~â��s�,����]�4S!C��%�w$�s�=e��8�Apu���`:�:�ѫۈK&��2\E�V����������].%g��2o��V�˥�l��n�}Sқ���h]V�-�����ͫz��hq8�N3$�T2�@�K)��x���M�7��2��.����|�q��bit$����I?�3_/�D�+w������������
��>EW�"�T2C�?�t��Q����r�0�g龰U(N�F�L�s~�<��e�M�����h'`��չ���<\����|�/��c����A+��9(m% ������ޱ A��j�|�f.P֧��~��eeD��4��}@�l��:��,i|J:�"icb��,���|��ԑ��ů��