VOOZH about

URL: https://arxiv.org/pdf/2603.22117


%PDF-1.7 %���� 1 0 obj << /Metadata 3 0 R /Names 4 0 R /OpenAction 5 0 R /PageMode /UseOutlines /Pages 6 0 R /Type /Catalog >> endobj 2 0 obj << /Author (Kexin Huang; Haoming Meng; Junkang Wu; Jinda Lu; Chiyu Ma; Ziqian Chen; Xue Wang; Bolin Ding; Jiancan Wu; Xiang Wang; Xiangnan He; Guoyin Wang; Jingren Zhou) /Creator (arXiv GenPDF \(tex2pdf:a6404ea\)) /DOI (https://doi.org/10.48550/arXiv.2603.22117) /License (http://arxiv.org/licenses/nonexclusive-distrib/1.0/) /PTEX.Fullbanner (This is pdfTeX, Version 3.141592653-2.6-1.40.28 \(TeX Live 2025\) kpathsea version 6.4.1) /Producer (pikepdf 8.15.1) /Title (On the Direction of RLVR Updates for LLM Reasoning: Identification and Exploitation) /Trapped /False /arXivID (https://arxiv.org/abs/2603.22117v1) >> endobj 3 0 obj << /Subtype /XML /Type /Metadata /Length 1936 >> stream endstream endobj 4 0 obj << /Dests 7 0 R >> endobj 5 0 obj << /D [ 8 0 R /Fit ] /S /GoTo >> endobj 6 0 obj << /Count 26 /Kids [ 9 0 R 10 0 R 11 0 R 12 0 R 13 0 R ] /Type /Pages >> endobj 7 0 obj << /Kids [ 14 0 R 15 0 R 16 0 R 17 0 R ] /Limits [ (ALC@unique.1) (theorem.4.1) ] >> endobj 8 0 obj << /Annots [ 18 0 R 19 0 R 20 0 R 21 0 R 22 0 R 23 0 R 24 0 R 25 0 R 26 0 R 27 0 R 28 0 R 29 0 R 30 0 R 31 0 R 32 0 R 33 0 R 34 0 R 35 0 R 36 0 R 37 0 R 38 0 R 39 0 R 40 0 R 41 0 R 42 0 R 43 0 R 44 0 R 45 0 R 46 0 R 47 0 R 48 0 R 49 0 R ] /Contents [ 50 0 R 51 0 R 52 0 R 53 0 R ] /Group 54 0 R /MediaBox [ 0 0 595.276 841.89 ] /Parent 9 0 R /Resources 55 0 R /Type /Page >> endobj 9 0 obj << /Count 6 /Kids [ 8 0 R 56 0 R 57 0 R 58 0 R 59 0 R 60 0 R ] /Parent 6 0 R /Type /Pages >> endobj 10 0 obj << /Count 6 /Kids [ 61 0 R 62 0 R 63 0 R 64 0 R 65 0 R 66 0 R ] /Parent 6 0 R /Type /Pages >> endobj 11 0 obj << /Count 6 /Kids [ 67 0 R 68 0 R 69 0 R 70 0 R 71 0 R 72 0 R ] /Parent 6 0 R /Type /Pages >> endobj 12 0 obj << /Count 6 /Kids [ 73 0 R 74 0 R 75 0 R 76 0 R 77 0 R 78 0 R ] /Parent 6 0 R /Type /Pages >> endobj 13 0 obj << /Count 2 /Kids [ 79 0 R 80 0 R ] /Parent 6 0 R /Type /Pages >> endobj 14 0 obj << /Kids [ 81 0 R 82 0 R 83 0 R 84 0 R 85 0 R 86 0 R ] /Limits [ (ALC@unique.1) (cite.NPG) ] >> endobj 15 0 obj << /Kids [ 87 0 R 88 0 R 89 0 R 90 0 R 91 0 R 92 0 R ] /Limits [ (cite.ORZ) (equation.5) ] >> endobj 16 0 obj << /Kids [ 93 0 R 94 0 R 95 0 R 96 0 R 97 0 R 98 0 R ] /Limits [ (equation.6) (page.24) ] >> endobj 17 0 obj << /Kids [ 99 0 R 100 0 R 101 0 R 102 0 R 103 0 R 104 0 R ] /Limits [ (page.25) (theorem.4.1) ] >> endobj 18 0 obj << /A << /D (Hfootnote.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 393.664 710.516 395.656 727.873 ] /Subtype /Link /Type /Annot >> endobj 19 0 obj << /A << /S /URI /Type /Action /URI (https://qwen-pilot.notion.site/rlvr-direction) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 248.203 682.126 303.983 698.066 ] /Subtype /Link /Type /Annot >> endobj 20 0 obj << /A << /S /URI /Type /Action /URI (https://github.com/Hesse73/RLVR-Directions) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 327.115 682.126 362.661 698.066 ] /Subtype /Link /Type /Annot >> endobj 21 0 obj << /A << /D (cite.Openai-O1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 340.273 320.396 388.212 332.406 ] /Subtype /Link /Type /Annot >> endobj 22 0 obj << /A << /D (cite.Openai-O1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 391.154 320.396 412.733 332.406 ] /Subtype /Link /Type /Annot >> endobj 23 0 obj << /A << /D (cite.Deepseek-R1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 480.435 320.396 524.203 332.406 ] /Subtype /Link /Type /Annot >> endobj 24 0 obj << /A << /D (cite.Deepseek-R1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 69.87 306.449 91.449 318.458 ] /Subtype /Link /Type /Annot >> endobj 25 0 obj << /A << /D (cite.Gemini-2.5) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 150.324 306.449 216.939 318.458 ] /Subtype /Link /Type /Annot >> endobj 26 0 obj << /A << /D (cite.Gemini-2.5) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 219.876 306.449 241.455 318.458 ] /Subtype /Link /Type /Annot >> endobj 27 0 obj << /A << /D (cite.Qwen3) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 303.611 306.449 350.301 318.458 ] /Subtype /Link /Type /Annot >> endobj 28 0 obj << /A << /D (cite.Qwen3) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 353.238 306.449 379.714 318.458 ] /Subtype /Link /Type /Annot >> endobj 29 0 obj << /A << /D (cite.Deepseek-R1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 372.093 292.501 417.467 304.511 ] /Subtype /Link /Type /Annot >> endobj 30 0 obj << /A << /D (cite.Deepseek-R1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 420.612 292.501 442.929 304.511 ] /Subtype /Link /Type /Annot >> endobj 31 0 obj << /A << /D (cite.kimi-k1.5) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 446.074 292.501 472.3 304.511 ] /Subtype /Link /Type /Annot >> endobj 32 0 obj << /A << /D (cite.kimi-k1.5) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 475.445 292.501 497.762 304.511 ] /Subtype /Link /Type /Annot >> endobj 33 0 obj << /A << /D (cite.Qwen3) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 500.907 292.501 525.406 304.511 ] /Subtype /Link /Type /Annot >> endobj 34 0 obj << /A << /D (cite.Qwen3) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 69.87 278.553 93.065 290.563 ] /Subtype /Link /Type /Annot >> endobj 35 0 obj << /A << /D (cite.Qwen3) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 96.089 278.553 123.437 290.563 ] /Subtype /Link /Type /Annot >> endobj 36 0 obj << /A << /D (cite.iclr25_dynamics) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 295.998 230.733 380.325 242.742 ] /Subtype /Link /Type /Annot >> endobj 37 0 obj << /A << /D (cite.iclr25_dynamics) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 384.437 230.733 406.753 242.742 ] /Subtype /Link /Type /Annot >> endobj 38 0 obj << /A << /D (cite.entropy_28_rule) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 222.597 202.837 272.797 214.847 ] /Subtype /Link /Type /Annot >> endobj 39 0 obj << /A << /D (cite.entropy_28_rule) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 276.576 202.837 303.698 214.847 ] /Subtype /Link /Type /Annot >> endobj 40 0 obj << /A << /D (cite.UniReason) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 69.87 188.889 119.047 200.899 ] /Subtype /Link /Type /Annot >> endobj 41 0 obj << /A << /D (cite.UniReason) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 122.6 188.889 144.119 200.899 ] /Subtype /Link /Type /Annot >> endobj 42 0 obj << /A << /D (cite.over_dominate) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 69.541 174.942 115.888 186.952 ] /Subtype /Link /Type /Annot >> endobj 43 0 obj << /A << /D (cite.over_dominate) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 119.518 174.942 146.436 186.952 ] /Subtype /Link /Type /Annot >> endobj 44 0 obj << /A << /D (cite.decomposing_zhaoxin) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 168.967 174.942 217.07 186.952 ] /Subtype /Link /Type /Annot >> endobj 45 0 obj << /A << /D (cite.decomposing_zhaoxin) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 220.7 174.942 242.22 186.952 ] /Subtype /Link /Type /Annot >> endobj 46 0 obj << /A << /D (figure.caption.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 101.056 133.099 108.129 145.108 ] /Subtype /Link /Type /Annot >> endobj 47 0 obj << /A << /D (figure.caption.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 106.137 133.099 120.516 145.108 ] /Subtype /Link /Type /Annot >> endobj 48 0 obj << /A << /D (section*.10) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 209.34 66.881 266.69 78.489 ] /Subtype /Link /Type /Annot >> endobj 49 0 obj << /A << /S /URI /URI (https://arxiv.org/abs/2603.22117v1) >> /BS << /W 0 >> /NM (fitz-L0) /Rect [ 12 245.41498 32 596.475 ] /Subtype /Link >> endobj 50 0 obj << /Length 10 /Filter /FlateDecode >> stream x�+��| endstream endobj 51 0 obj << /Filter /FlateDecode /Length 4036 >> stream x��;k�ܶ���+��BUih��H��Dz}�r�Q.�r�3���C�����ק� A��j�������xt7� N�8x�J��8AGy�Y��B�`wz�˫(�Rv�״���#MS����ꇓ �m_��q$cQ�����k�n����8ب(�5���?�`�ah��S�4l�$���/�'S�~�57L�ƣ����Ndy T$U�n��4ʋ<�ta��?�?5�7����h��mՙ�P� oo�y��u���uC�����`z�ܶ5޽�Olʾm��������c�+dl�� %h�����q�+���fO�?�z��j�W��#���a�?�e��*Ȁ�