URL: https://arxiv.org/pdf/2503.01307
%PDF-1.5
%����
1 0 obj
<< /Metadata 3 0 R /Names 4 0 R /OpenAction 5 0 R /Outlines 6 0 R /PageMode /UseOutlines /Pages 7 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Author (Kanishk Gandhi; Ayush Chakravarthy; Anikait Singh; Nathan Lile; Noah D. Goodman) /Creator (arXiv GenPDF \(tex2pdf:\)) /DOI (https://doi.org/10.48550/arXiv.2503.01307) /License (http://creativecommons.org/licenses/by/4.0/) /PTEX.Fullbanner (This is pdfTeX, Version 3.141592653-2.6-1.40.25 \(TeX Live 2023\) kpathsea version 6.3.5) /Producer (pikepdf 8.15.1) /Title (Cognitive Behaviors that Enable Self-Improving Reasoners, or, Four Habits of Highly Effective STaRs) /Trapped /False /arXivID (https://arxiv.org/abs/2503.01307v2) >>
endobj
3 0 obj
<< /Subtype /XML /Type /Metadata /Length 1740 >>
stream
endstream
endobj
4 0 obj
<< /Dests 8 0 R >>
endobj
5 0 obj
<< /D [ 9 0 R /Fit ] /S /GoTo >>
endobj
6 0 obj
<< /Count 13 /First 10 0 R /Last 11 0 R /Type /Outlines >>
endobj
7 0 obj
<< /Count 21 /Kids [ 12 0 R 13 0 R 14 0 R 15 0 R ] /Type /Pages >>
endobj
8 0 obj
<< /Kids [ 16 0 R 17 0 R 18 0 R 19 0 R ] /Limits [ (Doc-Start) (table.caption.22) ] >>
endobj
9 0 obj
<< /Annots [ 20 0 R 21 0 R 22 0 R 23 0 R 24 0 R 25 0 R 26 0 R 27 0 R 28 0 R 29 0 R 30 0 R 31 0 R 32 0 R 33 0 R 34 0 R 35 0 R 36 0 R ] /Contents [ 37 0 R 38 0 R 39 0 R 40 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 12 0 R /Resources 41 0 R /Type /Page >>
endobj
10 0 obj
<< /A 42 0 R /Next 43 0 R /Parent 6 0 R /Title 44 0 R >>
endobj
11 0 obj
<< /A 45 0 R /Parent 6 0 R /Prev 46 0 R /Title 47 0 R >>
endobj
12 0 obj
<< /Count 6 /Kids [ 9 0 R 48 0 R 49 0 R 50 0 R 51 0 R 52 0 R ] /Parent 7 0 R /Type /Pages >>
endobj
13 0 obj
<< /Count 6 /Kids [ 53 0 R 54 0 R 55 0 R 56 0 R 57 0 R 58 0 R ] /Parent 7 0 R /Type /Pages >>
endobj
14 0 obj
<< /Count 6 /Kids [ 59 0 R 60 0 R 61 0 R 62 0 R 63 0 R 64 0 R ] /Parent 7 0 R /Type /Pages >>
endobj
15 0 obj
<< /Count 3 /Kids [ 65 0 R 66 0 R 67 0 R ] /Parent 7 0 R /Type /Pages >>
endobj
16 0 obj
<< /Kids [ 68 0 R 69 0 R 70 0 R 71 0 R 72 0 R 73 0 R ] /Limits [ (Doc-Start) (cite.guo2025deepseek) ] >>
endobj
17 0 obj
<< /Kids [ 74 0 R 75 0 R 76 0 R 77 0 R 78 0 R 79 0 R ] /Limits [ (cite.havrilla2024teaching) (cite.ye2025emergence) ] >>
endobj
18 0 obj
<< /Kids [ 80 0 R 81 0 R 82 0 R 83 0 R 84 0 R 85 0 R ] /Limits [ (cite.yeo2025demystifyinglongchainofthoughtreasoning) (page.6) ] >>
endobj
19 0 obj
<< /Kids [ 86 0 R 87 0 R 88 0 R 89 0 R 90 0 R ] /Limits [ (page.7) (table.caption.22) ] >>
endobj
20 0 obj
<< /A << /D (Hfootnote.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 208.144 248.942 214.42 262.607 ] /Subtype /Link /Type /Annot >>
endobj
21 0 obj
<< /A << /D (cite.guo2025deepseek) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 199.204 159.613 245.633 171.623 ] /Subtype /Link /Type /Annot >>
endobj
22 0 obj
<< /A << /D (cite.guo2025deepseek) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 249.305 159.613 271.621 171.623 ] /Subtype /Link /Type /Annot >>
endobj
23 0 obj
<< /A << /D (cite.jaech2024openai) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 275.294 159.613 326.052 171.623 ] /Subtype /Link /Type /Annot >>
endobj
24 0 obj
<< /A << /D (cite.jaech2024openai) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 329.724 159.613 352.04 171.623 ] /Subtype /Link /Type /Annot >>
endobj
25 0 obj
<< /A << /D (cite.zelikman2022star) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 436.068 148.654 503.751 160.664 ] /Subtype /Link /Type /Annot >>
endobj
26 0 obj
<< /A << /D (cite.zelikman2022star) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 107.004 137.695 128.543 149.705 ] /Subtype /Link /Type /Annot >>
endobj
27 0 obj
<< /A << /D (cite.havrilla2024teaching) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 131.486 137.695 192.322 149.705 ] /Subtype /Link /Type /Annot >>
endobj
28 0 obj
<< /A << /D (cite.havrilla2024teaching) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 195.265 137.695 216.804 149.705 ] /Subtype /Link /Type /Annot >>
endobj
29 0 obj
<< /A << /D (cite.hoffman2023training) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 219.747 137.695 283.77 149.705 ] /Subtype /Link /Type /Annot >>
endobj
30 0 obj
<< /A << /D (cite.hoffman2023training) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 286.713 137.695 308.252 149.705 ] /Subtype /Link /Type /Annot >>
endobj
31 0 obj
<< /A << /D (cite.qwen2025qwen25technicalreport) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 398.456 87.882 453.997 99.892 ] /Subtype /Link /Type /Annot >>
endobj
32 0 obj
<< /A << /D (cite.qwen2025qwen25technicalreport) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 458.319 87.882 480.635 99.892 ] /Subtype /Link /Type /Annot >>
endobj
33 0 obj
<< /A << /D (cite.grattafiori2024llama3herdmodels) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 171.515 76.923 244.025 88.933 ] /Subtype /Link /Type /Annot >>
endobj
34 0 obj
<< /A << /D (cite.grattafiori2024llama3herdmodels) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 247.05 76.923 269.227 88.933 ] /Subtype /Link /Type /Annot >>
endobj
35 0 obj
<< /A << /S /URI /Type /Action /URI (https://github.com/kanishkg/cognitive-behaviors) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 193.896 58.153 399.978 71.222 ] /Subtype /Link /Type /Annot >>
endobj
36 0 obj
<< /A << /S /URI /URI (https://arxiv.org/abs/2503.01307v2) >> /BS << /W 0 >> /NM (fitz-L0) /Rect [ 12 220.46002 32 571.54 ] /Subtype /Link >>
endobj
37 0 obj
<< /Length 10 /Filter /FlateDecode >>
stream
x�+��|
endstream
endobj
38 0 obj
<< /Filter /FlateDecode /Length 3250 >>
stream
xڵZYs��~ׯ�[�* ��
�!;>#��R[�CpH"������= ȥ���0�����A��"X|}���ͫ�T�/
�H�t�^� _dI꧑Z<�?{��u�o���>LO���O�6k�ݪ�3Mix��U����{n�A������͗�7��(�,�ܪ�$Y����t�����Y�o~� ���co�Gq:�Ӣ�,b�h7M5TOB��f�����7[Gߗ�^ֲ荩����]�T5{mt�6���v�7^(�Ib�
�*�A����n�ěV��W~Վ¨o����v-c�f[?�k��ڔ�Ǽy�UJy�u��$G\���(�e ��O�@�;���nV�Jv�i����w�t�n��E�݀�����k�5~�����y췢
[������� ����)h���D�o ��'��>,2?B4r_9!���g
���e���f�>��Uɪvm�o������"߶]�q��z�L��U�GQ����ƙ0x��.kۗ����`�X~�y�,�,���q��mx��je��ԫf2��E9���@�(���0����h��X��AwzUm�