URL: https://arxiv.org/pdf/2506.03637
%PDF-1.5
%����
1 0 obj
<< /Metadata 3 0 R /Names 4 0 R /OpenAction 5 0 R /Outlines 6 0 R /PageMode /UseOutlines /Pages 7 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Author (Zhuohao Yu; Jiali Zeng; Weizheng Gu; Yidong Wang; Jindong Wang; Fandong Meng; Jie Zhou; Yue Zhang; Shikun Zhang; Wei Ye) /Creator (arXiv GenPDF \(tex2pdf:\)) /DOI (https://doi.org/10.48550/arXiv.2506.03637) /License (http://creativecommons.org/licenses/by/4.0/) /PTEX.Fullbanner (This is pdfTeX, Version 3.141592653-2.6-1.40.25 \(TeX Live 2023\) kpathsea version 6.3.5) /Producer (pikepdf 8.15.1) /Title (RewardAnything: Generalizable Principle-Following Reward Models) /Trapped /False /arXivID (https://arxiv.org/abs/2506.03637v2) >>
endobj
3 0 obj
<< /Subtype /XML /Type /Metadata /Length 1841 >>
stream
endstream
endobj
4 0 obj
<< /Dests 8 0 R >>
endobj
5 0 obj
<< /D [ 9 0 R /Fit ] /S /GoTo >>
endobj
6 0 obj
<< /Count 16 /First 10 0 R /Last 11 0 R /Type /Outlines >>
endobj
7 0 obj
<< /Count 25 /Kids [ 12 0 R 13 0 R 14 0 R 15 0 R 16 0 R ] /Type /Pages >>
endobj
8 0 obj
<< /Kids [ 17 0 R 18 0 R 19 0 R 20 0 R 21 0 R 22 0 R ] /Limits [ (Doc-Start) (table.caption.7) ] >>
endobj
9 0 obj
<< /Annots [ 23 0 R 24 0 R 25 0 R 26 0 R 27 0 R 28 0 R ] /Contents [ 29 0 R 30 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 12 0 R /Resources 31 0 R /Type /Page >>
endobj
10 0 obj
<< /A 32 0 R /Next 33 0 R /Parent 6 0 R /Title 34 0 R >>
endobj
11 0 obj
<< /A 35 0 R /Parent 6 0 R /Prev 36 0 R /Title 37 0 R >>
endobj
12 0 obj
<< /Count 6 /Kids [ 9 0 R 38 0 R 39 0 R 40 0 R 41 0 R 42 0 R ] /Parent 7 0 R /Type /Pages >>
endobj
13 0 obj
<< /Count 6 /Kids [ 43 0 R 44 0 R 45 0 R 46 0 R 47 0 R 48 0 R ] /Parent 7 0 R /Type /Pages >>
endobj
14 0 obj
<< /Count 6 /Kids [ 49 0 R 50 0 R 51 0 R 52 0 R 53 0 R 54 0 R ] /Parent 7 0 R /Type /Pages >>
endobj
15 0 obj
<< /Count 6 /Kids [ 55 0 R 56 0 R 57 0 R 58 0 R 59 0 R 60 0 R ] /Parent 7 0 R /Type /Pages >>
endobj
16 0 obj
<< /Count 1 /Kids [ 61 0 R ] /Parent 7 0 R /Type /Pages >>
endobj
17 0 obj
<< /Kids [ 62 0 R 63 0 R 64 0 R 65 0 R 66 0 R 67 0 R ] /Limits [ (Doc-Start) (cite.fu2025reward) ] >>
endobj
18 0 obj
<< /Kids [ 68 0 R 69 0 R 70 0 R 71 0 R 72 0 R 73 0 R ] /Limits [ (cite.gao2023scaling) (cite.pitis2024improving) ] >>
endobj
19 0 obj
<< /Kids [ 74 0 R 75 0 R 76 0 R 77 0 R 78 0 R 79 0 R ] /Limits [ (cite.pku_saferlhf) (cite.wangglue) ] >>
endobj
20 0 obj
<< /Kids [ 80 0 R 81 0 R 82 0 R 83 0 R 84 0 R 85 0 R ] /Limits [ (cite.weng2022large) (page.19) ] >>
endobj
21 0 obj
<< /Kids [ 86 0 R 87 0 R 88 0 R 89 0 R 90 0 R 91 0 R ] /Limits [ (page.2) (subsection.5.2) ] >>
endobj
22 0 obj
<< /Kids [ 92 0 R 93 0 R 94 0 R ] /Limits [ (subsection.5.3) (table.caption.7) ] >>
endobj
23 0 obj
<< /A << /S /URI /Type /Action /URI (https://zhuohaoyu.github.io/RewardAnything) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 200.396 559.502 411.604 572.404 ] /Subtype /Link /Type /Annot >>
endobj
24 0 obj
<< /A << /D (cite.ziegler2019fine) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 448.673 272.738 455.647 281.485 ] /Subtype /Link /Type /Annot >>
endobj
25 0 obj
<< /A << /D (cite.ouyang2022training) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 458.629 272.738 465.603 281.485 ] /Subtype /Link /Type /Annot >>
endobj
26 0 obj
<< /A << /D (cite.christiano2017deep) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 320.635 250.82 327.609 259.667 ] /Subtype /Link /Type /Annot >>
endobj
27 0 obj
<< /A << /D (cite.stiennon2020learning) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 330.605 250.92 337.579 259.667 ] /Subtype /Link /Type /Annot >>
endobj
28 0 obj
<< /A << /S /URI /URI (https://arxiv.org/abs/2506.03637v2) >> /BS << /W 0 >> /NM (fitz-L0) /Rect [ 12 231.01001 32 560.99 ] /Subtype /Link >>
endobj
29 0 obj
<< /Filter /FlateDecode /Length 139 >>
stream
x�E��
�@��}�}�Ľ�잂X"����8E}��3��u"��U`�������PK���2��5������*�F���v���KQm���0\�Ҁ�IJ����:4hz;O�J�E�b���iG�D{z2X&�
endstream
endobj
30 0 obj
<< /Filter /FlateDecode /Length 3071 >>
stream
xڝZYs�6~ׯ��.�J�#OQ۱��&m�� iX���_��h���T5qt7}|
�s��yu�-�_pxzw�9���+'=\`��G�8RN��ۋ_/~��x��{��!\����#|��w���0����y���CRgW�c��˻�6��8[.$� ���B��W��uR䟓�Bo�By�/u^����ۗ�ܪ(�
���D�:��U�.dp��z���WLzf;�?xNo���`��=�p~���D����cB)'��2$���w�>����X���C�Z%��p�WN�D<�E��h�_�'GR������$�orP5�Ԡ���I)�"�5�$�
��7�ru�y�{տZ�j ���~%�?62t�������W,�o��&/��Q�������1�.b��*k���ƌA'ޝ8��gB��~ror�L��#���;���� *�dN�����o���+�0 ��v��V��s��5>�g~���C?Z:ܟ�؊B�G��`.3L��̿��!���ў>n8�_7y��.��z8���M���>i����5�b�U79K��ܼ(��@��e="�W�Đ<��)}�>��H�1��/�.��r���5���}~�oڎ�;����%ؑ� A=���N��0a��w�� �j��a0�o�c�ݳg�)�