VOOZH about

URL: https://arxiv.org/pdf/2505.06708


%PDF-1.5 %���� 1 0 obj << /Metadata 3 0 R /Names 4 0 R /OpenAction 5 0 R /PageMode /UseOutlines /Pages 6 0 R /Type /Catalog >> endobj 2 0 obj << /Author (Zihan Qiu; Zekun Wang; Bo Zheng; Zeyu Huang; Kaiyue Wen; Songlin Yang; Rui Men; Le Yu; Fei Huang; Suozhi Huang; Dayiheng Liu; Jingren Zhou; Junyang Lin) /Creator (arXiv GenPDF \(tex2pdf:\)) /DOI (https://doi.org/10.48550/arXiv.2505.06708) /License (http://arxiv.org/licenses/nonexclusive-distrib/1.0/) /PTEX.Fullbanner (This is pdfTeX, Version 3.141592653-2.6-1.40.25 \(TeX Live 2023\) kpathsea version 6.3.5) /Producer (pikepdf 8.15.1) /Title (Gated Attention for Large Language Models: Non-linearity, Sparsity, and Attention-Sink-Free) /Trapped /False /arXivID (https://arxiv.org/abs/2505.06708v1) >> endobj 3 0 obj << /Subtype /XML /Type /Metadata /Length 1910 >> stream endstream endobj 4 0 obj << /Dests 7 0 R >> endobj 5 0 obj << /D [ 8 0 R /Fit ] /S /GoTo >> endobj 6 0 obj << /Count 17 /Kids [ 9 0 R 10 0 R 11 0 R ] /Type /Pages >> endobj 7 0 obj << /Kids [ 12 0 R 13 0 R 14 0 R 15 0 R ] /Limits [ (Doc-Start) (table.caption.9) ] >> endobj 8 0 obj << /Annots [ 16 0 R 17 0 R 18 0 R 19 0 R 20 0 R 21 0 R 22 0 R 23 0 R 24 0 R 25 0 R 26 0 R 27 0 R 28 0 R 29 0 R 30 0 R 31 0 R 32 0 R 33 0 R 34 0 R 35 0 R 36 0 R 37 0 R 38 0 R 39 0 R 40 0 R 41 0 R 42 0 R 43 0 R 44 0 R 45 0 R 46 0 R 47 0 R 48 0 R 49 0 R 50 0 R 51 0 R 52 0 R 53 0 R 54 0 R 55 0 R 56 0 R 57 0 R 58 0 R 59 0 R 60 0 R 61 0 R ] /Contents [ 62 0 R 63 0 R ] /MediaBox [ 0 0 595.276 841.89 ] /Parent 9 0 R /Resources 64 0 R /Type /Page >> endobj 9 0 obj << /Count 6 /Kids [ 8 0 R 65 0 R 66 0 R 67 0 R 68 0 R 69 0 R ] /Parent 6 0 R /Type /Pages >> endobj 10 0 obj << /Count 6 /Kids [ 70 0 R 71 0 R 72 0 R 73 0 R 74 0 R 75 0 R ] /Parent 6 0 R /Type /Pages >> endobj 11 0 obj << /Count 5 /Kids [ 76 0 R 77 0 R 78 0 R 79 0 R 80 0 R ] /Parent 6 0 R /Type /Pages >> endobj 12 0 obj << /Kids [ 81 0 R 82 0 R 83 0 R 84 0 R 85 0 R 86 0 R ] /Limits [ (Doc-Start) (cite.montufar2014numberlinearregionsdeep) ] >> endobj 13 0 obj << /Kids [ 87 0 R 88 0 R 89 0 R 90 0 R 91 0 R 92 0 R ] /Limits [ (cite.olsson2022context) (equation.4.8) ] >> endobj 14 0 obj << /Kids [ 93 0 R 94 0 R 95 0 R 96 0 R 97 0 R 98 0 R ] /Limits [ (figure.caption.1) (section.4) ] >> endobj 15 0 obj << /Kids [ 99 0 R 100 0 R 101 0 R 102 0 R 103 0 R ] /Limits [ (section.5) (table.caption.9) ] >> endobj 16 0 obj << /A << /D (cite.hochreiter1997long) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 441.224 600.8 489.54 612.775 ] /Subtype /Link /Type /Annot >> endobj 17 0 obj << /A << /D (cite.hochreiter1997long) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 105.735 589.841 178.196 601.816 ] /Subtype /Link /Type /Annot >> endobj 18 0 obj << /A << /D (cite.hochreiter1997long) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 181.293 589.841 203.609 601.816 ] /Subtype /Link /Type /Annot >> endobj 19 0 obj << /A << /D (cite.srivastava2015highway) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 323.5 589.841 394.865 601.816 ] /Subtype /Link /Type /Annot >> endobj 20 0 obj << /A << /D (cite.srivastava2015highway) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 397.962 589.841 420.278 601.816 ] /Subtype /Link /Type /Annot >> endobj 21 0 obj << /A << /D (cite.gu2023mamba) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 173.702 578.882 221.569 590.857 ] /Subtype /Link /Type /Annot >> endobj 22 0 obj << /A << /D (cite.gu2023mamba) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 224.913 578.882 247.23 590.857 ] /Subtype /Link /Type /Annot >> endobj 23 0 obj << /A << /D (cite.hua2022transformer) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 331.441 578.882 377.449 590.857 ] /Subtype /Link /Type /Annot >> endobj 24 0 obj << /A << /D (cite.hua2022transformer) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 380.793 578.882 403.109 590.857 ] /Subtype /Link /Type /Annot >> endobj 25 0 obj << /A << /D (cite.lin2025forgetting) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 152.283 567.923 192.473 579.898 ] /Subtype /Link /Type /Annot >> endobj 26 0 obj << /A << /D (cite.lin2025forgetting) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 195.46 567.923 217.437 579.898 ] /Subtype /Link /Type /Annot >> endobj 27 0 obj << /A << /S /URI /Type /Action /URI (https://github.com/qiuzh20/gated_attention) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 443.918 436.416 470.458 448.391 ] /Subtype /Link /Type /Annot >> endobj 28 0 obj << /A << /S /URI /Type /Action /URI (https://huggingface.co/QwQZh/gated_attention) >> /Border [ 0 0 0 ] /C [ 0 1 1 ] /H /I /Rect [ 105.735 428.237 139.947 437.433 ] /Subtype /Link /Type /Annot >> endobj 29 0 obj << /A << /D (cite.hochreiter1997long) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 477.09 369.925 525.406 381.9 ] /Subtype /Link /Type /Annot >> endobj 30 0 obj << /A << /D (cite.hochreiter1997long) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 69.87 358.966 142.897 370.941 ] /Subtype /Link /Type /Annot >> endobj 31 0 obj << /A << /D (cite.hochreiter1997long) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 146.56 358.966 168.877 370.941 ] /Subtype /Link /Type /Annot >> endobj 32 0 obj << /A << /D (cite.srivastava2015highway) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 273.411 358.966 345.91 370.941 ] /Subtype /Link /Type /Annot >> endobj 33 0 obj << /A << /D (cite.srivastava2015highway) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 349.573 358.966 371.889 370.941 ] /Subtype /Link /Type /Annot >> endobj 34 0 obj << /A << /D (cite.dey2017gate) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 431.922 358.966 493.497 370.941 ] /Subtype /Link /Type /Annot >> endobj 35 0 obj << /A << /D (cite.dey2017gate) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 497.16 358.966 519.477 370.941 ] /Subtype /Link /Type /Annot >> endobj 36 0 obj << /A << /D (cite.gu2023mamba) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 159.735 326.089 206.248 338.064 ] /Subtype /Link /Type /Annot >> endobj 37 0 obj << /A << /D (cite.gu2023mamba) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 209.243 326.089 231.18 338.064 ] /Subtype /Link /Type /Annot >> endobj 38 0 obj << /A << /D (cite.mamba2) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 234.175 326.089 280.688 338.064 ] /Subtype /Link /Type /Annot >> endobj 39 0 obj << /A << /D (cite.mamba2) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 283.683 326.089 305.621 338.064 ] /Subtype /Link /Type /Annot >> endobj 40 0 obj << /A << /D (cite.hua2022transformer) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 433.745 326.089 478.433 338.064 ] /Subtype /Link /Type /Annot >> endobj 41 0 obj << /A << /D (cite.hua2022transformer) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 481.427 326.089 503.365 338.064 ] /Subtype /Link /Type /Annot >> endobj 42 0 obj << /A << /D (cite.sun2023retentivenetworksuccessortransformer) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 506.36 326.089 525.406 338.064 ] /Subtype /Link /Type /Annot >> endobj 43 0 obj << /A << /D (cite.sun2023retentivenetworksuccessortransformer) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 69.87 315.13 92.232 327.105 ] /Subtype /Link /Type /Annot >> endobj 44 0 obj << /A << /D (cite.sun2023retentivenetworksuccessortransformer) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 95.027 315.13 116.546 327.105 ] /Subtype /Link /Type /Annot >> endobj 45 0 obj << /A << /D (cite.qin2024lightning) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 119.341 315.13 160.208 327.105 ] /Subtype /Link /Type /Annot >> endobj 46 0 obj << /A << /D (cite.qin2024lightning) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 163.003 315.13 189.404 327.105 ] /Subtype /Link /Type /Annot >> endobj 47 0 obj << /A << /D (cite.yang2024gated) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 192.198 315.13 238.565 327.105 ] /Subtype /Link /Type /Annot >> endobj 48 0 obj << /A << /D (cite.yang2024gated) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 241.36 315.13 268.278 327.105 ] /Subtype /Link /Type /Annot >> endobj 49 0 obj << /A << /D (cite.lin2025forgetting) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 271.073 315.13 310.231 327.105 ] /Subtype /Link /Type /Annot >> endobj 50 0 obj << /A << /D (cite.lin2025forgetting) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 313.026 315.13 334.545 327.105 ] /Subtype /Link /Type /Annot >> endobj 51 0 obj << /A << /D (cite.csordas2024moeut) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 333.188 265.317 392.339 277.292 ] /Subtype /Link /Type /Annot >> endobj 52 0 obj << /A << /D (cite.csordas2024moeut) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 394.811 265.317 421.212 277.292 ] /Subtype /Link /Type /Annot >> endobj 53 0 obj << /A << /D (cite.csordas2024switchhead) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 421.66 265.317 429.052 277.292 ] /Subtype /Link /Type /Annot >> endobj 54 0 obj << /A << /D (subsection.A.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 503.096 254.358 519.758 266.333 ] /Subtype /Link /Type /Annot >> endobj 55 0 obj << /A << /D (cite.yuan2025native) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 445.578 221.481 494.242 233.456 ] /Subtype /Link /Type /Annot >> endobj 56 0 obj << /A << /D (cite.yuan2025native) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 497.268 221.481 519.505 233.456 ] /Subtype /Link /Type /Annot >> endobj 57 0 obj << /A << /D (cite.vaswani2017attention) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 418.586 171.668 456.912 183.643 ] /Subtype /Link /Type /Annot >> endobj 58 0 obj << /A << /D (cite.vaswani2017attention) /S /GoTo >> /Border [ 0 0 0 ] /C [ 0 1 0 ] /H /I /Rect [ 459.206 171.668 480.725 183.643 ] /Subtype /Link /Type /Annot >> endobj 59 0 obj << /A << /D (subsection.2.2) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 506.06 171.668 520.256 183.643 ] /Subtype /Link /Type /Annot >> endobj 60 0 obj << /A << /D (figure.caption.1) /S /GoTo >> /Border [ 0 0 0 ] /C [ 1 0 0 ] /H /I /Rect [ 325.941 160.709 332.92 172.684 ] /Subtype /Link /Type /Annot >> endobj 61 0 obj << /A << /S /URI /URI (https://arxiv.org/abs/2505.06708v1) >> /BS << /W 0 >> /NM (fitz-L0) /Rect [ 12 244.29499 32 597.595 ] /Subtype /Link >> endobj 62 0 obj << /Filter /FlateDecode /Length 143 >> stream x�E�A 1 E�9E.�1M�v �B��Cw�b��⢊z�UA�<~���� -�����7Zd�:�J�FS�E���2ý�7d�t�N;l��y���������¢B��+��ʾz�Ĥ�&8;q��͈�J7׆X?�v�:�l�� �'� endstream endobj 63 0 obj << /Filter /FlateDecode /Length 5091 >> stream xڵ[[s�F�~����.Xe"� n�'�ر���I���d�U ���c��Y��O�@�%+�r� �������=�hq��o�)�?Z�E�Y�,�<c�.��g�~�\ǚ*x���5[�Ԅ*�+��a�����o�j����G��&Ry���mn�>�t�j%b�<�^^>���y�':Y\^/l�������?�x%+������Y$�}�<�/�9��Ck4��F� |�����j�66al <�� '��/O)](�h��5�i�I���]�Y�t/�*�����Ps��RE���w<�� �+]Y}�~��æܵ_/WƘ������jW�%�����[&q�k_��8h�rP,�l�$I��0�e!Q�zN��EUX�8X}��MY��F��*N����0J��J'�Ib��j[���z��o�h��q��&B��„��&�E�኱�+�d�TE���q�w0D���R+��3����Ћp_&h�fF�8 -��8 �C">J�a�/,ޯ�rV6X�(�_J��V�]���Ϩ. �(����O�ne"&@N�P9�kQ���[�vU=#A�I�}Q�Y�AI���*� ?�j4~�BmGVf����p�W$�S7������r�4H03���47_~�op�eu�4�����/ ~���}`�l-�?��u�c���_����`��ʀ��/ �5Z��3��� ��wJFޥvw���i��P�+`���;��8���Q�C:v�S�N��,����&����F����2�*���_쪫���7͡?��`��f���ן�����$@C<�:\��כ���d27۹��:�?M/�j���F� �~2#��aW�a@Q��,L�G�h•�{9P*g�U{`�J��e��o�}�)�� �Ra��-�G)���a��~q�vM��ε�%�� VQ����H`_��Sij��}��V���U�-PdX �W%�"|�����֡/}W����4MX�����O� NuU�'*˟w�� ��,� .���ߢ8�I¿0&�/�@�A�����)��l�߆��fk���9� ��f�'\ \���j����٠;�gǣ�*��i3�T�2 �V���f{��yZ��<@�mPv�X�m��q���<<�,��RY�&�B4 �Pv��؅�=��0��� :�Z��)�'�sKq