Skip to content

Commit bd1a9b9

Browse files
committed
Run make fix-copies.
1 parent 6fba53d commit bd1a9b9

29 files changed: 174 additions (+174) and 0 deletions (-0).

src/transformers/models/aria/modeling_aria.py

+6
Original file line number | Diff line number | Diff line change
@@ -460,6 +460,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
460460
"""
461461
cos = cos.unsqueeze(unsqueeze_dim)
462462
sin = sin.unsqueeze(unsqueeze_dim)
463+
464+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
465+
if cos.shape[-1] != q.shape[-1]:
466+
cos = cos[..., : q.shape[-1]]
467+
sin = sin[..., : q.shape[-1]]
468+
463469
q_embed = (q * cos) + (rotate_half(q) * sin)
464470
k_embed = (k * cos) + (rotate_half(k) * sin)
465471
return q_embed, k_embed

src/transformers/models/chameleon/modeling_chameleon.py

+6
Original file line number | Diff line number | Diff line change
@@ -176,6 +176,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
176176
"""
177177
cos = cos.unsqueeze(unsqueeze_dim)
178178
sin = sin.unsqueeze(unsqueeze_dim)
179+
180+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
181+
if cos.shape[-1] != q.shape[-1]:
182+
cos = cos[..., : q.shape[-1]]
183+
sin = sin[..., : q.shape[-1]]
184+
179185
q_embed = (q * cos) + (rotate_half(q) * sin)
180186
k_embed = (k * cos) + (rotate_half(k) * sin)
181187
return q_embed, k_embed

src/transformers/models/dbrx/modeling_dbrx.py

+6
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
107107
"""
108108
cos = cos.unsqueeze(unsqueeze_dim)
109109
sin = sin.unsqueeze(unsqueeze_dim)
110+
111+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
112+
if cos.shape[-1] != q.shape[-1]:
113+
cos = cos[..., : q.shape[-1]]
114+
sin = sin[..., : q.shape[-1]]
115+
110116
q_embed = (q * cos) + (rotate_half(q) * sin)
111117
k_embed = (k * cos) + (rotate_half(k) * sin)
112118
return q_embed, k_embed

src/transformers/models/diffllama/modeling_diffllama.py

+6
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
106106
"""
107107
cos = cos.unsqueeze(unsqueeze_dim)
108108
sin = sin.unsqueeze(unsqueeze_dim)
109+
110+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
111+
if cos.shape[-1] != q.shape[-1]:
112+
cos = cos[..., : q.shape[-1]]
113+
sin = sin[..., : q.shape[-1]]
114+
109115
q_embed = (q * cos) + (rotate_half(q) * sin)
110116
k_embed = (k * cos) + (rotate_half(k) * sin)
111117
return q_embed, k_embed

src/transformers/models/emu3/modeling_emu3.py

+6
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
119119
"""
120120
cos = cos.unsqueeze(unsqueeze_dim)
121121
sin = sin.unsqueeze(unsqueeze_dim)
122+
123+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
124+
if cos.shape[-1] != q.shape[-1]:
125+
cos = cos[..., : q.shape[-1]]
126+
sin = sin[..., : q.shape[-1]]
127+
122128
q_embed = (q * cos) + (rotate_half(q) * sin)
123129
k_embed = (k * cos) + (rotate_half(k) * sin)
124130
return q_embed, k_embed

src/transformers/models/falcon/modeling_falcon.py

+6
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
104104
"""
105105
cos = cos.unsqueeze(unsqueeze_dim)
106106
sin = sin.unsqueeze(unsqueeze_dim)
107+
108+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
109+
if cos.shape[-1] != q.shape[-1]:
110+
cos = cos[..., : q.shape[-1]]
111+
sin = sin[..., : q.shape[-1]]
112+
107113
q_embed = (q * cos) + (rotate_half(q) * sin)
108114
k_embed = (k * cos) + (rotate_half(k) * sin)
109115
return q_embed, k_embed

src/transformers/models/gemma/modeling_gemma.py

+6
Original file line number | Diff line number | Diff line change
@@ -182,6 +182,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
182182
"""
183183
cos = cos.unsqueeze(unsqueeze_dim)
184184
sin = sin.unsqueeze(unsqueeze_dim)
185+
186+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
187+
if cos.shape[-1] != q.shape[-1]:
188+
cos = cos[..., : q.shape[-1]]
189+
sin = sin[..., : q.shape[-1]]
190+
185191
q_embed = (q * cos) + (rotate_half(q) * sin)
186192
k_embed = (k * cos) + (rotate_half(k) * sin)
187193
return q_embed, k_embed

src/transformers/models/gemma2/modeling_gemma2.py

+6
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
121121
"""
122122
cos = cos.unsqueeze(unsqueeze_dim)
123123
sin = sin.unsqueeze(unsqueeze_dim)
124+
125+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
126+
if cos.shape[-1] != q.shape[-1]:
127+
cos = cos[..., : q.shape[-1]]
128+
sin = sin[..., : q.shape[-1]]
129+
124130
q_embed = (q * cos) + (rotate_half(q) * sin)
125131
k_embed = (k * cos) + (rotate_half(k) * sin)
126132
return q_embed, k_embed

src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py

+6
Original file line number | Diff line number | Diff line change
@@ -314,6 +314,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
314314
"""
315315
cos = cos.unsqueeze(unsqueeze_dim)
316316
sin = sin.unsqueeze(unsqueeze_dim)
317+
318+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
319+
if cos.shape[-1] != q.shape[-1]:
320+
cos = cos[..., : q.shape[-1]]
321+
sin = sin[..., : q.shape[-1]]
322+
317323
q_embed = (q * cos) + (rotate_half(q) * sin)
318324
k_embed = (k * cos) + (rotate_half(k) * sin)
319325
return q_embed, k_embed

src/transformers/models/granite/modeling_granite.py

+6
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
7777
"""
7878
cos = cos.unsqueeze(unsqueeze_dim)
7979
sin = sin.unsqueeze(unsqueeze_dim)
80+
81+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
82+
if cos.shape[-1] != q.shape[-1]:
83+
cos = cos[..., : q.shape[-1]]
84+
sin = sin[..., : q.shape[-1]]
85+
8086
q_embed = (q * cos) + (rotate_half(q) * sin)
8187
k_embed = (k * cos) + (rotate_half(k) * sin)
8288
return q_embed, k_embed

src/transformers/models/jetmoe/modeling_jetmoe.py

+6
Original file line number | Diff line number | Diff line change
@@ -479,6 +479,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
479479
"""
480480
cos = cos.unsqueeze(unsqueeze_dim)
481481
sin = sin.unsqueeze(unsqueeze_dim)
482+
483+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
484+
if cos.shape[-1] != q.shape[-1]:
485+
cos = cos[..., : q.shape[-1]]
486+
sin = sin[..., : q.shape[-1]]
487+
482488
q_embed = (q * cos) + (rotate_half(q) * sin)
483489
k_embed = (k * cos) + (rotate_half(k) * sin)
484490
return q_embed, k_embed

src/transformers/models/mimi/modeling_mimi.py

+6
Original file line number | Diff line number | Diff line change
@@ -456,6 +456,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
456456
"""
457457
cos = cos.unsqueeze(unsqueeze_dim)
458458
sin = sin.unsqueeze(unsqueeze_dim)
459+
460+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
461+
if cos.shape[-1] != q.shape[-1]:
462+
cos = cos[..., : q.shape[-1]]
463+
sin = sin[..., : q.shape[-1]]
464+
459465
q_embed = (q * cos) + (rotate_half(q) * sin)
460466
k_embed = (k * cos) + (rotate_half(k) * sin)
461467
return q_embed, k_embed

src/transformers/models/mistral/modeling_mistral.py

+6
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
8787
"""
8888
cos = cos.unsqueeze(unsqueeze_dim)
8989
sin = sin.unsqueeze(unsqueeze_dim)
90+
91+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
92+
if cos.shape[-1] != q.shape[-1]:
93+
cos = cos[..., : q.shape[-1]]
94+
sin = sin[..., : q.shape[-1]]
95+
9096
q_embed = (q * cos) + (rotate_half(q) * sin)
9197
k_embed = (k * cos) + (rotate_half(k) * sin)
9298
return q_embed, k_embed

src/transformers/models/mixtral/modeling_mixtral.py

+6
Original file line number | Diff line number | Diff line change
@@ -200,6 +200,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
200200
"""
201201
cos = cos.unsqueeze(unsqueeze_dim)
202202
sin = sin.unsqueeze(unsqueeze_dim)
203+
204+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
205+
if cos.shape[-1] != q.shape[-1]:
206+
cos = cos[..., : q.shape[-1]]
207+
sin = sin[..., : q.shape[-1]]
208+
203209
q_embed = (q * cos) + (rotate_half(q) * sin)
204210
k_embed = (k * cos) + (rotate_half(k) * sin)
205211
return q_embed, k_embed

src/transformers/models/mllama/modeling_mllama.py

+6
Original file line number | Diff line number | Diff line change
@@ -641,6 +641,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
641641
"""
642642
cos = cos.unsqueeze(unsqueeze_dim)
643643
sin = sin.unsqueeze(unsqueeze_dim)
644+
645+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
646+
if cos.shape[-1] != q.shape[-1]:
647+
cos = cos[..., : q.shape[-1]]
648+
sin = sin[..., : q.shape[-1]]
649+
644650
q_embed = (q * cos) + (rotate_half(q) * sin)
645651
k_embed = (k * cos) + (rotate_half(k) * sin)
646652
return q_embed, k_embed

src/transformers/models/modernbert/modeling_modernbert.py

+6
Original file line number | Diff line number | Diff line change
@@ -330,6 +330,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
330330
"""
331331
cos = cos.unsqueeze(unsqueeze_dim)
332332
sin = sin.unsqueeze(unsqueeze_dim)
333+
334+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
335+
if cos.shape[-1] != q.shape[-1]:
336+
cos = cos[..., : q.shape[-1]]
337+
sin = sin[..., : q.shape[-1]]
338+
333339
q_embed = (q * cos) + (rotate_half(q) * sin)
334340
k_embed = (k * cos) + (rotate_half(k) * sin)
335341
return q_embed, k_embed

src/transformers/models/moshi/modeling_moshi.py

+6
Original file line number | Diff line number | Diff line change
@@ -400,6 +400,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
400400
"""
401401
cos = cos.unsqueeze(unsqueeze_dim)
402402
sin = sin.unsqueeze(unsqueeze_dim)
403+
404+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
405+
if cos.shape[-1] != q.shape[-1]:
406+
cos = cos[..., : q.shape[-1]]
407+
sin = sin[..., : q.shape[-1]]
408+
403409
q_embed = (q * cos) + (rotate_half(q) * sin)
404410
k_embed = (k * cos) + (rotate_half(k) * sin)
405411
return q_embed, k_embed

src/transformers/models/olmo/modeling_olmo.py

+6
Original file line number | Diff line number | Diff line change
@@ -93,6 +93,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
9393
"""
9494
cos = cos.unsqueeze(unsqueeze_dim)
9595
sin = sin.unsqueeze(unsqueeze_dim)
96+
97+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
98+
if cos.shape[-1] != q.shape[-1]:
99+
cos = cos[..., : q.shape[-1]]
100+
sin = sin[..., : q.shape[-1]]
101+
96102
q_embed = (q * cos) + (rotate_half(q) * sin)
97103
k_embed = (k * cos) + (rotate_half(k) * sin)
98104
return q_embed, k_embed

src/transformers/models/olmo2/modeling_olmo2.py

+6
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
8282
"""
8383
cos = cos.unsqueeze(unsqueeze_dim)
8484
sin = sin.unsqueeze(unsqueeze_dim)
85+
86+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
87+
if cos.shape[-1] != q.shape[-1]:
88+
cos = cos[..., : q.shape[-1]]
89+
sin = sin[..., : q.shape[-1]]
90+
8591
q_embed = (q * cos) + (rotate_half(q) * sin)
8692
k_embed = (k * cos) + (rotate_half(k) * sin)
8793
return q_embed, k_embed

src/transformers/models/olmoe/modeling_olmoe.py

+6
Original file line number | Diff line number | Diff line change
@@ -250,6 +250,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
250250
"""
251251
cos = cos.unsqueeze(unsqueeze_dim)
252252
sin = sin.unsqueeze(unsqueeze_dim)
253+
254+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
255+
if cos.shape[-1] != q.shape[-1]:
256+
cos = cos[..., : q.shape[-1]]
257+
sin = sin[..., : q.shape[-1]]
258+
253259
q_embed = (q * cos) + (rotate_half(q) * sin)
254260
k_embed = (k * cos) + (rotate_half(k) * sin)
255261
return q_embed, k_embed

src/transformers/models/persimmon/modeling_persimmon.py

+6
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
148148
"""
149149
cos = cos.unsqueeze(unsqueeze_dim)
150150
sin = sin.unsqueeze(unsqueeze_dim)
151+
152+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
153+
if cos.shape[-1] != q.shape[-1]:
154+
cos = cos[..., : q.shape[-1]]
155+
sin = sin[..., : q.shape[-1]]
156+
151157
q_embed = (q * cos) + (rotate_half(q) * sin)
152158
k_embed = (k * cos) + (rotate_half(k) * sin)
153159
return q_embed, k_embed

src/transformers/models/phi/modeling_phi.py

+6
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
7070
"""
7171
cos = cos.unsqueeze(unsqueeze_dim)
7272
sin = sin.unsqueeze(unsqueeze_dim)
73+
74+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
75+
if cos.shape[-1] != q.shape[-1]:
76+
cos = cos[..., : q.shape[-1]]
77+
sin = sin[..., : q.shape[-1]]
78+
7379
q_embed = (q * cos) + (rotate_half(q) * sin)
7480
k_embed = (k * cos) + (rotate_half(k) * sin)
7581
return q_embed, k_embed

src/transformers/models/phi3/modeling_phi3.py

+6
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
104104
"""
105105
cos = cos.unsqueeze(unsqueeze_dim)
106106
sin = sin.unsqueeze(unsqueeze_dim)
107+
108+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
109+
if cos.shape[-1] != q.shape[-1]:
110+
cos = cos[..., : q.shape[-1]]
111+
sin = sin[..., : q.shape[-1]]
112+
107113
q_embed = (q * cos) + (rotate_half(q) * sin)
108114
k_embed = (k * cos) + (rotate_half(k) * sin)
109115
return q_embed, k_embed

src/transformers/models/qwen2/modeling_qwen2.py

+6
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
8787
"""
8888
cos = cos.unsqueeze(unsqueeze_dim)
8989
sin = sin.unsqueeze(unsqueeze_dim)
90+
91+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
92+
if cos.shape[-1] != q.shape[-1]:
93+
cos = cos[..., : q.shape[-1]]
94+
sin = sin[..., : q.shape[-1]]
95+
9096
q_embed = (q * cos) + (rotate_half(q) * sin)
9197
k_embed = (k * cos) + (rotate_half(k) * sin)
9298
return q_embed, k_embed

src/transformers/models/qwen2_moe/modeling_qwen2_moe.py

+6
Original file line number | Diff line number | Diff line change
@@ -259,6 +259,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
259259
"""
260260
cos = cos.unsqueeze(unsqueeze_dim)
261261
sin = sin.unsqueeze(unsqueeze_dim)
262+
263+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
264+
if cos.shape[-1] != q.shape[-1]:
265+
cos = cos[..., : q.shape[-1]]
266+
sin = sin[..., : q.shape[-1]]
267+
262268
q_embed = (q * cos) + (rotate_half(q) * sin)
263269
k_embed = (k * cos) + (rotate_half(k) * sin)
264270
return q_embed, k_embed

src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py

+6
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
124124
"""
125125
cos = cos.unsqueeze(unsqueeze_dim)
126126
sin = sin.unsqueeze(unsqueeze_dim)
127+
128+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
129+
if cos.shape[-1] != q.shape[-1]:
130+
cos = cos[..., : q.shape[-1]]
131+
sin = sin[..., : q.shape[-1]]
132+
127133
q_embed = (q * cos) + (rotate_half(q) * sin)
128134
k_embed = (k * cos) + (rotate_half(k) * sin)
129135
return q_embed, k_embed

src/transformers/models/stablelm/modeling_stablelm.py

+6
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
154154
"""
155155
cos = cos.unsqueeze(unsqueeze_dim)
156156
sin = sin.unsqueeze(unsqueeze_dim)
157+
158+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
159+
if cos.shape[-1] != q.shape[-1]:
160+
cos = cos[..., : q.shape[-1]]
161+
sin = sin[..., : q.shape[-1]]
162+
157163
q_embed = (q * cos) + (rotate_half(q) * sin)
158164
k_embed = (k * cos) + (rotate_half(k) * sin)
159165
return q_embed, k_embed

src/transformers/models/starcoder2/modeling_starcoder2.py

+6
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
107107
"""
108108
cos = cos.unsqueeze(unsqueeze_dim)
109109
sin = sin.unsqueeze(unsqueeze_dim)
110+
111+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
112+
if cos.shape[-1] != q.shape[-1]:
113+
cos = cos[..., : q.shape[-1]]
114+
sin = sin[..., : q.shape[-1]]
115+
110116
q_embed = (q * cos) + (rotate_half(q) * sin)
111117
k_embed = (k * cos) + (rotate_half(k) * sin)
112118
return q_embed, k_embed

src/transformers/models/zamba2/modeling_zamba2.py

+6
Original file line number | Diff line number | Diff line change
@@ -350,6 +350,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
350350
"""
351351
cos = cos.unsqueeze(unsqueeze_dim)
352352
sin = sin.unsqueeze(unsqueeze_dim)
353+
354+
# Adjust the rotary embedding dimensions if they don't match q's last dimension.
355+
if cos.shape[-1] != q.shape[-1]:
356+
cos = cos[..., : q.shape[-1]]
357+
sin = sin[..., : q.shape[-1]]
358+
353359
q_embed = (q * cos) + (rotate_half(q) * sin)
354360
k_embed = (k * cos) + (rotate_half(k) * sin)
355361
return q_embed, k_embed

0 commit comments

Comments
 (0)