transformer_lens/model_bridge — 2 files changed, +21 −2 lines

@@ -107,7 +107,16 @@ def __init__(

         # Infer vocab size from tokenizer (similar to HookedTransformer)
         if self.cfg.d_vocab == -1:
-            self.cfg.d_vocab = max(self.tokenizer.vocab.values()) + 1
+            # Use get_vocab(), which works across different tokenizer types.
+            # Some tokenizers (like CodeGenTokenizer) don't support direct .vocab access.
+            if hasattr(self.tokenizer, "get_vocab"):
+                vocab = self.tokenizer.get_vocab()
+                self.cfg.d_vocab = max(vocab.values()) + 1
+            elif hasattr(self.tokenizer, "vocab"):
+                self.cfg.d_vocab = max(self.tokenizer.vocab.values()) + 1
+            else:
+                # Fallback: use the vocab_size attribute if available.
+                self.cfg.d_vocab = getattr(self.tokenizer, "vocab_size", 50257)
         if self.cfg.d_vocab_out == -1:
             self.cfg.d_vocab_out = self.cfg.d_vocab
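Why the fallback chain is ordered this way: get_vocab() is part of the common Hugging Face tokenizer interface, while .vocab is only an attribute on some tokenizer classes (per the comment above, CodeGenTokenizer lacks it), and max(ids) + 1 is used rather than len(vocab) so that non-contiguous token IDs still yield a large-enough d_vocab. A minimal standalone sketch of the same logic, assuming a Hugging Face tokenizer; the infer_d_vocab helper and the GPT-2 example are illustrative, not part of the patch:

from transformers import AutoTokenizer


def infer_d_vocab(tokenizer, default: int = 50257) -> int:
    """Infer vocab size, preferring get_vocab() over the .vocab attribute."""
    if hasattr(tokenizer, "get_vocab"):
        return max(tokenizer.get_vocab().values()) + 1
    if hasattr(tokenizer, "vocab"):
        return max(tokenizer.vocab.values()) + 1
    # Last resort: trust the reported vocab_size, else the supplied default.
    return getattr(tokenizer, "vocab_size", default)


tokenizer = AutoTokenizer.from_pretrained("gpt2")
print(infer_d_vocab(tokenizer))  # 50257 for GPT-2 (token IDs 0..50256)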

@@ -74,7 +74,17 @@ def __init__(self, cfg: Any) -> None:
             submodules={
                 "ln1": NormalizationBridge(name="input_layernorm", config=self.cfg),
                 "ln2": NormalizationBridge(name="post_attention_layernorm", config=self.cfg),
-                "attn": AttentionBridge(name="self_attn", config=self.cfg),
+                "attn": AttentionBridge(
+                    name="self_attn",
+                    config=self.cfg,
+                    submodules={
+                        # Phi-3 uses a combined qkv_proj, but we still need submodules for hooks.
+                        "q": LinearBridge(name="qkv_proj"),
+                        "k": LinearBridge(name="qkv_proj"),
+                        "v": LinearBridge(name="qkv_proj"),
+                        "o": LinearBridge(name="o_proj"),
+                    },
+                ),
                 "mlp": MLPBridge(name="mlp"),
             },
         ),
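Mapping q, k, and v onto the same underlying qkv_proj module works because Phi-3 computes all three projections with a single fused linear layer and slices its output, so hooks on the q/k/v submodules all observe that one module. A rough sketch of the slicing, with Phi-3-mini-style dimensions (hidden size 3072, 32 heads of size 96) taken as assumptions for illustration rather than from the patch:

import torch

# Assumed example dimensions, roughly matching Phi-3-mini.
hidden_size, n_heads, n_kv_heads, head_dim = 3072, 32, 32, 96

# One fused projection producing query, key, and value in a single tensor.
qkv_proj = torch.nn.Linear(hidden_size, (n_heads + 2 * n_kv_heads) * head_dim, bias=False)

x = torch.randn(1, 8, hidden_size)   # (batch, seq, d_model)
qkv = qkv_proj(x)
q_end = n_heads * head_dim
k_end = q_end + n_kv_heads * head_dim
q, k, v = qkv[..., :q_end], qkv[..., q_end:k_end], qkv[..., k_end:]
print(q.shape, k.shape, v.shape)     # each (1, 8, 3072) with these dimensions

How the bridge exposes the fused tensor to each hook is up to AttentionBridge; the diff only wires the submodule names so the hook points exist.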