"""
Each modeling file in this library is a mapping between
abstract names of intervention anchor points and the actual
model modules defined in the HuggingFace library.

We also want to let the intervention library know how to
configure the dimensions of an intervention based on the model
config defined in the HuggingFace library.
"""


import torch
from ..constants import *


mistral_type_to_module_mapping = {
    "block_input": ("layers[%s]", CONST_INPUT_HOOK),
    "block_output": ("layers[%s]", CONST_OUTPUT_HOOK),
    "mlp_activation": ("layers[%s].mlp.act_fn", CONST_OUTPUT_HOOK),
    "mlp_output": ("layers[%s].mlp", CONST_OUTPUT_HOOK),
    "mlp_input": ("layers[%s].mlp", CONST_INPUT_HOOK),
    "attention_value_output": ("layers[%s].self_attn.o_proj", CONST_INPUT_HOOK),
    "head_attention_value_output": ("layers[%s].self_attn.o_proj", CONST_INPUT_HOOK),
    "attention_output": ("layers[%s].self_attn", CONST_OUTPUT_HOOK),
    "attention_input": ("layers[%s].self_attn", CONST_INPUT_HOOK),
    "query_output": ("layers[%s].self_attn.q_proj", CONST_OUTPUT_HOOK),
    "key_output": ("layers[%s].self_attn.k_proj", CONST_OUTPUT_HOOK),
    "value_output": ("layers[%s].self_attn.v_proj", CONST_OUTPUT_HOOK),
    "head_query_output": ("layers[%s].self_attn.q_proj", CONST_OUTPUT_HOOK),
    "head_key_output": ("layers[%s].self_attn.k_proj", CONST_OUTPUT_HOOK),
    "head_value_output": ("layers[%s].self_attn.v_proj", CONST_OUTPUT_HOOK),
}


mistral_type_to_dimension_mapping = {
    "block_input": ("hidden_size",),
    "block_output": ("hidden_size",),
    "mlp_activation": ("intermediate_size",),
    "mlp_output": ("hidden_size",),
    "mlp_input": ("hidden_size",),
    "attention_value_output": ("hidden_size",),
    "head_attention_value_output": ("hidden_size/num_attention_heads",),
    "attention_output": ("hidden_size",),
    "attention_input": ("hidden_size",),
    "query_output": ("hidden_size",),
    "key_output": ("hidden_size",),
    "value_output": ("hidden_size",),
    "head_query_output": ("hidden_size/num_attention_heads",),
    "head_key_output": ("hidden_size/num_attention_heads",),
    "head_value_output": ("hidden_size/num_attention_heads",),
}
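

# A minimal illustrative sketch (not part of the library API) of one way the
# two mappings above can be consumed together: the "%s" placeholder in a
# module path is filled in with a layer index, and the dimension string names
# the model-config attribute (or a simple expression over attributes) that
# sizes the intervention at that anchor point. The helper name below is
# hypothetical and exists only for documentation.
def _resolve_anchor_example(component, layer):
    path, hook_type = mistral_type_to_module_mapping[component]
    dims = mistral_type_to_dimension_mapping[component]
    # e.g. _resolve_anchor_example("mlp_output", 2)
    #      -> ("layers[2].mlp", CONST_OUTPUT_HOOK, ("hidden_size",))
    return path % layer, hook_type, dims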


"""mistral model with LM head"""
mistral_lm_type_to_module_mapping = {}
for k, v in mistral_type_to_module_mapping.items():
    mistral_lm_type_to_module_mapping[k] = (f"model.{v[0]}", v[1])


mistral_lm_type_to_dimension_mapping = mistral_type_to_dimension_mapping


def create_mistral(
    name="mistralai/Mistral-7B-v0.1", cache_dir=None
):
    """Creates a Mistral Causal LM model, config, and tokenizer from the given name"""
    from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig

    config = AutoConfig.from_pretrained(name, cache_dir=cache_dir)
    tokenizer = AutoTokenizer.from_pretrained(name, cache_dir=cache_dir)
    mistral = AutoModelForCausalLM.from_pretrained(
        name,
        config=config,
        cache_dir=cache_dir,
        torch_dtype=torch.bfloat16,  # save memory
    )
    print("loaded model")
    return config, tokenizer, mistral
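

# Example usage (a minimal sketch, kept as comments so importing this module
# does not download the full 7B checkpoint):
#
#     config, tokenizer, mistral = create_mistral()
#     # anchor points for the LM-headed model are prefixed with "model."
#     print(mistral_lm_type_to_module_mapping["mlp_output"])
#     # -> ("model.layers[%s].mlp", CONST_OUTPUT_HOOK)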