mistral_small_model_descriptor

Classes

MistralSmallModelDescriptor

MistralFFNIntermediateLayerDescriptor

MistralFFNIntermediateLayerDescriptor(down_proj_name: str = 'mlp.down_proj', ffn_prefix_name: str = 'model.layers.{layer_idx}.mlp', linear_weight_names: List[str] = <factory>)

MistralKVHeadsLayerDescriptor

MistralKVHeadsLayerDescriptor(o_proj_name: str = 'self_attn.o_proj', attn_prefix_name: str = 'model.layers.{layer_idx}.self_attn', qkvo_weight_names: List[str] = <factory>)

class MistralFFNIntermediateLayerDescriptor

Bases: FFNIntermediateLayerDescriptor

MistralFFNIntermediateLayerDescriptor(down_proj_name: str = 'mlp.down_proj', ffn_prefix_name: str = 'model.layers.{layer_idx}.mlp', linear_weight_names: List[str] = <factory>)

__init__(down_proj_name='mlp.down_proj', ffn_prefix_name='model.layers.{layer_idx}.mlp', linear_weight_names=<factory>)
Parameters:
  • down_proj_name (str)

  • ffn_prefix_name (str)

  • linear_weight_names (List[str])

Return type:

None

down_proj_name: str = 'mlp.down_proj'
ffn_prefix_name: str = 'model.layers.{layer_idx}.mlp'
linear_weight_names: List[str]
class MistralKVHeadsLayerDescriptor

Bases: KVHeadsLayerDescriptor

MistralKVHeadsLayerDescriptor(o_proj_name: str = 'self_attn.o_proj', attn_prefix_name: str = 'model.layers.{layer_idx}.self_attn', qkvo_weight_names: List[str] = <factory>)

__init__(o_proj_name='self_attn.o_proj', attn_prefix_name='model.layers.{layer_idx}.self_attn', qkvo_weight_names=<factory>)
Parameters:
  • o_proj_name (str)

  • attn_prefix_name (str)

  • qkvo_weight_names (List[str])

Return type:

None

attn_prefix_name: str = 'model.layers.{layer_idx}.self_attn'
o_proj_name: str = 'self_attn.o_proj'
qkvo_weight_names: List[str]
class MistralSmallModelDescriptor

Bases: ModelDescriptor

static attn_no_op_post_init(decoder_layer)
Parameters:

decoder_layer (MistralDecoderLayer)

static block_config_to_layer_overrides(block_config)
Parameters:

block_config (BlockConfig)

static decoder_layer_cls()
static final_norm_name()
static init_rotary_embedding(model, runtime)
Parameters:

model (MistralForCausalLM)

static input_embedding_name()
static layer_block_name(index)
Parameters:

index (int)

static layer_name_predicates(num_layers)
Parameters:

num_layers (int)

Return type:

Dict[str, Pattern]

static mlp_no_op_post_init(decoder_layer)
Parameters:

decoder_layer (MistralDecoderLayer)

static output_embedding_name()