llama_model_descriptor

Llama model descriptor for AnyModel compression.

Classes

LlamaModelDescriptor

Model descriptor for Llama models (Llama 2, Llama 3, Llama 3.1, Llama 3.2).

LlamaFFNIntermediateLayerDescriptor

Layer descriptor for Llama FFN intermediate pruning.

LlamaKVHeadsLayerDescriptor

LlamaKVHeadsLayerDescriptor(o_proj_name: str = 'self_attn.o_proj', attn_prefix_name: str = 'model.layers.{layer_idx}.self_attn', qkvo_weight_names: List[str] = <factory>)

class LlamaFFNIntermediateLayerDescriptor

Bases: FFNIntermediateLayerDescriptor

Layer descriptor for Llama FFN intermediate pruning.

__init__(down_proj_name='mlp.down_proj', ffn_prefix_name='model.layers.{layer_idx}.mlp', linear_weight_names=<factory>)
Parameters:
  • down_proj_name (str)

  • ffn_prefix_name (str)

  • linear_weight_names (List[str])

Return type:

None

down_proj_name: str = 'mlp.down_proj'
ffn_prefix_name: str = 'model.layers.{layer_idx}.mlp'
linear_weight_names: List[str]
class LlamaKVHeadsLayerDescriptor

Bases: KVHeadsLayerDescriptor

LlamaKVHeadsLayerDescriptor(o_proj_name: str = 'self_attn.o_proj', attn_prefix_name: str = 'model.layers.{layer_idx}.self_attn', qkvo_weight_names: List[str] = <factory>)

__init__(o_proj_name='self_attn.o_proj', attn_prefix_name='model.layers.{layer_idx}.self_attn', qkvo_weight_names=<factory>)
Parameters:
  • o_proj_name (str)

  • attn_prefix_name (str)

  • qkvo_weight_names (List[str])

Return type:

None

attn_prefix_name: str = 'model.layers.{layer_idx}.self_attn'
o_proj_name: str = 'self_attn.o_proj'
qkvo_weight_names: List[str]
class LlamaModelDescriptor

Bases: ModelDescriptor

Model descriptor for Llama models (Llama 2, Llama 3, Llama 3.1, Llama 3.2).

static attn_no_op_post_init(decoder_layer)
Parameters:

decoder_layer (LlamaDecoderLayer)

static block_config_to_layer_overrides(block_config)
Parameters:

block_config (BlockConfig)

static decoder_layer_cls()
static final_norm_name()
static init_rotary_embedding(model, runtime)
Parameters:

  • model (LlamaForCausalLM)

  • runtime

static input_embedding_name()
static layer_block_name(index)
Parameters:

index (int)

static layer_name_predicates(num_layers)
Parameters:

num_layers (int)

Return type:

Dict[str, Pattern]

static mlp_no_op_post_init(decoder_layer)
Parameters:

decoder_layer (LlamaDecoderLayer)

static output_embedding_name()