Source code for nvtripy.frontend.module.linear

#
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from dataclasses import dataclass
from typing import Optional

from nvtripy import export, utils
from nvtripy.common import datatype
from nvtripy.frontend.module.module import Module
from nvtripy.frontend.module.parameter import DefaultParameter
from nvtripy.frontend.tensor import Tensor


@export.public_api(document_under="operations/modules")
@dataclass
@utils.utils.constant_fields(["dtype", "quant_dtype"])
class Linear(Module):
    r"""
    A fully connected layer that computes :math:`Linear(x) = xW^T + b`,
    optionally quantizing and dequantizing the input and the weight first.
    """

    dtype: datatype.dtype
    r"""The data type used to perform the operation"""

    weight: Tensor
    r"""The :math:`W` matrix of shape :math:`[\text{out_features}, \text{in_features}]`"""

    bias: Optional[Tensor]
    r"""
    The :math:`b` bias term. It is created with shape :math:`[\text{out_features}]`
    and unsqueezed to :math:`[1, \text{out_features}]` when it is added to the output.
    """

    quant_dtype: Optional[datatype.dtype]
    r"""The quantization data type"""

    weight_scale: Optional[Tensor]
    r"""The quantization scale for weight"""

    input_scale: Optional[Tensor]
    r"""The quantization scale for input"""

    weight_quant_dim: Optional[int]
    r"""The dimension along which to apply the weight quantization scale."""

    def __init__(
        self,
        in_features: int,
        out_features: int,
        bias: bool = True,
        dtype: datatype.dtype = datatype.float32,
        quant_dtype: Optional[datatype.dtype] = None,
        weight_quant_dim: Optional[int] = None,
    ) -> None:
        """
        Args:
            in_features: Number of features in each input row.
            out_features: Number of features in each output row.
            bias: If true, a bias parameter is created; otherwise ``bias`` is ``None``.
            dtype: Data type of the weight, bias, and quantization scale parameters.
            quant_dtype: Data type to quantize to. When ``None``, no scale
                parameters are created and the layer is not quantized.
            weight_quant_dim: Dimension of the weight that the weight scale is
                applied along. When ``None``, the weight scale is created
                without a known shape.

        .. code-block:: python
            :linenos:

            linear = tp.Linear(3, 4)

            input = tp.iota((2, 3))
            output = linear(input)

            assert cp.from_dlpack(output).get().shape == (2, 4)
        """
        super().__init__()

        self.dtype = dtype

        # Replace with random weights when #74 is completed.
        self.weight = DefaultParameter((out_features, in_features), dtype=dtype)
        self.bias = DefaultParameter((out_features,), dtype=dtype) if bias else None

        self.quant_dtype = quant_dtype
        self.weight_quant_dim = weight_quant_dim

        if quant_dtype is None:
            # Unquantized layer: there are no scales to learn or load.
            self.weight_scale = None
            self.input_scale = None
        else:
            # The weight scale has one entry per element of the quantized
            # weight dimension when that dimension is given.
            scale_shape = None if weight_quant_dim is None else [self.weight._shape[weight_quant_dim]]
            self.weight_scale = DefaultParameter(shape=scale_shape, dtype=dtype)
            self.input_scale = DefaultParameter(shape=None, dtype=dtype)

    def __call__(self, x: "nvtripy.Tensor") -> "nvtripy.Tensor":
        r"""
        Args:
            x: The input tensor, of shape :math:`[*, \text{in_features}]`.

        Returns:
            A tensor of shape :math:`[*, \text{out_features}]`.
        """
        from nvtripy.common.exception import raise_error
        from nvtripy.frontend.ops.transpose import transpose
        from nvtripy.frontend.ops.unsqueeze import unsqueeze
        from nvtripy.frontend.trace.ops.dequantize import dequantize
        from nvtripy.frontend.trace.ops.quantize import quantize

        # Start from the plain weight; it is swapped for a
        # quantize -> dequantize round trip when quantization is enabled.
        weight = self.weight

        if self.quant_dtype is not None:
            # NOTE(review): this is a truthiness test on the scale rather than
            # an `is not None` check - confirm that is the intended condition.
            if self.input_scale:
                if self.weight_quant_dim == 1:
                    # TODO(#157): Give more informative error message to explain why
                    # it is not supported.
                    raise_error(
                        "Unsupported quantization parameters for Linear module.",
                        [
                            "weight_quant_dim cannot be 1 when input_scale is provided. ",
                            f"input_scale={self.input_scale}, weight_quant_dim={self.weight_quant_dim}",
                        ],
                    )

                quantized_input = quantize(x, self.input_scale, self.quant_dtype)
                x = dequantize(quantized_input, self.input_scale, self.dtype)

            quantized_weight = quantize(weight, self.weight_scale, self.quant_dtype, self.weight_quant_dim)
            weight = dequantize(quantized_weight, self.weight_scale, self.dtype, self.weight_quant_dim)

        result = x @ (transpose(weight, 1, 0))
        if self.bias is None:
            return result

        # Give the bias a leading unit dimension before adding it to the output.
        return result + unsqueeze(self.bias, 0)