/*
 * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include "multimodal/modelTypes.h"
#include <NvInfer.h>
#include <filesystem>
#include <nlohmann/json.hpp>
#include <sstream>
#include <string>

using Json = nlohmann::json;

namespace trt_edgellm
{

namespace builder
{

//! Configuration structure for visual model building.
//! Contains parameters needed to configure the TensorRT engine building process
//! for visual encoders used in Vision-Language Models.
struct VisualBuilderConfig
{
    int64_t minImageTokens{4};           //!< Minimum number of image tokens in a batch
    int64_t maxImageTokens{1024};        //!< Maximum number of image tokens in a batch
    int64_t maxImageTokensPerImage{512}; //!< Maximum number of image tokens per image
    bool profilingDetailed{false};       //!< Enable detailed profiling verbosity for layer info extraction

    //! Convert configuration to JSON format for serialization.
    //! @return JSON object containing all configuration parameters
    Json toJson() const
    {
        Json json;
        json["min_image_tokens"] = minImageTokens;
        json["max_image_tokens"] = maxImageTokens;
        json["max_image_tokens_per_image"] = maxImageTokensPerImage;
        return json;
    }

    //! Create configuration from JSON format.
    //! @param json JSON object containing configuration parameters
    //! @return VisualBuilderConfig object with parsed parameters
    static VisualBuilderConfig fromJson(Json const& json)
    {
        VisualBuilderConfig config;
        if (json.contains("min_image_tokens"))
        {
            config.minImageTokens = json["min_image_tokens"];
        }
        if (json.contains("max_image_tokens"))
        {
            config.maxImageTokens = json["max_image_tokens"];
        }
        if (json.contains("max_image_tokens_per_image"))
        {
            config.maxImageTokensPerImage = json["max_image_tokens_per_image"];
        }
        return config;
    }

    //! Convert configuration to human-readable string format.
    //! @return String representation of the configuration for debugging/logging
    std::string toString() const
    {
        std::ostringstream oss;
        oss << "VisualBuilderConfig:\n";
        oss << "  minImageTokens: " << minImageTokens << "\n";
        oss << "  maxImageTokens: " << maxImageTokens << "\n";
        oss << "  maxImageTokensPerImage: " << maxImageTokensPerImage << "\n";
        return oss.str();
    }
};

//! Builder class for visual encoder TensorRT engines.
//! Handles the complete process of building TensorRT engines from ONNX models
//! for visual encoders used in Vision-Language Models.
class VisualBuilder
{
public:
    //! Constructor for VisualBuilder.
    //! @param onnxDir Directory containing the ONNX model and configuration files
    //! @param engineDir Directory where the built engine and related files will be saved
    //! @param config Configuration object specifying build parameters
    VisualBuilder(std::filesystem::path const& onnxDir, std::filesystem::path const& engineDir,
        VisualBuilderConfig const& config);

    //! Destructor.
    ~VisualBuilder() noexcept = default;

    //! Build the TensorRT engine from the ONNX model.
    //! This method performs the complete build process including:
    //! - Loading and parsing the ONNX model
    //! - Setting up optimization profiles
    //! - Building the TensorRT engine
    //! - Copying necessary files to the engine directory
    //! @return true if build was successful, false otherwise
    bool build();

private:
    std::filesystem::path mOnnxDir;     //!< Directory containing ONNX model files
    std::filesystem::path mEngineDir;   //!< Directory for saving built engine
    VisualBuilderConfig mBuilderConfig; //!< Build configuration
    multimodal::ModelType mModelType;   //!< Model type inferred from config.json

    //! Parse the model configuration from config.json.
    //! Extracts model type and dimensions needed for optimization profile setup.
    //! @return true if parsing was successful, false otherwise
    bool parseConfig();

    //! Set up optimization profile for visual models.
    //! Creates a single optimization profile with appropriate dynamic shapes.
    //! @param builder TensorRT builder object
    //! @param config TensorRT builder config object
    //! @param network TensorRT network definition
    //! @return true if setup was successful, false otherwise
    bool setupVisualOptimizationProfile(
        nvinfer1::IBuilder& builder, nvinfer1::IBuilderConfig& config, nvinfer1::INetworkDefinition const& network);

    //! Set up optimization profile for Qwen ViT models.
    //! Configures inputs for Qwen2-VL and Qwen2.5-VL visual encoders.
    //! @param profile Optimization profile to configure
    //! @param network TensorRT network definition for input analysis
    //! @return true if setup was successful, false otherwise
    bool setupQwenViTProfile(nvinfer1::IOptimizationProfile& profile, nvinfer1::INetworkDefinition const& network);

    //! Set up optimization profile for InternVL or Phi4-MM ViT models.
    //! Configures inputs for InternVL or Phi4-MM visual encoders.
    //! @param profile Optimization profile to configure
    //! @return true if setup was successful, false otherwise
    bool setupInternPhi4ViTProfile(nvinfer1::IOptimizationProfile& profile);

    //! Set up optimization profile for Nemotron-Omni RADIO ViT model.
    //! Configures input for RADIO vision encoder with dynamic tile batching.
    //! @param profile Optimization profile to configure
    //! @return true if setup was successful, false otherwise
    bool setupNemotronOmniViTProfile(nvinfer1::IOptimizationProfile& profile);

    //! Copy and save the model configuration with builder config.
    //! Creates a config.json file in the engine directory with both original model config
    //! and builder configuration parameters.
    //! @return true if copying was successful, false otherwise
    bool copyConfig();

    // Model dimensions extracted from config.json
    int64_t mNumChannels{0};   //!< Number of input channels
    int64_t mImageSizeH{0};    //!< Image height
    int64_t mImageSizeW{0};    //!< Image width
    int64_t mInputDim{0};      //!< Input dimension for Qwen models
    int64_t mRopeEmbedSize{0}; //!< Rotary position embedding size
    Json mModelConfig;         //!< Parsed model configuration
};

} // namespace builder
} // namespace trt_edgellm
