Source code for accvlab.dali_pipeline_framework.internal_helpers.mini_parser.lexer

# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.



[docs]
class TokenType:
    '''Represents the type of a token.

    The token types are plain string constants defined as class attributes;
    they are compared and printed as ordinary strings.
    '''

    # Operands: numeric literals and identifiers that are not keywords.
    LITERAL = "literal"
    VARIABLE = "variable"

    # "=" (assignment) and "==", "!=", ">=", "<=", ">", "<" (comparison).
    ASSIGNMENT = "assignment"
    COMPARISON = "comparison"

    # Keywords "or", "and" and "not".
    LOGICAL_OR = "logical_or"
    LOGICAL_AND = "logical_and"
    LOGICAL_NOT = "logical_not"

    # The "-" character.
    MINUS = "minus"

    # "(" and ")".
    PARENTHESIS_OPEN = "parenthesis_open"
    PARENTHESIS_CLOSE = "parenthesis_close"

    # Emitted by the lexer once the input is exhausted.
    EOL = "end_of_line"




[docs]
class Token:
    '''Represents a token in the input stream.

    Note:
        The constructor parameters become attributes of the class.

    Args:
        type: Type of the token. This is one of the string constants defined
            on ``TokenType`` (e.g. ``TokenType.LITERAL``), which is why it is
            annotated as ``str``.
        value: Value of the token.
    '''

    def __init__(self, type: str, value: str):
        # NOTE: ``type`` shadows the builtin of the same name. The name is kept
        # because it is part of the public constructor signature and of the
        # attribute API (``token.type``).
        self.type = type
        self.value = value

    def __str__(self) -> str:
        '''Returns a human-readable representation showing the type and the value'''
        return f"Token(type='{self.type}', value='{self.value}')"

    def __repr__(self) -> str:
        '''Returns the same representation as :meth:`__str__`'''
        return self.__str__()




[docs]
class Lexer:
    '''Lexer for the simple parser.

    The lexer walks over the input string and hands out one :class:`Token` per call to
    :meth:`next_token`. Whitespace between tokens is ignored. Once the input is exhausted,
    every further call returns a token of type ``TokenType.EOL``.

    Args:
        input: Input string to generate tokens from.
    '''

    # Reserved words. Any other identifier is treated as a variable.
    _keyword_map = {
        "or": TokenType.LOGICAL_OR,
        "and": TokenType.LOGICAL_AND,
        "not": TokenType.LOGICAL_NOT,
    }
    # Two-character operators are matched before one-character ones, so that e.g. ">=" is not
    # split into ">" and "=".
    _comparison_map_2_char = {
        "==": TokenType.COMPARISON,
        "!=": TokenType.COMPARISON,
        ">=": TokenType.COMPARISON,
        "<=": TokenType.COMPARISON,
    }
    _comparison_map_1_char = {
        ">": TokenType.COMPARISON,
        "<": TokenType.COMPARISON,
    }
    # Characters which can start an assignment or a comparison operator.
    _comparison_start_chars = {">", "<", "=", "!"}
    _assignment = {
        "=": TokenType.ASSIGNMENT,
    }

    _minus_map = {
        "-": TokenType.MINUS,
    }

    def __init__(self, input: str):
        # NOTE: ``input`` shadows the builtin; the name is kept as it is part of the public signature.
        self._input = input
        self._position = 0

    def next_token(self) -> Token:
        '''Returns the next token in the input stream.

        Returns:
            The next token. If only whitespace (or nothing) is left in the input, a token of type
            ``TokenType.EOL`` with an empty value is returned.

        Raises:
            ValueError: If the input at the current position is not a valid token.
        '''
        # Skip whitespace *before* checking for the end of the input. Otherwise, trailing
        # whitespace would move the position past the last character and the indexing below
        # would fail with an ``IndexError`` instead of yielding the EOL token.
        self._skip_whitespaces()
        if self._position >= len(self._input):
            return Token(TokenType.EOL, "")
        current_char = self._input[self._position]
        if self._is_digit_dot(current_char):
            return self._process_number()
        if self._is_alpha_underscore(current_char):
            return self._process_string()
        return self._process_char()

    def _is_alpha_underscore(self, char: str) -> bool:
        '''Checks if a character is an alpha character or an underscore'''
        return char.isalpha() or char == "_"

    def _is_alpha_underscore_digit(self, char: str) -> bool:
        '''Checks if a character is an alpha character, an underscore, or a digit'''
        return self._is_alpha_underscore(char) or char.isdigit()

    def _is_digit_dot(self, char: str) -> bool:
        '''Checks if a character is a digit or a dot'''
        return char.isdigit() or char == "."

    def _skip_whitespaces(self):
        '''Skips whitespace in the input stream'''
        while self._position < len(self._input) and self._input[self._position].isspace():
            self._position += 1

    def _process_number(self) -> Token:
        '''Parses a number from the input stream.

        A number is a run of digits containing at most one dot (e.g. ``12``, ``1.5``, ``.5``, ``1.``).

        Raises:
            ValueError: If the run contains more than one dot or consists of a single dot only.
        '''
        start_pos = self._position
        while self._position < len(self._input) and self._is_digit_dot(self._input[self._position]):
            self._position += 1
        res_content = self._input[start_pos : self._position]
        # A lone "." passes the dot-count check but contains no digit, so it is not a number.
        if res_content.count(".") > 1 or res_content == ".":
            raise ValueError(f"Invalid number: {res_content}")
        return Token(TokenType.LITERAL, res_content)

    def _process_string(self) -> Token:
        '''Parses an identifier from the input stream.

        The identifier starts with a letter or an underscore, followed by letters, underscores and
        digits. The keywords ``or``, ``and`` and ``not`` are mapped to their logical token types;
        every other identifier yields a ``TokenType.VARIABLE`` token.

        Raises:
            ValueError: If the character at the current position cannot start an identifier.
        '''
        if not self._is_alpha_underscore(self._input[self._position]):
            raise ValueError(
                f"Invalid identifier starting at position {self._position}: {self._input[self._position]}"
            )

        start_pos = self._position
        while self._position < len(self._input) and self._is_alpha_underscore_digit(
            self._input[self._position]
        ):
            self._position += 1
        token_str = self._input[start_pos : self._position]
        token_type = self._keyword_map.get(token_str, TokenType.VARIABLE)
        return Token(token_type, token_str)

    def _process_char(self) -> Token:
        '''Parses an operator or a parenthesis from the input stream.

        Raises:
            ValueError: If the character at the current position is not part of any known token.
        '''
        current_char = self._input[self._position]
        if current_char in self._minus_map:
            return self._process_minus()
        if current_char in self._comparison_start_chars:
            return self._process_assignment_and_comparison()
        if current_char in ("(", ")"):
            return self._process_parenthesis()
        # Anything else is not part of the language; report it as such (and not as a parenthesis).
        raise ValueError(f"Invalid character: {current_char}")

    def _process_minus(self) -> Token:
        '''Parses a minus from the input stream'''
        self._position += 1
        return Token(TokenType.MINUS, "-")

    def _process_assignment_and_comparison(self) -> Token:
        '''Parses an assignment or a comparison operator from the input stream.

        Raises:
            ValueError: If the character at the current position does not form a valid operator
                (e.g. a ``!`` which is not followed by ``=``).
        '''
        # Try the two-character operators first so that "==", ">=", ... are not split up.
        two_chars = self._input[self._position : self._position + 2]
        if two_chars in self._comparison_map_2_char:
            self._position += 2
            return Token(self._comparison_map_2_char[two_chars], two_chars)

        one_char = self._input[self._position]
        if one_char in self._comparison_map_1_char:
            self._position += 1
            return Token(self._comparison_map_1_char[one_char], one_char)
        if one_char in self._assignment:
            self._position += 1
            return Token(self._assignment[one_char], one_char)
        raise ValueError(f"Invalid comparison operator: {one_char}")

    def _process_parenthesis(self) -> Token:
        '''Parses a parenthesis from the input stream.

        Raises:
            ValueError: If the character at the current position is not a parenthesis.
        '''
        current_char = self._input[self._position]
        if current_char == "(":
            self._position += 1
            return Token(TokenType.PARENTHESIS_OPEN, "(")
        if current_char == ")":
            self._position += 1
            return Token(TokenType.PARENTHESIS_CLOSE, ")")
        raise ValueError(f"Invalid parenthesis: {current_char}")