# Source code for accvlab.dali_pipeline_framework.internal_helpers.mini_parser.parser

# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from .lexer import Token, TokenType, Lexer
from . import ast



class Parser:
    '''The actual parser.

    The input string must start with a variable name, followed by an assignment operator, followed by an
    expression.

    The expression is parsed recursively. The expression can contain variables, literals, and operators.

    The operators are:
        - Logical operators: ``or``, ``and``, ``not``
        - Comparison operators: ``==``, ``!=``, ``>``, ``>=``, ``<``, ``<=``
        - Parentheses: ``(`` and ``)``
        - Unary minus: ``-``
        - Assignment operator: ``=``

    Operator precedence follows Python, from weakest to strongest binding:
    ``or``, ``and``, ``not``, comparisons, unary minus. For example, ``not a < b`` is parsed as
    ``not (a < b)`` and ``-a < b`` is parsed as ``(-a) < b``. Binary operators of equal priority are
    grouped from left to right.

    The syntax is similar to Python. However, note that
        - Only the operators defined above are supported.
        - Comparisons of more than two values are not supported (e.g. ``a < b < c`` is not supported).
        - Only numeric literals are supported. ``True`` and ``False`` are not supported (not needed in the current
          use case).

    Example:

        Some valid statements:
          - ``res_var = -_b1 < 10.5``
          - ``res_1_var = -_b1 < 10.5 and -c > -20``
          - ``res_3_var = not -_b1 < 10.5``
          - ``res_4_var = (-_b1 < 10.5 or a_bool_var) and another_bool_var``
          - ``res_5_var = (-_b1 < 10.5 or (-c > -20 and d == 10)) and another_var > 30``
          - ``res_7_var = (-_b1 < 10.5 or (-c > -20 and d == 10)) and (another_var > 30 and _e < 40) and f > 50``


    Args:
        input_str: Input string to parse.
    '''

    # Binding strength of each operator token (higher binds tighter). The order mirrors Python:
    # ``not`` binds weaker than comparisons, so that ``not a < b`` means ``not (a < b)``.
    _priority_map = {
        TokenType.LOGICAL_OR: 1,
        TokenType.LOGICAL_AND: 2,
        TokenType.LOGICAL_NOT: 3,
        TokenType.COMPARISON: 4,
        TokenType.MINUS: 5,
        TokenType.PARENTHESIS_OPEN: 6,
    }

    # Tokens which are allowed to combine a left and a right operand.
    _infix_token_types = (TokenType.LOGICAL_OR, TokenType.LOGICAL_AND, TokenType.COMPARISON)

    # Tokens which terminate the (sub-)expression currently being parsed.
    _end_token_types = (TokenType.EOL, TokenType.PARENTHESIS_CLOSE)

    _assignment_format_error = (
        "The condition must start with `<res_var_name> = ...` "
        "(replace `<res_var_name>` with the name of the variable to store the result in)"
    )

    def __init__(self, input_str: str):
        self._lexer = Lexer(input_str)
        # Tokenize the whole input up-front; the list always ends with the EOL token.
        self._tokens = []
        while True:
            token = self._lexer.next_token()
            self._tokens.append(token)
            if token.type == TokenType.EOL:
                break
        self._curr_token_idx = 0

    def parse(self) -> ast.AST:
        '''Parses the input stream and returns an AST.

        See the class docstring for the syntax.

        Returns:
            The AST of the input stream.

        Raises:
            ValueError: If the input does not start with ``<variable> =`` or if the expression is
                malformed (e.g. unexpected token, unbalanced parentheses).
        '''
        # Always start from the first token so that repeated calls give the same result.
        self._curr_token_idx = 0

        var = self._curr_token()
        if var.type != TokenType.VARIABLE:
            raise ValueError(self._assignment_format_error)
        self._move_to_next_token()

        if self._curr_token().type != TokenType.ASSIGNMENT:
            raise ValueError(self._assignment_format_error)
        self._move_to_next_token()

        expression = self._parse_expression(0)

        # The top-level expression must consume the whole input. If it stopped early, it stopped
        # at a closing parenthesis which has no matching opening parenthesis.
        trailing = self._curr_token()
        if trailing.type != TokenType.EOL:
            raise ValueError(
                f"Unexpected token after the end of the expression (unmatched `)`?): {trailing}"
            )

        return ast.Assignment(ast.Variable(var.value), expression)

    def _curr_token(self) -> Token:
        '''Returns the current token.

        Returns:
            The current token.
        '''
        return self._tokens[self._curr_token_idx]

    def _curr_token_priority(self) -> int:
        '''Returns the priority of the current token.

        Returns:
            The priority of the current token.

        Raises:
            ValueError: If the current token is not an operator with a defined priority.
        '''
        token = self._curr_token()
        try:
            return self._priority_map[token.type]
        except KeyError:
            raise ValueError(f"Invalid token: {token}") from None

    def _move_to_next_token(self):
        '''Move current token forward'''
        self._curr_token_idx += 1

    def _parse_expression(self, priority: int) -> ast.AST:
        '''Parses an expression from the input stream and returns an AST.

        Binary operators are consumed only while they bind stronger than ``priority``. Parsing also
        stops at the end of the input or at a closing parenthesis; neither of the two is consumed.

        Args:
            priority: The priority of the expression to parse.

        Returns:
            The AST of the parsed expression.

        Raises:
            ValueError: If an operand is followed by something which is not a binary operator.
        '''
        left = self._parse_prefix()
        while True:
            token = self._curr_token()
            if token.type in self._end_token_types:
                break
            # After a complete operand, only a binary operator may follow.
            if token.type not in self._infix_token_types:
                raise ValueError(
                    f"Expected `or`, `and` or a comparison operator, but got invalid token: {token}"
                )
            token_priority = self._curr_token_priority()
            if token_priority <= priority:
                # Weaker (or equally strong) operator: let the caller handle it.
                break
            left = self._parse_infix(left, token_priority)
        return left

    def _parse_prefix(self) -> ast.AST:
        '''Parses an operand: a prefix operator expression, a parenthesized expression, a literal or a variable.

        Returns:
            The AST of the parsed operand.

        Raises:
            ValueError: If the current token cannot start an operand or if a closing parenthesis is missing.
        '''
        curr_token = self._curr_token()

        if curr_token.type in (TokenType.LOGICAL_NOT, TokenType.MINUS):
            priority = self._curr_token_priority()
            self._move_to_next_token()
            inner = self._parse_expression(priority)
            if curr_token.type == TokenType.LOGICAL_NOT:
                return ast.Not(inner)
            return ast.UnaryMinus(inner)

        if curr_token.type == TokenType.PARENTHESIS_OPEN:
            self._move_to_next_token()
            # Parse the inner expression.
            # We reset the priority to 0 because we are parsing the inner expression
            res = self._parse_expression(0)
            # The right parenthesis serves as the end token for the inner expression.
            # We do not advance it inside the inner expression (as it may need to serve as the end token
            # for multiple recursion levels). Instead, we check & advance it here.
            closing = self._curr_token()
            if closing.type != TokenType.PARENTHESIS_CLOSE:
                raise ValueError(f"Missing closing parenthesis; expected `)` but got: {closing}")
            self._move_to_next_token()
            return res

        if curr_token.type == TokenType.LITERAL:
            self._move_to_next_token()
            return ast.Literal(curr_token.value)

        if curr_token.type == TokenType.VARIABLE:
            self._move_to_next_token()
            return ast.Variable(curr_token.value)

        raise ValueError(f"Invalid token: {curr_token}")

    def _parse_infix(self, left_value: ast.AST, priority: int) -> ast.AST:
        '''Parses an infix expression from the input stream and returns an AST.

        The current token must be the binary operator; the left operand has already been parsed.

        Args:
            left_value: The left value of the infix expression.
            priority: The priority of the infix expression.

        Returns:
            The AST combining the left value and the parsed right value.

        Raises:
            ValueError: If the current token is not a binary operator.
        '''
        combination = self._curr_token()
        # Reject tokens such as `not`, `-` or `(` here; they have a priority but cannot combine
        # two operands.
        if combination.type not in self._infix_token_types:
            raise ValueError(
                f"Expected `or`, `and` or a comparison operator, but got invalid token: {combination}"
            )
        self._move_to_next_token()
        # Using the operator's own priority for the right side groups equal-priority operators
        # from left to right.
        right_value = self._parse_expression(priority)
        if combination.type == TokenType.LOGICAL_OR:
            return ast.Or(left_value, right_value)
        if combination.type == TokenType.LOGICAL_AND:
            return ast.And(left_value, right_value)
        return ast.Comparison(left_value, combination.value, right_value)