Source code for accvlab.dali_pipeline_framework.internal_helpers.mini_parser.parser

# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from .lexer import Token, TokenType, Lexer
from . import ast


class Parser:
    '''The actual parser.

    The input string must start with a variable name, followed by an assignment
    operator, followed by an expression. The expression is parsed recursively.
    The expression can contain variables, literals, and operators. The operators
    are:

    - Logical operators: ``or``, ``and``, ``not``
    - Comparison operators: ``==``, ``!=``, ``>``, ``>=``, ``<``, ``<=``
    - Parentheses: ``(`` and ``)``
    - Unary minus: ``-``
    - Assignment operator: ``=``

    The syntax is similar to Python. However, note that

    - Only the operators defined above are supported.
    - Comparisons of more than two values are not supported (e.g. ``a < b < c``
      is not supported).
    - Only numeric literals are supported. ``True`` and ``False`` are not
      supported (not needed in the current use case).

    Example:
        Some valid statements:

        - ``res_var = -_b1 < 10.5``
        - ``res_1_var = -_b1 < 10.5 and -c > -20``
        - ``res_3_var = not -_b1 < 10.5``
        - ``res_4_var = (-_b1 < 10.5 or a_bool_var) and another_nool_var``
        - ``res_5_var = (-_b1 < 10.5 or (-c > -20 and d == 10)) and another_var > 30``
        - ``res_7_var = (-_b1 < 10.5 or (-c > -20 and d == 10)) and (another_var > 30 and _e < 40) and f > 50``

    Args:
        input_str: Input string to parse.
    '''

    # Binding priorities for precedence climbing (Pratt parsing):
    # a higher value binds more tightly.
    _priority_map = {
        TokenType.LOGICAL_OR: 1,
        TokenType.LOGICAL_AND: 2,
        TokenType.COMPARISON: 3,
        TokenType.LOGICAL_NOT: 4,
        TokenType.MINUS: 4,
        TokenType.PARENTHESIS_OPEN: 5,
    }

    def __init__(self, input_str: str):
        self._lexer = Lexer(input_str)
        # Tokenize the whole input eagerly; the EOL token terminates the stream.
        self._tokens = []
        while True:
            token = self._lexer.next_token()
            self._tokens.append(token)
            if token.type == TokenType.EOL:
                break
        self._curr_token_idx = 0

    def parse(self) -> ast.AST:
        '''Parses the input stream and returns an AST.

        See the class docstring for the syntax.

        Returns:
            The AST of the input stream.

        Raises:
            ValueError: If the input does not match the expected syntax.
        '''
        var = self._curr_token()
        if var.type != TokenType.VARIABLE:
            raise ValueError(
                "The condition must start with `<res_var_name> = ...` (replace `<res_var_name>` with the name of the variable to store the result in)"
            )
        self._move_to_next_token()
        assignment = self._curr_token()
        if assignment.type != TokenType.ASSIGNMENT:
            raise ValueError(
                "The condition must start with `<res_var_name> = ...` (replace `<res_var_name>` with the name of the variable to store the result in)"
            )
        self._move_to_next_token()
        expression = self._parse_expression(0)
        # Fix: previously, trailing tokens after a complete expression (e.g. an
        # unbalanced `)` in `a = 1 )`) were silently ignored. Require the whole
        # input to be consumed.
        if self._curr_token().type != TokenType.EOL:
            raise ValueError(f"Unexpected token after expression: {self._curr_token()}")
        return ast.Assignment(ast.Variable(var.value), expression)

    def _curr_token(self) -> Token:
        '''Returns the current token.

        Returns:
            The current token.
        '''
        return self._tokens[self._curr_token_idx]

    def _curr_token_priority(self) -> int:
        '''Returns the priority of the current token.

        Returns:
            The priority of the current token.

        Raises:
            ValueError: If the current token cannot appear in operator position.
        '''
        token = self._curr_token()
        try:
            return self._priority_map[token.type]
        except KeyError:
            # Fix: previously an unexpected token here (e.g. a second literal in
            # `a = 1 2`) surfaced as an opaque KeyError. Raise the parser's
            # consistent ValueError instead.
            raise ValueError(f"Invalid token: {token}") from None

    def _move_to_next_token(self):
        '''Move current token forward'''
        self._curr_token_idx += 1

    def _parse_expression(self, priority: int) -> ast.AST:
        '''Parses the input stream and returns an AST.

        Parses an expression whose operators bind more tightly than ``priority``
        (precedence climbing).

        Args:
            priority: The priority of the expression to parse.
        '''
        left = self._parse_prefix()
        end_token_types = (TokenType.EOL, TokenType.PARENTHESIS_CLOSE)
        while self._curr_token().type not in end_token_types and self._curr_token_priority() > priority:
            left = self._parse_infix(left, self._curr_token_priority())
        return left

    def _parse_prefix(self) -> ast.AST:
        '''Parses a prefix position: a unary operator, a parenthesized group,
        a literal, or a variable.

        Raises:
            ValueError: If the current token cannot start an expression or a
                parenthesized group is not closed.
        '''
        curr_token = self._curr_token()
        if curr_token.type in (TokenType.LOGICAL_NOT, TokenType.MINUS):
            # The operand is parsed at the unary operator's own priority so that
            # only tighter-binding constructs become part of the operand.
            op_priority = self._curr_token_priority()
            self._move_to_next_token()
            inner = self._parse_expression(op_priority)
            if curr_token.type == TokenType.LOGICAL_NOT:
                return ast.Not(inner)
            return ast.UnaryMinus(inner)
        elif curr_token.type == TokenType.PARENTHESIS_OPEN:
            self._move_to_next_token()
            # Parse the inner expression.
            # We reset the priority to 0 because we are parsing the inner expression
            res = self._parse_expression(0)
            # The right parenthesis serves as the end token for the inner expression.
            # We do not advance it inside the inner expression (as it may need to
            # serve as the end token for multiple recursion levels). Instead, we
            # advance it here.
            # Fix: verify the closing parenthesis is actually present. Previously,
            # unbalanced input such as `a = (b` consumed the EOL token here and
            # then crashed with an IndexError in the caller.
            if self._curr_token().type != TokenType.PARENTHESIS_CLOSE:
                raise ValueError(f"Expected `)` but got: {self._curr_token()}")
            self._move_to_next_token()
            return res
        elif curr_token.type in (TokenType.LITERAL, TokenType.VARIABLE):
            if curr_token.type == TokenType.LITERAL:
                value = ast.Literal(curr_token.value)
            else:
                value = ast.Variable(curr_token.value)
            self._move_to_next_token()
            return value
        else:
            raise ValueError(f"Invalid token: {curr_token}")

    def _parse_infix(self, left_value: ast.AST, priority: int) -> ast.AST:
        '''Parses an infix expression from the input stream and returns an AST.

        Args:
            left_value: The left value of the infix expression.
            priority: The priority of the infix expression.

        Raises:
            ValueError: If the operator token is not a supported infix operator.
        '''
        combination = self._curr_token()
        self._move_to_next_token()
        right_value = self._parse_expression(priority)
        if combination.type == TokenType.LOGICAL_OR:
            return ast.Or(left_value, right_value)
        elif combination.type == TokenType.LOGICAL_AND:
            return ast.And(left_value, right_value)
        elif combination.type == TokenType.COMPARISON:
            return ast.Comparison(left_value, combination.value, right_value)
        # Fix: previously any other token in infix position (e.g. `(` in
        # `a = b (c)`) fell through all branches and crashed with an
        # UnboundLocalError on the result variable; raise a consistent
        # ValueError instead.
        raise ValueError(f"Invalid token: {combination}")