third_party.pylibs.pylint.src/pylint/utils/pragma_parser.py
Pierre Sassoulas 9e0baf370a Simplify hard to maintain copyright notice
git is the source of truth for the copyright, copyrite (the tool)
was taking exponentially longer with each release, and it's
polluting the code with sometimes as much as 50 lines of names.
2022-03-24 13:06:15 +01:00

132 lines
4.9 KiB
Python

# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/main/LICENSE
# Copyright (c) https://github.com/PyCQA/pylint/graphs/contributors
import re
from collections import namedtuple
from typing import Generator, List, Optional
# Pattern used to locate a "pylint:" pragma inside a comment.
#
# Allow stopping after the first semicolon/hash encountered,
# so that an option can be continued with the reasons
# why it is active or disabled.
#
# Capturing groups:
#   1 - the comment from its leading "#" up to the end of the option text
#   2 - only the option text that follows "pylint:" (no semicolon, hash
#       or newline)
#
# The pattern is written for re.VERBOSE: whitespace and "#" comments inside
# the string are ignored by the regex engine, which is why literal hashes
# are escaped as "\#".
OPTION_RGX = r"""
(?:^\s*\#.*|\s*| # Comment line, or whitespaces,
\s*\#.*(?=\#.*?\bpylint:)) # or a beginning of an inline comment
# followed by "pylint:" pragma
(\# # Beginning of comment
.*? # Anything (as little as possible)
\bpylint: # pylint word and column
\s* # Any number of whitespaces
([^;#\n]+)) # Anything except semicolon or hash or
# newline (it is the second matched group)
# and end of the first matched group
[;#]{0,1} # From 0 to 1 repetition of semicolon or hash
"""
# Compiled form of OPTION_RGX (verbose mode).
OPTION_PO = re.compile(OPTION_RGX, re.VERBOSE)
# One parsed pragma: ``action`` is the keyword, ``messages`` the list of
# message identifiers attached to it.
PragmaRepresenter = namedtuple("PragmaRepresenter", "action messages")

# Keywords that stand alone (no "=" and no message list).
ATOMIC_KEYWORDS = frozenset(("disable-all", "skip-file"))
# Keywords that must be followed by "=" and at least one message identifier.
MESSAGE_KEYWORDS = frozenset(
    ("disable-next", "disable-msg", "enable-msg", "disable", "enable")
)

# The keywords are joined into a regex alternation, longest first, so that,
# for example, 'disable' is not matched instead of 'disable-all'.
# Sets are unordered, so sorting by length alone would leave keywords of equal
# length (e.g. 'disable-all' / 'disable-msg') in an order that can change from
# one interpreter run to the next; the keyword text is used as a tie-breaker
# to make the resulting string fully deterministic.
ALL_KEYWORDS = "|".join(
    sorted(
        ATOMIC_KEYWORDS | MESSAGE_KEYWORDS,
        key=lambda keyword: (len(keyword), keyword),
        reverse=True,
    )
)

# (token name, regex) pairs. Order matters: alternatives are tried left to
# right, so a keyword wins over a plain message string.
TOKEN_SPECIFICATION = [
    ("KEYWORD", rf"\b({ALL_KEYWORDS:s})\b"),
    ("MESSAGE_STRING", r"[0-9A-Za-z\-\_]{2,}"),  # Identifiers
    ("ASSIGN", r"="),  # Assignment operator
    ("MESSAGE_NUMBER", r"[CREIWF]{1}\d*"),
]

# Single alternation of named groups, one per token kind; the name of the
# group that matched identifies the token.
TOK_REGEX = "|".join(
    f"(?P<{token_name:s}>{token_rgx:s})"
    for token_name, token_rgx in TOKEN_SPECIFICATION
)
def emit_pragma_representer(action: str, messages: List[str]) -> PragmaRepresenter:
    """Build the PragmaRepresenter for ``action`` after validating ``messages``.

    :param action: pragma keyword
    :param messages: message identifiers collected for that keyword
    :returns: ``PragmaRepresenter(action, messages)``
    :raises InvalidPragmaError: if ``action`` is one of MESSAGE_KEYWORDS and
        ``messages`` is empty
    """
    # Valid when there is at least one message, or when the keyword is not
    # one that takes messages.
    if messages or action not in MESSAGE_KEYWORDS:
        return PragmaRepresenter(action, messages)
    raise InvalidPragmaError(
        "The keyword is not followed by message identifier", action
    )
class PragmaParserError(Exception):
    """A class for exceptions thrown by pragma_parser module."""

    def __init__(self, message: str, token: str) -> None:
        """Store the explanation and the offending token.

        :param message: explain the reason why the exception has been thrown
        :param token: token concerned by the exception
        """
        self.message = message
        self.token = token
        # Only the message is forwarded, so str(exc) is the bare message.
        super().__init__(self.message)

    def __reduce__(self) -> tuple:
        """Describe how to rebuild the exception for ``copy`` and ``pickle``.

        The default ``BaseException.__reduce__`` re-creates the instance from
        ``self.args``, which only holds the message; calling the two-argument
        constructor with it raises ``TypeError``. Both constructor arguments
        are supplied here instead.
        """
        return type(self), (self.message, self.token), self.__dict__
class UnRecognizedOptionError(PragmaParserError):
    """Thrown in case of a valid but unrecognized option."""
class InvalidPragmaError(PragmaParserError):
    """Thrown in case the pragma is invalid.

    In this module that covers a missing keyword before ``=``, a missing
    ``=`` after a keyword, and a keyword not followed by a message identifier.
    """
def parse_pragma(pylint_pragma: str) -> Generator[PragmaRepresenter, None, None]:
    """Tokenize a pragma string and yield one PragmaRepresenter per keyword.

    The string is scanned with TOK_REGEX; text matching no token is skipped.
    A representer is emitted each time a new keyword starts and once more at
    the end of the input.

    :param pylint_pragma: the option text to parse
    :raises UnRecognizedOptionError: if ``=`` follows a keyword that does not
        take an assignment or a token that is not a keyword, or if no keyword
        was found at all
    :raises InvalidPragmaError: if ``=`` appears before any token, if ``=``
        is missing after a keyword that requires it, or (through
        emit_pragma_representer) if such a keyword has no message
    """
    current_action: Optional[str] = None
    collected: List[str] = []
    expecting_assign = False
    last_value = ""

    for match in re.finditer(TOK_REGEX, pylint_pragma):
        token_kind, token_text = match.lastgroup, match.group()

        if token_kind == "ASSIGN":
            if expecting_assign:
                # The "=" that the preceding keyword was waiting for.
                expecting_assign = False
            elif current_action:
                # A keyword has been found previously but doesn't support assignment
                raise UnRecognizedOptionError(
                    "The keyword doesn't support assignment", current_action
                )
            elif last_value:
                # Something found previously but not a known keyword
                raise UnRecognizedOptionError("The keyword is unknown", last_value)
            else:
                # Nothing at all detected before this assignment
                raise InvalidPragmaError("Missing keyword before assignment", "")
        elif expecting_assign:
            raise InvalidPragmaError(
                "The = sign is missing after the keyword", current_action or ""
            )
        elif token_kind == "KEYWORD":
            # A new keyword closes the pragma that was being built, if any.
            if current_action:
                yield emit_pragma_representer(current_action, collected)
            current_action, collected = token_text, []
            expecting_assign = token_text in MESSAGE_KEYWORDS
        elif token_kind in {"MESSAGE_STRING", "MESSAGE_NUMBER"}:
            collected.append(token_text)
            expecting_assign = False
        else:
            raise RuntimeError("Token not recognized")

        last_value = token_text

    if not current_action:
        raise UnRecognizedOptionError("The keyword is unknown", last_value)
    yield emit_pragma_representer(current_action, collected)