from typing import Optional, TYPE_CHECKING

from regex import Pattern

if TYPE_CHECKING:
    from watfag.parsers.generic import WATFAG, Release


class DataParser:
    """Base class for all data parsers.

    Instances compare by ``priority`` (see ``__lt__``), so a collection of
    parsers can be sorted directly; a lower ``priority`` sorts first.
    """

    def __init__(self, release: 'Release'):
        self.release = release
        self.priority = 50  # Default priority, can be overridden in subclasses

    def __lt__(self, other: 'DataParser') -> bool:
        """Return True if this parser's priority is lower than ``other``'s."""
        return self.priority < other.priority

    def parse(self) -> bool:
        """
        Override this method in subclasses to implement the parsing logic.

        Should return True if parsing was successful.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError("Subclasses must implement the parse() method.")


class CheckParser(DataParser):
    """
    Type of parser that checks many regex patterns and assigns a WATFAG enum
    value based on the first match found.

    Will keep checking for redundant matches to remove all instances of the
    pattern from ``release.metadata_text``, but will only assign the WATFAG
    value once (and only if the target attribute is missing or ``None``).

    A ``remove_checks`` list can also be provided, which will remove any
    matches without assigning a WATFAG value. This is useful for cases where a
    releaser might use a certain word that matches a WATFAG value, but actually
    means something else and should not be scored as that WATFAG value.
    Subclasses that do not need it may leave ``remove_checks`` undefined.
    """

    # Patterns to look for, mapped to the value assigned when they match.
    # Iterated in dict order, so the first matching entry wins.
    checks: dict[Pattern, 'WATFAG']
    # Optional patterns whose matches are removed without assigning a value.
    remove_checks: list[Pattern]
    # The name of the attribute to set on the release, e.g. "quality" or "source"
    attribute_name: str

    def __init__(self, release):
        super().__init__(release)
        # Default value to assign if no matches are found, can be set in subclasses
        self.default: Optional['WATFAG'] = None

    def _strip_matches(self, pattern: Pattern) -> bool:
        """Remove every match of ``pattern`` from ``release.metadata_text``.

        After each removal the text is trimmed of leading/trailing whitespace.

        Returns:
            True if the pattern matched at least once, False otherwise.
        """
        release = self.release
        found = False
        while match := pattern.search(release.metadata_text):
            found = True
            start, end = match.span()
            remaining = release.metadata_text[:start] + release.metadata_text[end:]
            release.metadata_text = remaining.strip()
            if start == end:
                # A zero-width match removes nothing, so searching again would
                # find the same match forever; stop after recording it.
                break
        return found

    def parse(self) -> bool:
        """Scan ``release.metadata_text`` and set ``attribute_name`` on the release.

        For each pattern in ``checks`` every match is removed from the text;
        the associated value is assigned only if the attribute is currently
        missing or ``None``, so the first matching pattern wins. Matches of
        ``remove_checks`` patterns are then removed without assigning anything.
        If no ``checks`` pattern matched and ``default`` is not ``None``, the
        default is assigned (overwriting any existing value of the attribute).

        Returns:
            True if a ``checks`` pattern matched or the default was applied,
            False otherwise.
        """
        release = self.release
        parsed = False

        for pattern, wf_value in self.checks.items():
            if not self._strip_matches(pattern):
                continue
            parsed = True
            # Never clobber a value set by an earlier pattern (or earlier parser).
            if getattr(release, self.attribute_name, None) is None:
                setattr(release, self.attribute_name, wf_value)

        # remove_checks is optional: subclasses may not define it at all.
        for pattern in getattr(self, "remove_checks", ()):
            self._strip_matches(pattern)

        if not parsed and self.default is not None:
            setattr(release, self.attribute_name, self.default)
            parsed = True

        return parsed