Files
WATFAG/src/watfag/parsers/generic/parsers.py

74 lines
2.9 KiB
Python

from typing import Optional, TYPE_CHECKING
from regex import Pattern
if TYPE_CHECKING:
from watfag.parsers.generic import WATFAG, Release
class DataParser:
    """Base class for all data parsers.

    Attributes:
        release: The release object this parser reads from and writes to.
        priority: Ordering key used by ``<`` comparisons between parsers.
            Defaults to 50; subclasses may assign a different value after
            calling ``super().__init__()``.
    """

    def __init__(self, release: 'Release'):
        self.release = release
        self.priority = 50  # Default priority, can be overridden in subclasses

    def __lt__(self, other):
        """Order parsers by ascending ``priority`` (enables ``sorted()``).

        Returns ``NotImplemented`` when *other* has no ``priority`` attribute,
        so Python raises its usual ``TypeError`` for unsupported operands
        instead of leaking an ``AttributeError`` from inside the comparison.
        """
        if not hasattr(other, "priority"):
            return NotImplemented
        return self.priority < other.priority

    def parse(self) -> bool:
        """
        Override this method in subclasses to implement the parsing logic.

        Returns:
            True if parsing was successful.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Subclasses must implement the parse() method.")
class CheckParser(DataParser):
    """
    Type of parser that checks many regex patterns and assigns a WATFAG enum value based on the first match found.

    Will keep checking for redundant matches to remove all instances of the pattern,
    but will only assign the WATFAG value once (and never over a value that is
    already set to something other than None on the release).

    A remove_checks list can also be provided, which will remove any matches without assigning a WATFAG value.
    This is useful for cases where a releaser might use a certain word that matches a WATFAG value,
    but actually means something else and should not be scored as that WATFAG value.
    Subclasses that need no such removals may leave remove_checks undefined.
    """

    checks: dict[Pattern, 'WATFAG']  # Pattern -> value to assign; iterated in dict order
    remove_checks: list[Pattern]  # Optional: patterns whose matches are only removed
    attribute_name: str  # The name of the attribute to set on the release, e.g. "quality" or "source"

    def __init__(self, release):
        super().__init__(release)
        # Default value to assign if no matches are found, can be set in subclasses
        self.default: Optional['WATFAG'] = None

    def _strip_matches(self, pattern: Pattern) -> bool:
        """Cut every non-empty match of *pattern* out of ``release.metadata_text``.

        The text is re-searched after each removal, and surrounding whitespace
        is stripped each time.

        Returns:
            True if at least one match was removed.
        """
        removed = False
        while match := pattern.search(self.release.metadata_text):
            start, end = match.span()
            if start == end:
                # Match objects are always truthy, even for zero-width matches.
                # Removing an empty span leaves the text unchanged, so without
                # this guard the loop would never terminate.
                break
            text = self.release.metadata_text
            self.release.metadata_text = (text[:start] + text[end:]).strip()  # Clean up extra spaces
            removed = True
        return removed

    def parse(self) -> bool:
        """Scan the release's metadata text and set ``attribute_name`` on the release.

        Returns:
            True if any pattern in ``checks`` matched, or if ``default`` was
            applied because none did; False otherwise.
        """
        parsed = False
        for pattern, wf_value in self.checks.items():
            if not self._strip_matches(pattern):
                continue
            parsed = True
            # Only the first hit assigns; an existing non-None value is kept.
            if getattr(self.release, self.attribute_name, None) is None:
                setattr(self.release, self.attribute_name, wf_value)

        # remove_checks is optional, so tolerate subclasses that do not define it.
        for pattern in getattr(self, "remove_checks", ()):
            self._strip_matches(pattern)

        if not parsed and self.default is not None:
            # NOTE(review): unlike matched values, the default is assigned even
            # if the attribute already holds a value - confirm this is intended.
            setattr(self.release, self.attribute_name, self.default)
            parsed = True
        return parsed