74 lines
2.9 KiB
Python
74 lines
2.9 KiB
Python
from typing import Optional, TYPE_CHECKING
|
|
|
|
from regex import Pattern
|
|
|
|
if TYPE_CHECKING:
|
|
from watfag.parsers.generic import WATFAG, Release
|
|
|
|
|
|
class DataParser:
    """Base class for all data parsers.

    A parser holds the release it operates on and a numeric ``priority``.
    Parsers compare by ``priority`` via ``<``, so a collection of parsers
    can be sorted directly.
    """

    def __init__(self, release: 'Release'):
        """Store the release to parse and set the default priority.

        Args:
            release: The release object this parser reads from and writes to.
        """
        self.release = release
        self.priority = 50  # Default priority, can be overridden in subclasses

    def __lt__(self, other):
        """Order parsers by their ``priority`` attribute.

        Returns ``NotImplemented`` when ``other`` has no ``priority`` so that
        Python can try the reflected comparison or raise ``TypeError``,
        rather than leaking an ``AttributeError`` out of a comparison.
        """
        try:
            other_priority = other.priority
        except AttributeError:
            return NotImplemented
        return self.priority < other_priority

    def parse(self) -> bool:
        """
        Override this method in subclasses to implement the parsing logic.

        Returns:
            True if parsing was successful.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Subclasses must implement the parse() method.")
|
|
|
|
|
|
|
|
class CheckParser(DataParser):
    """
    Type of parser that checks many regex patterns and assigns a WATFAG enum value based on the first match found.

    Every pattern in ``checks`` is searched for in ``release.metadata_text``.
    All occurrences of a matching pattern are removed from the text, but the
    WATFAG value is only assigned once: a later match never replaces a value
    that is already set (non-None) on the release.

    A ``remove_checks`` list can also be provided, which will remove any
    matches without assigning a WATFAG value. This is useful for cases where
    a releaser might use a certain word that matches a WATFAG value, but
    actually means something else and should not be scored as that WATFAG
    value. Subclasses that do not define ``remove_checks`` simply skip this step.

    If no pattern in ``checks`` matched and ``default`` is not None, the
    default is assigned to the attribute instead.
    """

    checks: dict[Pattern, 'WATFAG']
    remove_checks: list[Pattern]
    attribute_name: str  # The name of the attribute to set on the release, e.g. "quality" or "source"

    def __init__(self, release: 'Release'):
        """Initialise the parser for *release* with no default value."""
        super().__init__(release)
        # Default value to assign if no matches are found, can be set in subclasses
        self.default: Optional['WATFAG'] = None

    def _remove_matches(self, pattern: Pattern) -> bool:
        """Cut every match of *pattern* out of ``release.metadata_text``.

        After each removal the text is stripped of leading and trailing
        whitespace before the next search.

        Returns:
            True if the pattern matched at least once.
        """
        found = False
        while match := pattern.search(self.release.metadata_text):
            found = True
            start, end = match.span()
            if start == end:
                # A zero-width match removes nothing, so searching again would
                # return the very same match forever. Count it and stop.
                break
            text = self.release.metadata_text
            self.release.metadata_text = (text[:start] + text[end:]).strip()
        return found

    def parse(self) -> bool:
        """Apply ``checks`` and ``remove_checks`` to the release.

        Returns:
            True if any pattern in ``checks`` matched or the default value
            was assigned; False otherwise.
        """
        parsed = False

        for pattern, wf_value in self.checks.items():
            if not self._remove_matches(pattern):
                continue
            parsed = True
            # Only the first value wins; an attribute that is missing or None counts as unset.
            if getattr(self.release, self.attribute_name, None) is None:
                setattr(self.release, self.attribute_name, wf_value)

        # remove_checks is optional: a subclass that never defines it has nothing to strip.
        for pattern in getattr(self, "remove_checks", ()):
            self._remove_matches(pattern)

        if not parsed and self.default is not None:
            setattr(self.release, self.attribute_name, self.default)
            parsed = True

        return parsed
|