Initial commit
This commit is contained in:
73
src/watfag/parsers/generic/parsers.py
Normal file
73
src/watfag/parsers/generic/parsers.py
Normal file
@@ -0,0 +1,73 @@
|
||||
from typing import Optional, TYPE_CHECKING
|
||||
|
||||
from regex import Pattern
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from parsers.generic import WATFAG, Release
|
||||
|
||||
|
||||
class DataParser:
    """Base class for all data parsers.

    A parser is bound to a single release object and implements
    :meth:`parse`. Parsers compare by their ``priority`` attribute
    (see :meth:`__lt__`), so a collection of parsers can be sorted.
    """

    def __init__(self, release: 'Release'):
        """Bind the parser to *release* and assign the default priority.

        Args:
            release: The release object this parser reads from and updates.
        """
        self.release = release
        # Assigned per instance: a subclass that wants another priority must
        # set it after calling super().__init__(), because a class-level
        # ``priority`` attribute would be shadowed by this assignment.
        self.priority = 50  # Default priority, can be overridden in subclasses

    def __lt__(self, other):
        """Return whether this parser's priority is lower than *other*'s.

        Returns ``NotImplemented`` when *other* has no ``priority`` attribute,
        so Python can try the reflected comparison and otherwise raise the
        usual ``TypeError`` instead of leaking an ``AttributeError``.
        """
        try:
            other_priority = other.priority
        except AttributeError:
            return NotImplemented
        return self.priority < other_priority

    def parse(self) -> bool:
        """
        Override this method in subclasses to implement the parsing logic.
        Should return True if parsing was successful.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Subclasses must implement the parse() method.")
|
||||
|
||||
|
||||
|
||||
class CheckParser(DataParser):
    """
    Type of parser that checks many regex patterns and assigns a WATFAG enum value based on the first match found.
    Will keep checking for redundant matches to remove all instances of the pattern,
    but will only assign the WATFAG value once.
    A remove_checks list can also be provided, which will remove any matches without assigning a WATFAG value.
    This is useful for cases where a releaser might use a certain word that matches a WATFAG value,
    but actually means something else and should not be scored as that WATFAG value.

    Subclasses are expected to define ``checks`` and ``attribute_name``;
    ``remove_checks`` is optional and treated as empty when absent.
    """

    # Patterns to look for, mapped to the value assigned when one matches.
    # Iterated in dict order, so earlier entries win the assignment.
    checks: dict[Pattern, 'WATFAG']
    # Patterns whose matches are removed from the text without assigning anything.
    remove_checks: list[Pattern]
    attribute_name: str  # The name of the attribute to set on the release, e.g. "quality" or "source"

    def __init__(self, release: 'Release'):
        """Bind the parser to *release* and initialise ``default`` to ``None``.

        Args:
            release: The release object whose ``metadata_text`` is scanned.
        """
        super().__init__(release)
        # Assigned per instance: subclasses should set their own default
        # after calling super().__init__().
        self.default: Optional['WATFAG'] = None  # Default value to assign if no matches are found, can be set in subclasses

    def _remove_matches(self, pattern: Pattern) -> bool:
        """Remove every match of *pattern* from ``release.metadata_text``.

        The text is re-searched from the start after each removal and is
        stripped of leading/trailing whitespace every time.

        Returns:
            True if the pattern matched at least once, otherwise False.
        """
        found = False
        while match := pattern.search(self.release.metadata_text):
            found = True
            start, end = match.span()
            if start == end:
                # A zero-width match removes nothing, so searching again
                # would return the same match forever; stop here.
                break
            text = self.release.metadata_text
            self.release.metadata_text = (text[:start] + text[end:]).strip()  # Clean up extra spaces
        return found

    def parse(self) -> bool:
        """Scan the release's metadata text and set the configured attribute.

        For each pattern in ``checks`` all matches are removed from the text;
        the mapped value is assigned only if the attribute is currently
        missing or ``None``. Matches of ``remove_checks`` patterns are then
        removed without any assignment. If no ``checks`` pattern matched and
        ``default`` is not ``None``, the default is assigned.

        Returns:
            True if any ``checks`` pattern matched or the default was applied.
        """
        parsed = False

        for pattern, wf_value in self.checks.items():
            if not self._remove_matches(pattern):
                continue
            parsed = True
            # Only the first matching pattern gets to assign the value.
            if getattr(self.release, self.attribute_name, None) is None:
                setattr(self.release, self.attribute_name, wf_value)

        # ``remove_checks`` is optional; fall back to nothing when a subclass
        # does not define it.
        for pattern in getattr(self, "remove_checks", ()):
            self._remove_matches(pattern)

        if not parsed and self.default is not None:
            # NOTE(review): this overwrites the attribute even if it already
            # holds a value set elsewhere - confirm that is intended.
            setattr(self.release, self.attribute_name, self.default)
            parsed = True

        return parsed
|
||||
Reference in New Issue
Block a user