Initial commit
This commit is contained in:
45
src/watfag/parsers/generic/group.py
Normal file
45
src/watfag/parsers/generic/group.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import regex as re
|
||||
|
||||
from parsers.generic import GenericParser
|
||||
from parsers.generic.watfag import Group
|
||||
from parsers.generic.parsers import DataParser
|
||||
|
||||
# Regexes tried in order against the tail of the metadata text. Each one is
# anchored with `$` and exposes its candidate through the named group "group".
patterns = [
    re.compile(expression, re.UNICODE)
    for expression in (
        # "-GROUP" or " - GROUP"; the name may contain spaces and '&',
        # optionally followed by a closing parenthesis.
        r"(?:-| - )(?P<group>[a-zA-Z0-9 &]*)\)?$",
        # " GROUP" or " [GROUP]" (alphanumeric only), optionally followed by
        # a closing parenthesis.
        r"(?: )\[?(?P<group>[a-zA-Z0-9]*?)]?\)?$",
    )
]
|
||||
|
||||
# Lowercase-only fragments that disqualify a group candidate when they occur
# anywhere inside it. Entries wrapped in spaces only match where the term is
# surrounded by spaces in the text they are compared against.
invalid_group_substrs = [
    " hevc ",  # Can appear at end of release name while not being a group
    " x264 ",
    " x265 ",
    " truehd ",
    "bluray",
]
|
||||
# Complete group names that are never valid; compared case-sensitively.
invalid_groups = [
    "MP4",
]
|
||||
|
||||
class GroupParser(DataParser, GenericParser):
    """Parses the SCENE release group from the unparsed text.

    Each regex in ``patterns`` is tried in order against
    ``self.release.metadata_text``. The first candidate that passes all
    filters is stored on the release and removed from the metadata text.
    """

    def parse(self) -> bool:
        """Extract the release group from the end of the metadata text.

        On success ``self.release.group`` and ``self.release.group_name`` are
        set, and the matched span is cut out of
        ``self.release.metadata_text`` (whitespace runs are then collapsed to
        a single space and the result is stripped).

        Returns:
            True if a valid group was found and stored, False otherwise.
        """
        text = self.release.metadata_text

        for pattern in patterns:
            match = pattern.search(text)
            if not match:
                continue

            group = match.group("group").strip()

            # Pad the candidate with spaces so that space-delimited entries
            # such as ' hevc ' also reject a term sitting at the very start or
            # end of the candidate (e.g. a trailing "HEVC" captured as the
            # whole group), not only one embedded between other words.
            padded = " " + group.lower() + " "
            if any(substr in padded for substr in invalid_group_substrs):
                continue  # Contains a known non-group term

            if group in invalid_groups:
                continue  # Exact (case-sensitive) name that is never a group

            if len(group) < 2:
                continue  # Too short to be a valid group (also covers empty matches)

            self.release.group = Group.from_string(group)
            self.release.group_name = group

            # Drop the matched span, then normalise the leftover whitespace.
            remaining = text[:match.start()] + text[match.end():]
            self.release.metadata_text = re.sub(r"\s+", " ", remaining).strip()
            return True

        return False
|
||||
Reference in New Issue
Block a user