47 lines
1.9 KiB
Python
47 lines
1.9 KiB
Python
import regex as re
|
|
|
|
from watfag.parsers.generic import GenericParser
|
|
from watfag.parsers.generic.watfag import Group
|
|
from watfag.parsers.generic.parsers import DataParser
|
|
|
|
# Regexes tried in order to locate a release-group name at the very end of the
# text. Every pattern is anchored with ``$`` and exposes the candidate name
# through the named capture ``group``.
patterns = [
    re.compile(source, re.UNICODE)
    for source in (
        # "-GROUP" or " - GROUP"; letters, digits, spaces and "&" allowed,
        # optionally followed by a closing ")".
        r"(?:-| - )(?P<group>[a-zA-Z0-9 &]*)\)?$",
        # "[GROUP" with an optional closing "]".
        r"\[(?P<group>[a-zA-Z0-9 &]*)\]?$",
        # A space, then an optionally bracketed run of letters/digits only,
        # optionally followed by "]" and/or ")".
        r"(?: )\[?(?P<group>[a-zA-Z0-9]*?)]?\)?$",
    )
]
|
|
|
|
# Lowercase-only fragments; a captured group is rejected when any of them
# occurs as a substring of the lower-cased capture. The surrounding spaces in
# some entries are part of the literal being searched for.
invalid_group_substrs = [
    # Can appear at end of release name while not being a group
    ' hevc ',
    ' x264 ',
    ' x265 ',
    ' truehd ',
    'bluray',
]
|
|
# Full group names (compared case-sensitively, after stripping) that are
# never accepted as a release group.
invalid_groups = ['MP4']
|
|
|
|
class GroupParser(DataParser, GenericParser):
    """Parses the SCENE release group from the unparsed text."""

    def parse(self) -> bool:
        """Extract a trailing release-group name from the release metadata text.

        Each regex in ``patterns`` is searched, in order, against
        ``self.release.metadata_text``. The first candidate that passes all
        validity checks is stored on the release and cut out of the text.

        A candidate is rejected (and the next pattern is tried) when:
          * it contains any entry of ``invalid_group_substrs``
            (case-insensitive),
          * its stripped form is listed in ``invalid_groups`` (case-sensitive),
          * its stripped form is shorter than two characters.

        On success ``self.release.group`` is set from ``Group.from_string``,
        ``self.release.group_name`` is set to the stripped name, and
        ``self.release.metadata_text`` is rewritten without the matched span,
        with whitespace runs collapsed to single spaces and the ends trimmed.

        Returns:
            True if a group was found and recorded, False otherwise.
        """
        text = self.release.metadata_text

        for pattern in patterns:
            match = pattern.search(text)
            if match is None:
                continue

            captured = match.group("group")

            # Pad with a space on both sides so that space-delimited entries
            # such as " x264 " also hit when the token is the first or last
            # word of the capture (e.g. "WEB-DL x264" captures "DL x264").
            # Without the padding those entries can only match mid-string.
            haystack = f" {captured.lower()} "
            if any(substr in haystack for substr in invalid_group_substrs):
                continue  # Contains a known non-group token

            group = captured.strip()
            if group in invalid_groups:
                continue  # Explicitly blacklisted full name
            if len(group) < 2:
                continue  # Too short to be a valid group

            self.release.group = Group.from_string(group)
            self.release.group_name = group

            # Drop the matched span, then normalise the leftover whitespace.
            start, end = match.span()
            remainder = text[:start] + text[end:]
            self.release.metadata_text = re.sub(r"\s+", " ", remainder).strip()
            return True

        return False
|