import regex as re

from watfag.parsers.generic import GenericParser
from watfag.parsers.generic.watfag import Group
from watfag.parsers.generic.parsers import DataParser

# Patterns are tried in order; each must expose the candidate name through the
# named capture group "group", because GroupParser.parse reads it by that name.
patterns = [
    # Trailing name introduced by "-" or " - ", optionally followed by ")".
    re.compile(r"(?:-| - )(?P<group>[a-zA-Z0-9 &]*)\)?$", re.UNICODE),
    # Trailing alphanumeric token after a space, optionally wrapped in
    # "[...]" and/or followed by ")".
    re.compile(r"(?: )\[?(?P<group>[a-zA-Z0-9]*?)]?\)?$", re.UNICODE),
]

invalid_group_substrs = [
    # lowercase-only matches that can be any substring of a group name
    ' hevc ',  # Can appear at end of release name while not being a group
    ' x264 ',
    ' x265 ',
    ' truehd ',
    'bluray',
]

invalid_groups = [
    # Case sensitive full group names that are invalid
    'MP4',
]


class GroupParser(DataParser, GenericParser):
    """Parses the SCENE release group from the unparsed text."""

    def parse(self) -> bool:
        """Extract the release group from ``self.release.metadata_text``.

        Each entry of ``patterns`` is searched in order. A candidate is
        rejected (and the next pattern tried) when its lowercased raw text
        contains any entry of ``invalid_group_substrs``, when its stripped
        text is listed in ``invalid_groups``, or when its stripped text is
        shorter than two characters.

        On the first accepted candidate, ``self.release.group`` and
        ``self.release.group_name`` are set, and the matched span is removed
        from ``self.release.metadata_text`` with runs of whitespace collapsed
        to single spaces.

        Returns:
            True if a group was found and stored, False otherwise.
        """
        text = self.release.metadata_text

        for pattern in patterns:
            match = pattern.search(text)
            if not match:
                continue

            raw_group = match.group("group")

            # Checked against the unstripped text on purpose: several of the
            # invalid substrings carry surrounding spaces.
            lowered = raw_group.lower()
            if any(substr in lowered for substr in invalid_group_substrs):
                continue  # Skip this match if it contains any invalid substrings

            group = raw_group.strip()
            if group in invalid_groups:
                continue  # Skip this match if it is in the list of invalid group names
            if len(group) < 2:
                continue  # Skip groups that are too short to be valid

            self.release.group = Group.from_string(group)
            self.release.group_name = group

            # Cut the whole match (separator and brackets included) out of the
            # remaining text, then clean up extra spaces.
            start, end = match.span()
            remaining = text[:start] + text[end:]
            self.release.metadata_text = re.sub(r"\s+", " ", remaining).strip()
            return True

        return False