mirror of
https://github.com/Dictionarry-Hub/profilarr.git
synced 2026-01-22 19:01:02 +01:00
- Added RegexReplace class for handling regex replacements. - Created ReleaseGroupParser for extracting release groups from titles. - Developed TitleParser for parsing movie titles, including editions and IDs. - Introduced QualitySource, Resolution, QualityModifier enums and QualityResult class for quality metadata. - Set up Dockerfile and docker-compose for containerized deployment. - Implemented ASP.NET Core web API for parsing requests. - Added TypeScript client for interacting with the parser service. - Enhanced configuration to support dynamic parser service URL.
165 lines
11 KiB
C#
165 lines
11 KiB
C#
using System.Text.RegularExpressions;
|
|
|
|
namespace Parser.Core;
|
|
|
|
/// <summary>
/// Detects the languages referenced in a release title using plain keyword
/// lookups and three sets of regular expressions (case-insensitive tags,
/// case-sensitive two-letter codes, and German DL/ML markers).
/// </summary>
public static class LanguageParser
{
    // Case-insensitive language tags. The pattern is compiled with
    // IgnorePatternWhitespace, so the line breaks and indentation between the
    // alternatives below are not part of the expression.
    private static readonly Regex LanguageRegex = new(@"(?:\W|_|^)(?<english>\beng\b)|
                                                        (?<italian>\b(?:ita|italian)\b)|
                                                        (?<german>(?:swiss)?german\b|videomann|ger[. ]dub|\bger\b)|
                                                        (?<flemish>flemish)|
                                                        (?<bulgarian>bgaudio)|
                                                        (?<romanian>rodubbed)|
                                                        (?<brazilian>\b(dublado|pt-BR)\b)|
                                                        (?<greek>greek)|
                                                        (?<french>\b(?:FR|VO|VF|VFF|VFQ|VFI|VF2|TRUEFRENCH|FRENCH|FRE|FRA)\b)|
                                                        (?<russian>\b(?:rus|ru)\b)|
                                                        (?<hungarian>\b(?:HUNDUB|HUN)\b)|
                                                        (?<hebrew>\b(?:HebDub|HebDubbed)\b)|
                                                        (?<polish>\b(?:PL\W?DUB|DUB\W?PL|LEK\W?PL|PL\W?LEK)\b)|
                                                        (?<chinese>\[(?:CH[ST]|BIG5|GB)\]|简|繁|字幕)|
                                                        (?<ukrainian>(?:(?:\dx)?UKR))|
                                                        (?<spanish>\b(?:español|castellano)\b)|
                                                        (?<catalan>\b(?:catalan?|catalán|català)\b)|
                                                        (?<latvian>\b(?:lat|lav|lv)\b)|
                                                        (?<telugu>\btel\b)|
                                                        (?<vietnamese>\bVIE\b)|
                                                        (?<japanese>\bJAP\b)|
                                                        (?<korean>\bKOR\b)|
                                                        (?<urdu>\burdu\b)|
                                                        (?<romansh>\b(?:romansh|rumantsch|romansch)\b)|
                                                        (?<mongolian>\b(?:mongolian|khalkha)\b)|
                                                        (?<georgian>\b(?:georgian|geo|ka|kat)\b)|
                                                        (?<original>\b(?:orig|original)\b)",
        RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace);

    // Upper-case two-letter codes, matched case-sensitively. Only the two
    // "SUB" look-arounds are case-insensitive (inline (?i) scoped to their own
    // groups): a code directly preceded by "SUB" + one separator character, or
    // directly followed by one separator character + "SUB", is not matched.
    private static readonly Regex CaseSensitiveLanguageRegex = new(@"(?:(?i)(?<!SUB[\W|_|^]))(?:(?<english>\bEN\b)|
                                                                    (?<lithuanian>\bLT\b)|
                                                                    (?<czech>\bCZ\b)|
                                                                    (?<polish>\bPL\b)|
                                                                    (?<bulgarian>\bBG\b)|
                                                                    (?<slovak>\bSK\b)|
                                                                    (?<german>\bDE\b)|
                                                                    (?<spanish>\b(?<!DTS[._ -])ES\b))(?:(?i)(?![\W|_|^]SUB))",
        RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace);

    // "DL" as a standalone word, unless it is the tail of "WEB-DL" / "WEB.DL" / "WEBDL" etc.
    private static readonly Regex GermanDualLanguageRegex = new(@"(?<!WEB[-_. ]?)\bDL\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);

    // "ML" as a standalone word.
    private static readonly Regex GermanMultiLanguageRegex = new(@"\bML\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);

    // Lower-case keywords looked up as plain substrings of the lower-cased
    // title, in detection order. Several keywords may map to the same language;
    // duplicates are removed when the result is returned.
    private static readonly (string Keyword, Language Result)[] KeywordLanguages =
    {
        ("english", Language.English),
        ("spanish", Language.Spanish),
        ("danish", Language.Danish),
        ("dutch", Language.Dutch),
        ("japanese", Language.Japanese),
        ("icelandic", Language.Icelandic),
        ("mandarin", Language.Chinese),
        ("cantonese", Language.Chinese),
        ("chinese", Language.Chinese),
        ("korean", Language.Korean),
        ("russian", Language.Russian),
        ("romanian", Language.Romanian),
        ("hindi", Language.Hindi),
        ("arabic", Language.Arabic),
        ("thai", Language.Thai),
        ("bulgarian", Language.Bulgarian),
        ("polish", Language.Polish),
        ("vietnamese", Language.Vietnamese),
        ("swedish", Language.Swedish),
        ("norwegian", Language.Norwegian),
        ("finnish", Language.Finnish),
        ("turkish", Language.Turkish),
        ("portuguese", Language.Portuguese),
        ("brazilian", Language.PortugueseBR),
        ("hungarian", Language.Hungarian),
        ("hebrew", Language.Hebrew),
        ("ukrainian", Language.Ukrainian),
        ("persian", Language.Persian),
        ("bengali", Language.Bengali),
        ("slovak", Language.Slovak),
        ("latvian", Language.Latvian),
        ("latino", Language.SpanishLatino),
        ("tamil", Language.Tamil),
        ("telugu", Language.Telugu),
        ("malayalam", Language.Malayalam),
        ("kannada", Language.Kannada),
        ("albanian", Language.Albanian),
        ("afrikaans", Language.Afrikaans),
        ("marathi", Language.Marathi),
        ("tagalog", Language.Tagalog),
    };

    // Named groups of CaseSensitiveLanguageRegex and the language each one reports.
    private static readonly (string Group, Language Result)[] CaseSensitiveGroups =
    {
        ("english", Language.English),
        ("lithuanian", Language.Lithuanian),
        ("czech", Language.Czech),
        ("polish", Language.Polish),
        ("bulgarian", Language.Bulgarian),
        ("slovak", Language.Slovak),
        ("spanish", Language.Spanish),
        ("german", Language.German),
    };

    // Named groups of LanguageRegex and the language each one reports.
    private static readonly (string Group, Language Result)[] CaseInsensitiveGroups =
    {
        ("english", Language.English),
        ("italian", Language.Italian),
        ("german", Language.German),
        ("flemish", Language.Flemish),
        ("greek", Language.Greek),
        ("french", Language.French),
        ("russian", Language.Russian),
        ("bulgarian", Language.Bulgarian),
        ("brazilian", Language.PortugueseBR),
        ("hungarian", Language.Hungarian),
        ("hebrew", Language.Hebrew),
        ("polish", Language.Polish),
        ("chinese", Language.Chinese),
        ("spanish", Language.Spanish),
        ("catalan", Language.Catalan),
        ("ukrainian", Language.Ukrainian),
        ("latvian", Language.Latvian),
        ("romanian", Language.Romanian),
        ("telugu", Language.Telugu),
        ("vietnamese", Language.Vietnamese),
        ("japanese", Language.Japanese),
        ("korean", Language.Korean),
        ("urdu", Language.Urdu),
        ("romansh", Language.Romansh),
        ("mongolian", Language.Mongolian),
        ("georgian", Language.Georgian),
        ("original", Language.Original),
    };

    /// <summary>
    /// Parses the languages referenced by a release title.
    /// </summary>
    /// <param name="title">The release title to inspect.</param>
    /// <returns>
    /// The distinct languages detected in <paramref name="title"/>. When nothing
    /// is detected (including a null or empty title) the list contains only
    /// <c>Language.Unknown</c>.
    /// </returns>
    public static List<Language> ParseLanguages(string title)
    {
        // A null title used to throw NullReferenceException; an empty one already
        // produced Unknown, so both are answered the same way here.
        if (string.IsNullOrEmpty(title))
        {
            return new List<Language> { Language.Unknown };
        }

        // Invariant lower-casing: the culture-sensitive ToLower() maps 'I' to a
        // dotless 'ı' under Turkish/Azeri cultures, which would make keywords such
        // as "icelandic" or "hindi" fail to match upper-case titles.
        var lowerTitle = title.ToLowerInvariant();
        var languages = new List<Language>();

        // Full word matches
        foreach (var (keyword, result) in KeywordLanguages)
        {
            if (lowerTitle.Contains(keyword))
            {
                languages.Add(result);
            }
        }

        // Case-sensitive regex matches (run against the original casing)
        AddGroupMatches(CaseSensitiveLanguageRegex.Matches(title), CaseSensitiveGroups, languages);

        // Case-insensitive regex matches
        AddGroupMatches(LanguageRegex.Matches(title), CaseInsensitiveGroups, languages);

        // Default to Unknown if no languages detected
        if (languages.Count == 0)
        {
            languages.Add(Language.Unknown);
        }

        // German dual-language and multi-language handling.
        // NOTE(review): this check runs before de-duplication, so a title in which
        // German is detected more than once (Count > 1) does not enter this
        // branch. Behavior kept as-is; confirm whether that is intended.
        if (languages.Count == 1 && languages[0] == Language.German)
        {
            if (GermanDualLanguageRegex.IsMatch(title))
            {
                languages.Add(Language.Original);
            }
            else if (GermanMultiLanguageRegex.IsMatch(title))
            {
                languages.Add(Language.Original);
                languages.Add(Language.English);
            }
        }

        // Return distinct languages
        return languages.Distinct().ToList();
    }

    // Appends, for every regex match, the language of each named group that participated in it.
    private static void AddGroupMatches(
        MatchCollection matches,
        (string Group, Language Result)[] groupMap,
        List<Language> languages)
    {
        foreach (Match match in matches)
        {
            foreach (var (group, result) in groupMap)
            {
                if (match.Groups[group].Success)
                {
                    languages.Add(result);
                }
            }
        }
    }
}
|