From 5c26d6d7b2b7756fab9b97f904eb7a2043121ffa Mon Sep 17 00:00:00 2001 From: Sam Chau Date: Tue, 30 Dec 2025 10:33:52 +1030 Subject: [PATCH] feat(parser): implement C# parser microservice with regex-based title parsing - Added RegexReplace class for handling regex replacements. - Created ReleaseGroupParser for extracting release groups from titles. - Developed TitleParser for parsing movie titles, including editions and IDs. - Introduced QualitySource, Resolution, QualityModifier enums and QualityResult class for quality metadata. - Set up Dockerfile and docker-compose for containerized deployment. - Implemented ASP.NET Core web API for parsing requests. - Added TypeScript client for interacting with the parser service. - Enhanced configuration to support dynamic parser service URL. --- deno.json | 4 +- docs/PARSER_PORT_DESIGN.md | 293 +++++++++++ services/parser/Core/EpisodeParser.cs | 553 +++++++++++++++++++++ services/parser/Core/Language.cs | 64 +++ services/parser/Core/LanguageParser.cs | 164 ++++++ services/parser/Core/ParserCommon.cs | 55 ++ services/parser/Core/QualityParser.cs | 293 +++++++++++ services/parser/Core/RegexReplace.cs | 27 + services/parser/Core/ReleaseGroupParser.cs | 96 ++++ services/parser/Core/TitleParser.cs | 421 ++++++++++++++++ services/parser/Core/Types.cs | 52 ++ services/parser/Dockerfile | 17 + services/parser/Parser.csproj | 9 + services/parser/Program.cs | 142 ++++++ services/parser/docker-compose.yml | 14 + src/lib/server/utils/arr/parser/client.ts | 136 +++++ src/lib/server/utils/arr/parser/index.ts | 7 + src/lib/server/utils/arr/parser/types.ts | 154 ++++++ src/lib/server/utils/config/config.ts | 6 + 19 files changed, 2505 insertions(+), 2 deletions(-) create mode 100644 docs/PARSER_PORT_DESIGN.md create mode 100644 services/parser/Core/EpisodeParser.cs create mode 100644 services/parser/Core/Language.cs create mode 100644 services/parser/Core/LanguageParser.cs create mode 100644 services/parser/Core/ParserCommon.cs create mode 
100644 services/parser/Core/QualityParser.cs create mode 100644 services/parser/Core/RegexReplace.cs create mode 100644 services/parser/Core/ReleaseGroupParser.cs create mode 100644 services/parser/Core/TitleParser.cs create mode 100644 services/parser/Core/Types.cs create mode 100644 services/parser/Dockerfile create mode 100644 services/parser/Parser.csproj create mode 100644 services/parser/Program.cs create mode 100644 services/parser/docker-compose.yml create mode 100644 src/lib/server/utils/arr/parser/client.ts create mode 100644 src/lib/server/utils/arr/parser/index.ts create mode 100644 src/lib/server/utils/arr/parser/types.ts diff --git a/deno.json b/deno.json index 6f28599..80019dd 100644 --- a/deno.json +++ b/deno.json @@ -23,10 +23,10 @@ "simple-icons": "npm:simple-icons@^15.17.0" }, "tasks": { - "dev": "APP_BASE_PATH=./dist/dev deno run -A npm:vite dev", + "dev": "APP_BASE_PATH=./dist/dev PARSER_HOST=localhost PARSER_PORT=5000 deno run -A npm:vite dev", "build": "APP_BASE_PATH=./dist/build deno run -A npm:vite build && deno compile --no-check --allow-net --allow-read --allow-write --allow-env --allow-ffi --allow-run --target x86_64-unknown-linux-gnu --output dist/linux/profilarr dist/build/mod.ts", "build:windows": "APP_BASE_PATH=./dist/build deno run -A npm:vite build && deno compile --no-check --allow-net --allow-read --allow-write --allow-env --allow-ffi --allow-run --target x86_64-pc-windows-msvc --output dist/windows/profilarr.exe dist/build/mod.ts", - "preview": "PORT=6868 APP_BASE_PATH=./dist/dev ./dist/linux/profilarr", + "preview": "PORT=6868 APP_BASE_PATH=./dist/dev PARSER_HOST=localhost PARSER_PORT=5000 ./dist/linux/profilarr", "format": "prettier --write .", "lint": "prettier --check . 
&& eslint .", "test": "APP_BASE_PATH=./dist/test deno test src/tests --allow-read --allow-write --allow-env", diff --git a/docs/PARSER_PORT_DESIGN.md b/docs/PARSER_PORT_DESIGN.md new file mode 100644 index 0000000..7628ec2 --- /dev/null +++ b/docs/PARSER_PORT_DESIGN.md @@ -0,0 +1,293 @@ +# Unified Release Title Parser - C# Microservice + +Parser microservice for release title parsing, using native .NET regex for exact Radarr/Sonarr parity. + +--- + +## Goal + +Enable testing of custom format conditions against release titles without requiring a connected arr instance. Uses a C# microservice with regex patterns copied directly from Radarr/Sonarr source. + +--- + +## Architecture + +``` +┌─────────────────┐ HTTP ┌─────────────────────┐ +│ │ POST │ │ +│ Profilarr UI │ ───────────> │ Parser Service │ +│ (SvelteKit) │ /parse │ (C# / .NET 8) │ +│ │ <─────────── │ │ +└─────────────────┘ JSON └─────────────────────┘ +``` + +**Why C# microservice?** +- Native .NET regex - exact parity with Radarr/Sonarr +- Copy parser classes verbatim from source +- Fast (~1-5ms per parse) +- Easy to sync with upstream changes + +--- + +## Current Status + +### Completed (Phase 1-6) + +- [x] C# microservice scaffolded (`services/parser/`) +- [x] QualityParser ported from Radarr +- [x] TypeScript client in Profilarr +- [x] Config for `PARSER_HOST` / `PARSER_PORT` +- [x] LanguageParser ported from Radarr (58 languages supported) +- [x] ReleaseGroupParser ported from Radarr +- [x] TitleParser ported from Radarr (title, year, edition, IMDB/TMDB IDs) +- [x] EpisodeParser ported from Sonarr (ReleaseType, season/episode detection) + +### Remaining (Phase 7+) + +- [ ] Custom format testing UI integration + +--- + +## File Structure + +### C# Microservice + +``` +services/parser/ +├── Parser.csproj +├── Program.cs # Minimal API (POST /parse, GET /health) +├── Dockerfile +├── docker-compose.yml # Standalone docker compose +└── Core/ + ├── Types.cs # QualitySource, Resolution, QualityModifier enums + 
├── Language.cs # Language enum (58 languages) + ├── RegexReplace.cs # Helper for regex replacement + ├── ParserCommon.cs # Shared regex patterns + ├── QualityParser.cs # Ported from Radarr (regex + decision tree) + ├── LanguageParser.cs # Ported from Radarr (language detection) + ├── ReleaseGroupParser.cs # Ported from Radarr (release group extraction) + ├── TitleParser.cs # Ported from Radarr (title, year, edition, IDs) + └── EpisodeParser.cs # Ported from Sonarr (season/episode, ReleaseType) +``` + +### TypeScript Client + +``` +src/lib/server/utils/arr/parser/ +├── index.ts # Exports +├── types.ts # Matching TypeScript enums +└── client.ts # HTTP client (uses config.parserUrl) +``` + +### Configuration + +``` +src/lib/server/utils/config/config.ts +``` + +Environment variables: +- `PARSER_HOST` (default: `localhost`) +- `PARSER_PORT` (default: `5000`) + +--- + +## API + +### POST /parse + +Request: +```json +{ "title": "Movie.Name.2024.1080p.BluRay.REMUX-GROUP" } +``` + +Response (movie): +```json +{ + "title": "Movie.Name.2024.1080p.BluRay.REMUX-GROUP", + "source": "Bluray", + "resolution": 1080, + "modifier": "Remux", + "revision": { + "version": 1, + "real": 0, + "isRepack": false + }, + "languages": ["Unknown"], + "releaseGroup": "GROUP", + "movieTitles": ["Movie Name"], + "year": 2024, + "edition": null, + "imdbId": null, + "tmdbId": 0, + "hardcodedSubs": null, + "releaseHash": null, + "episode": null +} +``` + +Response (TV series): +```json +{ + "title": "Show.Name.S01E05.Episode.Title.1080p.WEB-DL-GROUP", + "source": "WebDL", + "resolution": 1080, + "modifier": "None", + "revision": { "version": 1, "real": 0, "isRepack": false }, + "languages": ["Unknown"], + "releaseGroup": "GROUP", + "movieTitles": [], + "year": 0, + "edition": null, + "imdbId": null, + "tmdbId": 0, + "hardcodedSubs": null, + "releaseHash": null, + "episode": { + "seriesTitle": "Show Name", + "seasonNumber": 1, + "episodeNumbers": [5], + "absoluteEpisodeNumbers": [], + "airDate": 
null, + "fullSeason": false, + "isPartialSeason": false, + "isMultiSeason": false, + "isMiniSeries": false, + "special": false, + "releaseType": "SingleEpisode" + } +} +``` + +### GET /health + +Response: +```json +{ "status": "healthy" } +``` + +--- + +## Enums + +### QualitySource +```csharp +Unknown, Cam, Telesync, Telecine, Workprint, DVD, TV, WebDL, WebRip, Bluray +``` + +### Resolution +```csharp +Unknown = 0, R360p = 360, R480p = 480, R540p = 540, R576p = 576, +R720p = 720, R1080p = 1080, R2160p = 2160 +``` + +### QualityModifier +```csharp +None, Regional, Screener, RawHD, BRDisk, Remux +``` + +### ReleaseType +```csharp +Unknown, SingleEpisode, MultiEpisode, SeasonPack +``` + +### Language (58 supported) +```csharp +Unknown, English, French, Spanish, German, Italian, Danish, Dutch, Japanese, +Icelandic, Chinese, Russian, Polish, Vietnamese, Swedish, Norwegian, Finnish, +Turkish, Portuguese, Flemish, Greek, Korean, Hungarian, Hebrew, Lithuanian, +Czech, Hindi, Romanian, Thai, Bulgarian, PortugueseBR, Arabic, Ukrainian, +Persian, Bengali, Slovak, Latvian, SpanishLatino, Catalan, Croatian, Serbian, +Bosnian, Estonian, Tamil, Indonesian, Telugu, Macedonian, Slovenian, Malayalam, +Kannada, Albanian, Afrikaans, Marathi, Tagalog, Urdu, Romansh, Mongolian, +Georgian, Original +``` + +--- + +## Running the Service + +### Local Development + +```bash +cd services/parser +dotnet run +``` + +### Docker + +```bash +docker build -t profilarr-parser services/parser +docker run -p 5000:5000 profilarr-parser +``` + +### Docker Compose (standalone) + +```bash +cd services/parser +docker compose up -d +``` + +This uses the `services/parser/docker-compose.yml` which builds and runs the parser service. 
/// <summary>
/// Coarse classification of a parsed TV release.
/// </summary>
public enum ReleaseType
{
    Unknown = 0,
    SingleEpisode = 1,
    MultiEpisode = 2,
    SeasonPack = 3
}

/// <summary>
/// Result of parsing a TV release title: series title, season/episode numbering,
/// air date (for daily shows) and the flags derived from the matched pattern.
/// </summary>
public class ParsedEpisodeInfo
{
    public string? SeriesTitle { get; set; }
    public int SeasonNumber { get; set; }

    // Both arrays default to empty (never null) so ReleaseType / IsAbsoluteNumbering
    // can be evaluated on a freshly constructed instance.
    public int[] EpisodeNumbers { get; set; } = Array.Empty<int>();
    public int[] AbsoluteEpisodeNumbers { get; set; } = Array.Empty<int>();

    /// <summary>Air date formatted as yyyy-MM-dd; only set for daily releases.</summary>
    public string? AirDate { get; set; }
    public bool FullSeason { get; set; }
    public bool IsPartialSeason { get; set; }
    public bool IsMultiSeason { get; set; }
    public bool IsSeasonExtra { get; set; }
    public bool IsSplitEpisode { get; set; }
    public bool IsMiniSeries { get; set; }
    public bool Special { get; set; }
    public int SeasonPart { get; set; }
    public int? DailyPart { get; set; }

    /// <summary>True when an air date was parsed.</summary>
    public bool IsDaily => !string.IsNullOrWhiteSpace(AirDate);

    /// <summary>True when at least one absolute episode number was parsed.</summary>
    public bool IsAbsoluteNumbering => AbsoluteEpisodeNumbers.Any();

    /// <summary>
    /// Derived release type. Episode counts take precedence over <see cref="FullSeason"/>:
    /// more than one (regular or absolute) episode is MultiEpisode, exactly one is
    /// SingleEpisode, otherwise a full season is a SeasonPack, otherwise Unknown.
    /// </summary>
    public ReleaseType ReleaseType
    {
        get
        {
            if (EpisodeNumbers.Length > 1 || AbsoluteEpisodeNumbers.Length > 1)
            {
                return ReleaseType.MultiEpisode;
            }

            if (EpisodeNumbers.Length == 1 || AbsoluteEpisodeNumbers.Length == 1)
            {
                return ReleaseType.SingleEpisode;
            }

            if (FullSeason)
            {
                return ReleaseType.SeasonPack;
            }

            return ReleaseType.Unknown;
        }
    }
}

/// <summary>
/// Regex-based TV release title parser. Patterns in <see cref="ReportTitleRegex"/> are
/// tried in declaration order and the first one that yields a result wins, so the
/// order of the array is significant.
/// </summary>
public static class EpisodeParser
{
    private static readonly Regex[] ReportTitleRegex = new[]
    {
        // Daily episode with year in series title and air time after date (Plex DVR format)
        new Regex(@"^^(?<title>.+?\((?<titleyear>\d{4})\))[-_. ]+(?<airyear>19[4-9]\d|20\d\d)(?<sep>[-_]?)(?<airmonth>0\d|1[0-2])\k<sep>(?<airday>[0-2]\d|3[01])[-_. ]\d{2}[-_. ]\d{2}[-_. ]\d{2}",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Daily episodes without title (2018-10-12, 20181012)
        new Regex(@"^(?<airyear>19[6-9]\d|20\d\d)(?<sep>[-_]?)(?<airmonth>0\d|1[0-2])\k<sep>(?<airday>[0-2]\d|3[01])(?!\d)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Multi-Part episodes without a title (S01E05.S01E06)
        new Regex(@"^(?:\W*S(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:e{1,2}(?<episode>\d{1,3}(?!\d+)))+){2,}",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Multi-Part episodes without a title (1x05.1x06)
        new Regex(@"^(?:\W*(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:x{1,2}(?<episode>\d{1,3}(?!\d+)))+){2,}",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Episodes without a title, Multi (S01E04E05, 1x04x05, etc)
        new Regex(@"^(?:S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[-_]|[ex]){1,2}(?<episode>\d{2,3}(?!\d+))){2,})",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Split episodes (S01E05a, S01E05b, etc)
        new Regex(@"^(?<title>.+?)(?:S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[-_ ]?[ex])(?<episode>\d{2,3}(?!\d+))(?<splitepisode>[a-d])(?:[ _.])))",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Episodes without a title, Single (S01E05, 1x05)
        new Regex(@"^(?:S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[-_ ]?[ex])(?<episode>\d{2,3}(?!\d+))))",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Anime - [SubGroup] Title Absolute (Season+Episode)
        new Regex(@"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(\.\d{1,2})?(?!\d+))(?:[-_. ])+\((?:S(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+))))(?:v\d+)?(?:\)(?!\d+)).*?(?<hash>[(\[]\w{8}[)\]])?$",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Anime - [SubGroup] Title Season+Episode
        new Regex(@"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:[-_\W](?<![()\[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(?:v\d+)?(?:[_. ](?!\d+)).*?(?<hash>[(\[]\w{8}[)\]])?$",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Anime - [SubGroup] Title Episode Absolute Episode Number
        new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)(?<title>.+?)[-_. ]+?(?:Episode)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(\.\d{1,2})?(?!\d+)))+.*?(?<hash>[(\[]\w{8}[)\]])?$",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Anime - [SubGroup] Title with trailing number Absolute Episode Number
        new Regex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>[^-]+?)(?:(?<![-_. ]|\b[0]\d+) - )(?:[-_. ]?(?<absoluteepisode>\d{2,3}(\.\d{1,2})?(?!\d+)))+(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>[(\[]\w{8}[)\]])?(?:$|\.mkv)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Anime - [SubGroup] Title Absolute Episode Number
        new Regex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>.+?)[-_. ]+\(?(?:[-_. ]?#?(?<absoluteepisode>\d{2,3}(\.\d{1,2})?(?!\d+|-[a-z]+)))+\)?(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>[(\[]\w{8}[)\]])?(?:$|\.mkv)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Multi-episode Repeated (S01E05 - S01E06)
        new Regex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+S(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:e|[-_. ]e){1,2}(?<episode>\d{1,3}(?!\d+)))+){2,}",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Multi-episode Repeated (1x05 - 1x06)
        new Regex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:x{1,2}(?<episode>\d{1,3}(?!\d+)))+){2,}",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Multi-episode with title (S01E99-100, S01E05-06)
        new Regex(@"^(?<title>.+?)(?:[-_\W](?<![()\[!]))+S(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))E(?<episode>\d{2,3}(?!\d+))(?:-(?<episode>\d{2,3}(?!\d+)))+(?:[-_. ]|$)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, etc)
        new Regex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+S?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))(?:[ex]|\W[ex]){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+)))*)(?:[-_. ]|$)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Episodes with a title, 4 digit season number (S2016E05, etc)
        new Regex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+S(?<season>(?<!\d+)(?:\d{4})(?!\d+))(?:e|\We|_){1,2}(?<episode>\d{2,4}(?!\d+))(?:(?:\-|e|\We|_){1,2}(?<episode>\d{2,3}(?!\d+)))*)\W?(?!\\)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Multi-season pack
        new Regex(@"^(?<title>.+?)(Complete Series)?[-_. ]+(?:S|(?:Season|Saison|Series|Stagione)[_. ])(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))(?:[-_. ]{1}|[-_. ]{3})(?:S|(?:Season|Saison|Series|Stagione)[_. ])?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Partial season pack
        new Regex(@"^(?<title>.+?)(?:\W+S(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))\W+(?:(?:(?:Part|Vol)\W?|(?<!\d+\W+)e|p)(?<seasonpart>\d{1,2}(?!\d+)))+)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Season only releases followed by year
        new Regex(@"^(?<title>.+?)[-_. ]+?(?:S|Season|Saison|Series|Stagione)[-_. ]?(?<season>\d{1,2}(?=[-_. ]\d{4}[-_. ]+))(?<extras>EXTRAS|SUBPACK)?(?!\\)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Season only releases
        new Regex(@"^(?<title>.+?)[-_. ]+?(?:S|Season|Saison|Series|Stagione)[-_. ]?(?<season>\d{1,2}(?![-_. ]?\d+))(?:[-_. ]|$)+(?<extras>EXTRAS|SUBPACK)?(?!\\)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // 4 digit season only releases
        new Regex(@"^(?<title>.+?)[-_. ]+?(?:S|Season|Saison|Series|Stagione)[-_. ]?(?<season>\d{4}(?![-_. ]?\d+))(\W+|_|$)(?<extras>EXTRAS|SUBPACK)?(?!\\)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Mini-Series with year in title (Part01, Part 01, Part.1)
        new Regex(@"^(?<title>.+?\d{4})(?:\W+(?:(?:Part\W?|e)(?<episode>\d{1,2}(?!\d+)))+)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Mini-Series (E1-E2)
        new Regex(@"^(?<title>.+?)(?:[-._ ][e])(?<episode>\d{2,3}(?!\d+))(?:(?:\-?[e])(?<episode>\d{2,3}(?!\d+)))+",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Episodes with airdate (2018.04.28)
        new Regex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})[-_. ]+(?<airmonth>[0-1][0-9])[-_. ]+(?<airday>[0-3][0-9])(?![-_. ]+[0-3][0-9])",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Episodes with airdate (04.28.2018)
        new Regex(@"^(?<title>.+?)?\W*(?<ambiguousairmonth>[0-1][0-9])[-_. ]+(?<ambiguousairday>[0-3][0-9])[-_. ]+(?<airyear>\d{4})(?!\d+)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Episodes with airdate (20180428)
        new Regex(@"^(?<title>.+?)?\W*(?<!\d+)(?<airyear>\d{4})(?<airmonth>[0-1][0-9])(?<airday>[0-3][0-9])(?!\d+)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Mini-Series (Part01, Part 01, Part.1)
        new Regex(@"^(?<title>.+?)(?:\W+(?:(?:(?<!\()Part\W?|(?<!\d+\W+)e)(?<episode>\d{1,2}(?!\d+|\))))+)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Mini-Series (Part One/Two/Three/...Nine)
        new Regex(@"^(?<title>.+?)(?:\W+(?:Part[-._ ](?<episode>One|Two|Three|Four|Five|Six|Seven|Eight|Nine)(?>[-._ ])))",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Mini-Series (XofY)
        new Regex(@"^(?<title>.+?)(?:\W+(?:(?<episode>(?<!\d+)\d{1,2}(?!\d+))of\d+)+)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Supports Season 01 Episode 03
        new Regex(@"(?:.*(?:\""|^))(?<title>.*?)(?:[-_\W](?<![()\[]))+(?:\W?Season\W?)(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:\W|_)+(?:Episode\W)(?:[-_. ]?(?<episode>(?<!\d+)\d{1,2}(?!\d+)))+",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Multi-episode with single episode numbers (S6.E1-E2, S6.E1E2, S6E1E2, etc)
        new Regex(@"^(?<title>.+?)[-_. ]S(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:[-_. ]?[ex]?(?<episode>(?<!\d+)\d{1,2}(?!\d+)))+",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Single episode season or episode S1E1 or S1-E1 or S1.Ep1
        new Regex(@"(?:.*(?:\""|^))(?<title>.*?)(?:\W?|_)S(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:\W|_)?Ep?[ ._]?(?<episode>(?<!\d+)\d{1,2}(?!\d+))",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Supports 103/113 naming
        new Regex(@"^(?<title>.+?)?(?:(?:[_.-](?<![()\[!]))+(?<season>(?<!\d+)[1-9])(?<episode>[1-9][0-9]|[0][1-9])(?![a-z]|\d+))+(?:[_.]|$)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Anime - Title Absolute Episode Number
        new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,4}(\.\d{1,2})?(?!\d+|[ip])))+.*?(?<hash>[(\[]\w{8}[)\]])?$",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Anime OVA special
        new Regex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>.+?)(?:[-_. ]+(?<special>special|ova|ovd)).*?(?<hash>[(\[]\w{8}[)\]])?(?:$|\.mkv)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled)
    };

    // Titles matching any of these are rejected by ValidateBeforeParsing before any parsing is attempted.
    private static readonly Regex[] RejectHashedReleasesRegex = new Regex[]
    {
        new Regex(@"^[0-9a-zA-Z]{32}", RegexOptions.Compiled),
        new Regex(@"^[a-z0-9]{24}$", RegexOptions.Compiled),
        new Regex(@"^[A-Z]{11}\d{3}$", RegexOptions.Compiled),
        new Regex(@"^[a-z]{12}\d{3}$", RegexOptions.Compiled),
        new Regex(@"^Backup_\d{5,}S\d{2}-\d{2}$", RegexOptions.Compiled),
        new Regex(@"^123$", RegexOptions.Compiled),
        new Regex(@"^abc$", RegexOptions.Compiled | RegexOptions.IgnoreCase),
        new Regex(@"^abc[-_. ]xyz", RegexOptions.Compiled | RegexOptions.IgnoreCase),
        new Regex(@"^b00bs$", RegexOptions.Compiled | RegexOptions.IgnoreCase)
    };

    // Detects titles that were posted character-reversed (e.g. "p027" for "720p", "10E01S" for "S01E01").
    private static readonly Regex ReversedTitleRegex = new(
        @"(?:^|[-._ ])(p027|p0801|\d{2,3}E\d{2}S)[-._ ]",
        RegexOptions.Compiled);

    // Strips resolution / codec / bit-depth tokens so their digits are not mistaken for episode numbers.
    private static readonly RegexReplace SimpleTitleRegex = new(
        @"(?:(480|540|576|720|1080|1440|2160)[ip]|[xh][\W_]?26[45]|DD\W?5\W1|[<>?*]|848x480|1280x720|1920x1080|3840x2160|4096x2160|(?<![a-f0-9])(8|10)[ -]?(b(?![a-z0-9])|bit))\s*?",
        string.Empty,
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private static readonly Regex CleanQualityBracketsRegex = new(
        @"\[[a-z0-9 ._-]+\]$",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private static readonly Regex SixDigitAirDateRegex = new(
        @"(?<=[_.-])(?<airdate>(?<!\d)(?<airyear>[1-9]\d{1})(?<airmonth>[0-1][0-9])(?<airday>[0-3][0-9]))(?=[_.-])",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private static readonly Regex RequestInfoRegex = new(
        @"^(?:\[.+?\])+",
        RegexOptions.Compiled);

    // Index in this array is the numeric value of the word (used by ParseNumber for "Part One" etc.).
    private static readonly string[] Numbers = { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" };

    /// <summary>
    /// Parses a TV release title into season/episode (or air date) information.
    /// </summary>
    /// <param name="title">Raw release title or file name.</param>
    /// <returns>
    /// The parsed episode info from the first matching pattern, or <c>null</c> when the title is
    /// rejected by validation, no pattern yields a result, or any exception occurs while parsing.
    /// </returns>
    public static ParsedEpisodeInfo? ParseTitle(string title)
    {
        try
        {
            if (!ValidateBeforeParsing(title))
            {
                return null;
            }

            if (ReversedTitleRegex.IsMatch(title))
            {
                // Reverse everything except the file extension, which is re-appended unchanged.
                var titleWithoutExtension = ParserCommon.RemoveFileExtension(title).ToCharArray();
                Array.Reverse(titleWithoutExtension);
                title = $"{new string(titleWithoutExtension)}{title.Substring(titleWithoutExtension.Length)}";
            }

            var releaseTitle = ParserCommon.RemoveFileExtension(title);
            releaseTitle = releaseTitle.Replace("【", "[").Replace("】", "]");

            // Only the first pre-substitution that changes the title is applied.
            foreach (var replace in ParserCommon.PreSubstitutionRegex)
            {
                if (replace.TryReplace(ref releaseTitle))
                {
                    break;
                }
            }

            var simpleTitle = SimpleTitleRegex.Replace(releaseTitle);
            simpleTitle = ParserCommon.WebsitePrefixRegex.Replace(simpleTitle);
            simpleTitle = ParserCommon.WebsitePostfixRegex.Replace(simpleTitle);
            simpleTitle = ParserCommon.CleanTorrentSuffixRegex.Replace(simpleTitle);
            simpleTitle = CleanQualityBracketsRegex.Replace(simpleTitle, string.Empty);

            // Handle 6-digit air dates (YYMMDD) by expanding them to 20YY.MM.DD
            var sixDigitAirDateMatch = SixDigitAirDateRegex.Match(simpleTitle);
            if (sixDigitAirDateMatch.Success)
            {
                var airYear = sixDigitAirDateMatch.Groups["airyear"].Value;
                var airMonth = sixDigitAirDateMatch.Groups["airmonth"].Value;
                var airDay = sixDigitAirDateMatch.Groups["airday"].Value;

                // NOTE(review): '||' only skips the rewrite when BOTH parts are "00";
                // left unchanged here — confirm against upstream before tightening to '&&'.
                if (airMonth != "00" || airDay != "00")
                {
                    var fixedDate = $"20{airYear}.{airMonth}.{airDay}";
                    simpleTitle = simpleTitle.Replace(sixDigitAirDateMatch.Groups["airdate"].Value, fixedDate);
                }
            }

            foreach (var regex in ReportTitleRegex)
            {
                var match = regex.Matches(simpleTitle);

                if (match.Count != 0)
                {
                    var result = ParseMatchCollection(match, releaseTitle);
                    if (result != null)
                    {
                        // A "full season" whose title mentions "Special" is treated as a special instead.
                        if (result.FullSeason && releaseTitle.Contains("Special", StringComparison.OrdinalIgnoreCase))
                        {
                            result.FullSeason = false;
                            result.Special = true;
                        }

                        return result;
                    }
                }
            }
        }
        catch
        {
            // Deliberate best-effort: any failure while parsing (including FormatException from
            // ParseNumber/ParseDecimal) is reported to the caller as "no result".
        }

        return null;
    }

    /// <summary>
    /// Builds a <see cref="ParsedEpisodeInfo"/> from the matches of a single pattern.
    /// </summary>
    /// <param name="matchCollection">Non-empty matches of one <see cref="ReportTitleRegex"/> pattern.</param>
    /// <param name="releaseTitle">Release title without extension (currently unused).</param>
    /// <returns>
    /// The parsed info, or <c>null</c> when the episode range is descending, the date is
    /// ambiguous/invalid, or the air date is outside the accepted range.
    /// </returns>
    private static ParsedEpisodeInfo? ParseMatchCollection(MatchCollection matchCollection, string releaseTitle)
    {
        var seriesName = matchCollection[0].Groups["title"].Value.Replace('.', ' ').Replace('_', ' ');
        seriesName = RequestInfoRegex.Replace(seriesName, "").Trim(' ');

        // airYear stays 0 when the pattern has no "airyear" group, selecting the season/episode branch.
        int.TryParse(matchCollection[0].Groups["airyear"].Value, out var airYear);

        ParsedEpisodeInfo result;

        if (airYear < 1900)
        {
            result = new ParsedEpisodeInfo
            {
                EpisodeNumbers = Array.Empty<int>(),
                AbsoluteEpisodeNumbers = Array.Empty<int>()
            };

            foreach (Match matchGroup in matchCollection)
            {
                var episodeCaptures = matchGroup.Groups["episode"].Captures.Cast<Capture>().ToList();
                var absoluteEpisodeCaptures = matchGroup.Groups["absoluteepisode"].Captures.Cast<Capture>().ToList();

                if (episodeCaptures.Any())
                {
                    var first = ParseNumber(episodeCaptures.First().Value);
                    var last = ParseNumber(episodeCaptures.Last().Value);

                    if (first > last)
                    {
                        return null;
                    }

                    // First and last captures are treated as an inclusive range.
                    var count = last - first + 1;
                    result.EpisodeNumbers = Enumerable.Range(first, count).ToArray();

                    if (matchGroup.Groups["special"].Success)
                    {
                        result.Special = true;
                    }

                    if (matchGroup.Groups["splitepisode"].Success)
                    {
                        result.IsSplitEpisode = true;
                    }
                }

                if (absoluteEpisodeCaptures.Any())
                {
                    var first = ParseDecimal(absoluteEpisodeCaptures.First().Value);
                    var last = ParseDecimal(absoluteEpisodeCaptures.Last().Value);

                    if (first > last)
                    {
                        return null;
                    }

                    if ((first % 1) != 0 || (last % 1) != 0)
                    {
                        // Special episode with decimal
                        result.Special = true;
                    }
                    else
                    {
                        var count = last - first + 1;
                        result.AbsoluteEpisodeNumbers = Enumerable.Range((int)first, (int)count).ToArray();

                        if (matchGroup.Groups["special"].Success)
                        {
                            result.Special = true;
                        }
                    }
                }

                if (!episodeCaptures.Any() && !absoluteEpisodeCaptures.Any())
                {
                    if (!string.IsNullOrWhiteSpace(matchCollection[0].Groups["extras"].Value))
                    {
                        result.IsSeasonExtra = true;
                    }

                    var seasonPart = matchCollection[0].Groups["seasonpart"].Value;

                    if (!string.IsNullOrWhiteSpace(seasonPart))
                    {
                        result.SeasonPart = Convert.ToInt32(seasonPart);
                        result.IsPartialSeason = true;
                    }
                    else if (matchCollection[0].Groups["special"].Success)
                    {
                        result.Special = true;
                    }
                    else
                    {
                        result.FullSeason = true;
                    }
                }
            }

            var seasons = new List<int>();

            foreach (Capture seasonCapture in matchCollection[0].Groups["season"].Captures)
            {
                if (int.TryParse(seasonCapture.Value, out var parsedSeason))
                {
                    seasons.Add(parsedSeason);
                }
            }

            if (seasons.Distinct().Count() > 1)
            {
                result.IsMultiSeason = true;
            }

            if (seasons.Any())
            {
                result.SeasonNumber = seasons.First();
            }
            else if (!result.AbsoluteEpisodeNumbers.Any() && result.EpisodeNumbers.Any())
            {
                // Episodes without any season are treated as season 1 of a mini-series.
                result.SeasonNumber = 1;
                result.IsMiniSeries = true;
            }
        }
        else
        {
            // Daily show with air date
            var airmonth = 0;
            var airday = 0;

            if (matchCollection[0].Groups["ambiguousairmonth"].Success &&
                matchCollection[0].Groups["ambiguousairday"].Success)
            {
                var ambiguousAirMonth = Convert.ToInt32(matchCollection[0].Groups["ambiguousairmonth"].Value);
                var ambiguousAirDay = Convert.ToInt32(matchCollection[0].Groups["ambiguousairday"].Value);

                if (ambiguousAirDay <= 12 && ambiguousAirMonth <= 12)
                {
                    // Ambiguous date
                    return null;
                }

                airmonth = ambiguousAirMonth;
                airday = ambiguousAirDay;
            }
            else
            {
                airmonth = Convert.ToInt32(matchCollection[0].Groups["airmonth"].Value);
                airday = Convert.ToInt32(matchCollection[0].Groups["airday"].Value);
            }

            // Swap day and month if month is bigger than 12
            if (airmonth > 12)
            {
                (airday, airmonth) = (airmonth, airday);
            }

            try
            {
                // Throws ArgumentOutOfRangeException for impossible dates; handled below.
                var airDate = new DateTime(airYear, airmonth, airday);

                // Reject dates later than tomorrow (local time) or before 1970.
                if (airDate > DateTime.Now.AddDays(1).Date || airDate < new DateTime(1970, 1, 1))
                {
                    return null;
                }

                result = new ParsedEpisodeInfo
                {
                    AirDate = airDate.ToString("yyyy-MM-dd")
                };

                // NOTE(review): none of the patterns in ReportTitleRegex defines a "part" group,
                // so this branch is never taken with the current pattern set.
                var partMatch = matchCollection[0].Groups["part"];
                if (partMatch.Success)
                {
                    result.DailyPart = Convert.ToInt32(partMatch.Value);
                }
            }
            catch
            {
                return null;
            }
        }

        result.SeriesTitle = seriesName;

        return result;
    }

    /// <summary>
    /// Cheap pre-filter: rejects password-protected yEnc posts, titles with no letters or digits,
    /// and titles matching <see cref="RejectHashedReleasesRegex"/>.
    /// </summary>
    private static bool ValidateBeforeParsing(string title)
    {
        // Ordinal, case-insensitive search: no culture dependence and no temporary lower-cased copies.
        if (title.Contains("password", StringComparison.OrdinalIgnoreCase) &&
            title.Contains("yenc", StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        if (!title.Any(char.IsLetterOrDigit))
        {
            return false;
        }

        var titleWithoutExtension = ParserCommon.RemoveFileExtension(title);

        if (RejectHashedReleasesRegex.Any(v => v.IsMatch(titleWithoutExtension)))
        {
            return false;
        }

        return true;
    }

    /// <summary>
    /// Converts an episode capture to an integer; accepts digits or the words "zero".."nine".
    /// </summary>
    /// <exception cref="FormatException">The value is neither an integer nor a known number word.</exception>
    private static int ParseNumber(string value)
    {
        if (int.TryParse(value, out var number))
        {
            return number;
        }

        // Invariant lower-casing: culture-sensitive ToLower() maps 'I' to dotless 'ı' under
        // Turkish cultures, which would make "FIVE"/"SIX"/"EIGHT"/"NINE" miss the lookup.
        number = Array.IndexOf(Numbers, value.ToLowerInvariant());

        if (number != -1)
        {
            return number;
        }

        throw new FormatException($"{value} isn't a number");
    }

    /// <summary>
    /// Converts an absolute-episode capture (which may carry a decimal part, e.g. "12.5")
    /// to a decimal using the invariant culture.
    /// </summary>
    /// <exception cref="FormatException">The value is not a valid number.</exception>
    private static decimal ParseDecimal(string value)
    {
        if (decimal.TryParse(value, System.Globalization.NumberStyles.Float,
            System.Globalization.CultureInfo.InvariantCulture, out var number))
        {
            return number;
        }

        throw new FormatException($"{value} isn't a number");
    }
}
= 25, + Hindi = 26, + Romanian = 27, + Thai = 28, + Bulgarian = 29, + PortugueseBR = 30, + Arabic = 31, + Ukrainian = 32, + Persian = 33, + Bengali = 34, + Slovak = 35, + Latvian = 36, + SpanishLatino = 37, + Catalan = 38, + Croatian = 39, + Serbian = 40, + Bosnian = 41, + Estonian = 42, + Tamil = 43, + Indonesian = 44, + Telugu = 45, + Macedonian = 46, + Slovenian = 47, + Malayalam = 48, + Kannada = 49, + Albanian = 50, + Afrikaans = 51, + Marathi = 52, + Tagalog = 53, + Urdu = 54, + Romansh = 55, + Mongolian = 56, + Georgian = 57, + Original = 58 // Special: indicates original audio track +} diff --git a/services/parser/Core/LanguageParser.cs b/services/parser/Core/LanguageParser.cs new file mode 100644 index 0000000..6a5a644 --- /dev/null +++ b/services/parser/Core/LanguageParser.cs @@ -0,0 +1,164 @@ +using System.Text.RegularExpressions; + +namespace Parser.Core; + +public static class LanguageParser +{ + private static readonly Regex LanguageRegex = new(@"(?:\W|_|^)(?<english>\beng\b)| + (?<italian>\b(?:ita|italian)\b)| + (?<german>(?:swiss)?german\b|videomann|ger[. 
]dub|\bger\b)| + (?<flemish>flemish)| + (?<bulgarian>bgaudio)| + (?<romanian>rodubbed)| + (?<brazilian>\b(dublado|pt-BR)\b)| + (?<greek>greek)| + (?<french>\b(?:FR|VO|VF|VFF|VFQ|VFI|VF2|TRUEFRENCH|FRENCH|FRE|FRA)\b)| + (?<russian>\b(?:rus|ru)\b)| + (?<hungarian>\b(?:HUNDUB|HUN)\b)| + (?<hebrew>\b(?:HebDub|HebDubbed)\b)| + (?<polish>\b(?:PL\W?DUB|DUB\W?PL|LEK\W?PL|PL\W?LEK)\b)| + (?<chinese>\[(?:CH[ST]|BIG5|GB)\]|简|繁|字幕)| + (?<ukrainian>(?:(?:\dx)?UKR))| + (?<spanish>\b(?:español|castellano)\b)| + (?<catalan>\b(?:catalan?|catalán|català)\b)| + (?<latvian>\b(?:lat|lav|lv)\b)| + (?<telugu>\btel\b)| + (?<vietnamese>\bVIE\b)| + (?<japanese>\bJAP\b)| + (?<korean>\bKOR\b)| + (?<urdu>\burdu\b)| + (?<romansh>\b(?:romansh|rumantsch|romansch)\b)| + (?<mongolian>\b(?:mongolian|khalkha)\b)| + (?<georgian>\b(?:georgian|geo|ka|kat)\b)| + (?<original>\b(?:orig|original)\b)", + RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace); + + private static readonly Regex CaseSensitiveLanguageRegex = new(@"(?:(?i)(?<!SUB[\W|_|^]))(?:(?<english>\bEN\b)| + (?<lithuanian>\bLT\b)| + (?<czech>\bCZ\b)| + (?<polish>\bPL\b)| + (?<bulgarian>\bBG\b)| + (?<slovak>\bSK\b)| + (?<german>\bDE\b)| + (?<spanish>\b(?<!DTS[._ -])ES\b))(?:(?i)(?![\W|_|^]SUB))", + RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace); + + private static readonly Regex GermanDualLanguageRegex = new(@"(?<!WEB[-_. 
// Plain-word language names searched for anywhere in the lower-cased title.
// Order is significant: it determines the order of the returned list.
private static readonly (string Keyword, Language Value)[] KeywordLanguages =
{
    ("english", Language.English),
    ("spanish", Language.Spanish),
    ("danish", Language.Danish),
    ("dutch", Language.Dutch),
    ("japanese", Language.Japanese),
    ("icelandic", Language.Icelandic),
    ("mandarin", Language.Chinese),
    ("cantonese", Language.Chinese),
    ("chinese", Language.Chinese),
    ("korean", Language.Korean),
    ("russian", Language.Russian),
    ("romanian", Language.Romanian),
    ("hindi", Language.Hindi),
    ("arabic", Language.Arabic),
    ("thai", Language.Thai),
    ("bulgarian", Language.Bulgarian),
    ("polish", Language.Polish),
    ("vietnamese", Language.Vietnamese),
    ("swedish", Language.Swedish),
    ("norwegian", Language.Norwegian),
    ("finnish", Language.Finnish),
    ("turkish", Language.Turkish),
    ("portuguese", Language.Portuguese),
    ("brazilian", Language.PortugueseBR),
    ("hungarian", Language.Hungarian),
    ("hebrew", Language.Hebrew),
    ("ukrainian", Language.Ukrainian),
    ("persian", Language.Persian),
    ("bengali", Language.Bengali),
    ("slovak", Language.Slovak),
    ("latvian", Language.Latvian),
    ("latino", Language.SpanishLatino),
    ("tamil", Language.Tamil),
    ("telugu", Language.Telugu),
    ("malayalam", Language.Malayalam),
    ("kannada", Language.Kannada),
    ("albanian", Language.Albanian),
    ("afrikaans", Language.Afrikaans),
    ("marathi", Language.Marathi),
    ("tagalog", Language.Tagalog)
};

// Named groups of CaseSensitiveLanguageRegex and the language each one reports.
private static readonly (string GroupName, Language Value)[] CaseSensitiveGroupLanguages =
{
    ("english", Language.English),
    ("lithuanian", Language.Lithuanian),
    ("czech", Language.Czech),
    ("polish", Language.Polish),
    ("bulgarian", Language.Bulgarian),
    ("slovak", Language.Slovak),
    ("spanish", Language.Spanish),
    ("german", Language.German)
};

// Named groups of LanguageRegex and the language each one reports.
private static readonly (string GroupName, Language Value)[] CaseInsensitiveGroupLanguages =
{
    ("english", Language.English),
    ("italian", Language.Italian),
    ("german", Language.German),
    ("flemish", Language.Flemish),
    ("greek", Language.Greek),
    ("french", Language.French),
    ("russian", Language.Russian),
    ("bulgarian", Language.Bulgarian),
    ("brazilian", Language.PortugueseBR),
    ("hungarian", Language.Hungarian),
    ("hebrew", Language.Hebrew),
    ("polish", Language.Polish),
    ("chinese", Language.Chinese),
    ("spanish", Language.Spanish),
    ("catalan", Language.Catalan),
    ("ukrainian", Language.Ukrainian),
    ("latvian", Language.Latvian),
    ("romanian", Language.Romanian),
    ("telugu", Language.Telugu),
    ("vietnamese", Language.Vietnamese),
    ("japanese", Language.Japanese),
    ("korean", Language.Korean),
    ("urdu", Language.Urdu),
    ("romansh", Language.Romansh),
    ("mongolian", Language.Mongolian),
    ("georgian", Language.Georgian),
    ("original", Language.Original)
};

// Detects the languages advertised in a release title.
//
// Three passes feed one list, in this order: plain-word keywords found in the
// lower-cased title, named groups of CaseSensitiveLanguageRegex, then named groups of
// LanguageRegex. The list is de-duplicated (first occurrence wins), falls back to
// Language.Unknown when empty, and finally receives the German "DL"/"ML" extras.
//
// title: the release title to inspect.
// Returns a non-empty list of distinct languages.
public static List<Language> ParseLanguages(string title)
{
    // Invariant lower-casing: the culture-sensitive ToLower() turns 'I' into dotless 'ı'
    // under Turkic cultures, so "ENGLISH", "HINDI", ... would never match their keyword.
    var lowerTitle = title.ToLowerInvariant();
    var detected = new List<Language>();

    foreach (var (keyword, language) in KeywordLanguages)
    {
        if (lowerTitle.Contains(keyword))
        {
            detected.Add(language);
        }
    }

    AddGroupMatches(CaseSensitiveLanguageRegex.Matches(title), CaseSensitiveGroupLanguages, detected);
    AddGroupMatches(LanguageRegex.Matches(title), CaseInsensitiveGroupLanguages, detected);

    // De-duplicate BEFORE the German-only check. A title that matches German more than
    // once (e.g. "German" and "DE") previously produced [German, German], which failed
    // the Count == 1 test and silently skipped the DL/ML handling.
    var languages = detected.Distinct().ToList();

    if (languages.Count == 0)
    {
        languages.Add(Language.Unknown);
    }

    // German dual-language (DL) implies the original track as well; multi-language (ML)
    // additionally implies English.
    if (languages.Count == 1 && languages[0] == Language.German)
    {
        if (GermanDualLanguageRegex.IsMatch(title))
        {
            languages.Add(Language.Original);
        }
        else if (GermanMultiLanguageRegex.IsMatch(title))
        {
            languages.Add(Language.Original);
            languages.Add(Language.English);
        }
    }

    return languages;
}

// Appends, for every match, the language of each named group that participated in it.
private static void AddGroupMatches(
    MatchCollection matches,
    (string GroupName, Language Value)[] groupLanguages,
    List<Language> target)
{
    foreach (Match match in matches)
    {
        foreach (var (groupName, language) in groupLanguages)
        {
            if (match.Groups[groupName].Success)
            {
                target.Add(language);
            }
        }
    }
}
// Strips a trailing file extension from a title, but only when it is a known video or
// usenet extension; any other dotted suffix (for example a release-group or year
// fragment) is left untouched.
//
// title: file or release name to clean.
// Returns the title without its recognised extension, or the title unchanged.
public static string RemoveFileExtension(string title)
{
    return FileExtensionRegex.Replace(title, m =>
    {
        // Both sets are built with StringComparer.OrdinalIgnoreCase, so the raw match can
        // be looked up directly. The former culture-sensitive ToLower() was redundant and
        // broke under Turkic cultures, where ".AVI" lower-cases to ".avı" and is missed.
        var isKnownExtension = VideoExtensions.Contains(m.Value) || UsenetExtensions.Contains(m.Value);

        return isKnownExtension ? string.Empty : m.Value;
    });
}
]?DVDRip|UHDBDRip)| + (?<brrip>BRRip)| + (?<dvdr>\d?x?M?DVD-?[R59])| + (?<dvd>DVD(?!-R)|DVDRip|xvidvd)| + (?<dsr>WS[-_. ]DSR|DSR)| + (?<regional>R[0-9]{1}|REGIONAL)| + (?<scr>SCR|SCREENER|DVDSCR|DVDSCREENER)| + (?<ts>TS[-_. ]|TELESYNCH?|HD-TS|HDTS|PDVD|TSRip|HDTSRip)| + (?<tc>TC|TELECINE|HD-TC|HDTC)| + (?<cam>CAMRIP|(?:NEW)?CAM|HD-?CAM(?:Rip)?|HQCAM)| + (?<wp>WORKPRINT|WP)| + (?<pdtv>PDTV)| + (?<sdtv>SDTV)| + (?<tvrip>TVRip) + )(?:\b|$|[ .])", + RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace); + + private static readonly Regex ResolutionRegex = new( + @"\b(?:(?<R360p>360p)|(?<R480p>480p|480i|640x480|848x480)|(?<R540p>540p)|(?<R576p>576p)|(?<R720p>720p|1280x720|960p)|(?<R1080p>1080p|1920x1080|1440p|FHD|1080i|4kto1080p)|(?<R2160p>2160p|3840x2160|4k[-_. ](?:UHD|HEVC|BD|H\.?265)|(?:UHD|HEVC|BD|H\.?265)[-_. ]4k))\b", + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly Regex AlternativeResolutionRegex = new( + @"\b(?<R2160p>UHD)\b|(?<R2160p>\[4K\])", + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly Regex RemuxRegex = new( + @"(?:[_. \[]|\d{4}p-|\bHybrid-)(?<remux>(?:(BD|UHD)[-_. ]?)?Remux)\b|(?<remux>(?:(BD|UHD)[-_. ]?)?Remux[_. ]\d{4}p)", + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly Regex ProperRegex = new(@"\b(?<proper>proper)\b", + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly Regex RepackRegex = new(@"\b(?<repack>repack\d?|rerip\d?)\b", + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly Regex VersionRegex = new( + @"\d[-._ ]?v(?<version>\d)[-._ ]|\[v(?<version>\d)\]|repack(?<version>\d)|rerip(?<version>\d)", + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly Regex RealRegex = new(@"\b(?<real>REAL)\b", RegexOptions.Compiled); + + private static readonly Regex RawHDRegex = new(@"\b(?<rawhd>RawHD|Raw[-_. 
]HD)\b", + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly Regex BRDISKRegex = new( + @"^(?!.*\b((?<!HD[._ -]|HD)DVD|BDRip|720p|MKV|XviD|WMV|d3g|(BD)?REMUX|^(?=.*1080p)(?=.*HEVC)|[xh][-_. ]?26[45]|German.*[DM]L|((?<=\d{4}).*German.*([DM]L)?)(?=.*\b(AVC|HEVC|VC[-_. ]?1|MVC|MPEG[-_. ]?2)\b))\b)(((?=.*\b(Blu[-_. ]?ray|BD|HD[-_. ]?DVD)\b)(?=.*\b(AVC|HEVC|VC[-_. ]?1|MVC|MPEG[-_. ]?2|BDMV|ISO)\b))|^((?=.*\b(((?=.*\b((.*_)?COMPLETE.*|Dis[ck])\b)(?=.*(Blu[-_. ]?ray|HD[-_. ]?DVD)))|3D[-_. ]?BD|BR[-_. ]?DISK|Full[-_. ]?Blu[-_. ]?ray|^((?=.*((BD|UHD)[-_. ]?(25|50|66|100|ISO)))))))).*", + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly Regex CodecRegex = new( + @"\b(?:(?<x264>x264)|(?<h264>h264)|(?<xvidhd>XvidHD)|(?<xvid>X-?vid)|(?<divx>divx))\b", + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly Regex AnimeBlurayRegex = new( + @"bd(?:720|1080|2160)|(?<=[-_. (\[])bd(?=[-_. )\]])", + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly Regex AnimeWebDlRegex = new( + @"\[WEB\]|[\[\(]WEB[ .]", + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly Regex MPEG2Regex = new(@"\b(?<mpeg2>MPEG[-_. 
// Derives source, resolution, modifier and revision information from a release name.
//
// Steps, in order:
//   1. Underscores become spaces and the name is trimmed.
//   2. Revision data is filled in by ParseRevision, resolution by ParseResolution.
//   3. A RawHD marker (when the name is not a BR-DISK) short-circuits with Modifier = RawHD.
//   4. The first SourceRegex match decides the source; some sources also force or
//      default the resolution.
//   5. Without a source match: Remux plus a known resolution means Bluray Remux, then the
//      anime Bluray / WEB bracket patterns are tried.
//
// name: the release name to inspect.
// Returns a QualityResult; when nothing matches only revision and resolution are set.
public static QualityResult ParseQuality(string name)
{
    var normalizedName = name.Replace('_', ' ').Trim();
    var result = new QualityResult();

    ParseRevision(name, normalizedName, result);

    var resolution = ParseResolution(normalizedName);
    result.Resolution = resolution;

    // BRDISKRegex is lookahead-heavy; evaluate it once and reuse the answer for both the
    // RawHD guard and the Bluray branch.
    var isBRDisk = BRDISKRegex.IsMatch(normalizedName);

    if (!isBRDisk && RawHDRegex.IsMatch(normalizedName))
    {
        result.Modifier = QualityModifier.RawHD;
        return result;
    }

    var isRemux = RemuxRegex.IsMatch(normalizedName);
    var sourceMatch = SourceRegex.Match(normalizedName);

    // Applies a fallback resolution only when the title did not state one.
    void DefaultResolution(Resolution fallback)
    {
        if (result.Resolution == Resolution.Unknown)
        {
            result.Resolution = fallback;
        }
    }

    if (sourceMatch.Success)
    {
        var source = sourceMatch.Groups;

        if (source["bluray"].Success)
        {
            result.Source = QualitySource.Bluray;

            if (isBRDisk)
            {
                result.Modifier = QualityModifier.BRDisk;
                return result;
            }

            // Only the Bluray branch cares about the codec, so match it here.
            var codecMatch = CodecRegex.Match(normalizedName);
            if (codecMatch.Groups["xvid"].Success || codecMatch.Groups["divx"].Success)
            {
                // XviD/DivX Bluray encodes are treated as 480p regardless of the title.
                result.Resolution = Resolution.R480p;
                return result;
            }

            result.Modifier = isRemux ? QualityModifier.Remux : QualityModifier.None;
            DefaultResolution(Resolution.R720p);
            return result;
        }

        if (source["webdl"].Success)
        {
            result.Source = QualitySource.WebDL;
            DefaultResolution(Resolution.R480p);
            return result;
        }

        if (source["webrip"].Success)
        {
            result.Source = QualitySource.WebRip;
            DefaultResolution(Resolution.R480p);
            return result;
        }

        if (source["hdtv"].Success)
        {
            result.Source = QualitySource.TV;
            if (MPEG2Regex.IsMatch(normalizedName))
            {
                result.Modifier = QualityModifier.RawHD;
            }
            return result;
        }

        if (source["bdrip"].Success || source["brrip"].Success)
        {
            result.Source = QualitySource.Bluray;
            DefaultResolution(Resolution.R480p);
            return result;
        }

        if (source["dvdr"].Success || source["dvd"].Success)
        {
            result.Source = QualitySource.DVD;
            result.Resolution = Resolution.R480p;
            return result;
        }

        if (source["scr"].Success)
        {
            result.Source = QualitySource.DVD;
            result.Resolution = Resolution.R480p;
            result.Modifier = QualityModifier.Screener;
            return result;
        }

        if (source["cam"].Success)
        {
            result.Source = QualitySource.Cam;
            return result;
        }

        if (source["ts"].Success)
        {
            result.Source = QualitySource.Telesync;
            return result;
        }

        if (source["tc"].Success)
        {
            result.Source = QualitySource.Telecine;
            return result;
        }

        if (source["wp"].Success)
        {
            result.Source = QualitySource.Workprint;
            return result;
        }

        if (source["regional"].Success)
        {
            result.Source = QualitySource.DVD;
            result.Resolution = Resolution.R480p;
            result.Modifier = QualityModifier.Regional;
            return result;
        }

        if (source["pdtv"].Success || source["sdtv"].Success ||
            source["dsr"].Success || source["tvrip"].Success)
        {
            result.Source = QualitySource.TV;
            return result;
        }
    }

    // No usable source: a Remux marker together with a known resolution implies Bluray.
    if (isRemux && resolution != Resolution.Unknown)
    {
        result.Source = QualitySource.Bluray;
        result.Modifier = QualityModifier.Remux;
        return result;
    }

    // Anime-style markers such as "bd1080" or "[BD]".
    if (AnimeBlurayRegex.IsMatch(normalizedName))
    {
        result.Source = QualitySource.Bluray;
        result.Modifier = isRemux ? QualityModifier.Remux : QualityModifier.None;
        DefaultResolution(Resolution.R720p);
        return result;
    }

    // Anime-style "[WEB]" / "(WEB " markers.
    if (AnimeWebDlRegex.IsMatch(normalizedName))
    {
        result.Source = QualitySource.WebDL;
        DefaultResolution(Resolution.R720p);
        return result;
    }

    // The former trailing "resolution only" Remux check repeated the condition already
    // handled (and returned from) above, so it could never run and has been removed.
    return result;
}
// Pairs a compiled regular expression with a fixed replacement pattern so the same
// substitution can be applied repeatedly.
public class RegexReplace
{
    private readonly Regex _expression;
    private readonly string _substitution;

    // pattern: regular expression to search for.
    // replacement: replacement pattern (may use substitutions such as $1).
    // regexOptions: options the expression is built with.
    public RegexReplace(string pattern, string replacement, RegexOptions regexOptions)
    {
        _substitution = replacement;
        _expression = new Regex(pattern, regexOptions);
    }

    // Returns input with every match replaced.
    public string Replace(string input) => _expression.Replace(input, _substitution);

    // Replaces every match in input in place.
    // Returns true when the expression matched at least once, false when input is left as it was.
    public bool TryReplace(ref string input)
    {
        if (!_expression.IsMatch(input))
        {
            return false;
        }

        input = _expression.Replace(input, _substitution);
        return true;
    }
}
// Extracts the release group from a release title.
//
// The title is trimmed, stripped of a known file extension, run through the
// pre-substitutions (stopping at the first that applies) and cleaned of website
// prefixes and torrent-site suffixes. Detection then proceeds in priority order:
//   1. a leading anime-style "[SubGroup]" tag,
//   2. the exact-name exception list,
//   3. the bracket/parenthesis exception list,
//   4. the generic "-Group" / "[Group]" pattern.
// For steps 2-4 the last match in the title wins.
//
// Returns the group name, or null when none is found or when the generic candidate is
// purely numeric or matches InvalidReleaseGroupRegex.
public static string? ParseReleaseGroup(string title)
{
    var working = ParserCommon.RemoveFileExtension(title.Trim());

    foreach (var substitution in ParserCommon.PreSubstitutionRegex)
    {
        if (substitution.TryReplace(ref working))
        {
            break;
        }
    }

    working = ParserCommon.WebsitePrefixRegex.Replace(working);
    working = ParserCommon.CleanTorrentSuffixRegex.Replace(working);

    // Anime releases carry the group up front, e.g. "[SubGroup] Title - 01".
    var subgroupMatch = AnimeReleaseGroupRegex.Match(working);
    if (subgroupMatch.Success)
    {
        return subgroupMatch.Groups["subgroup"].Value;
    }

    // Drop repost/obfuscation suffixes before looking at the tail of the title.
    working = CleanReleaseGroupRegex.Replace(working);

    // Exception lists are consulted exact-names first, then the bracketed names.
    foreach (var exceptionRegex in new[] { ExceptionReleaseGroupRegexExact, ExceptionReleaseGroupRegex })
    {
        var exceptionHits = exceptionRegex.Matches(working);
        if (exceptionHits.Count != 0)
        {
            return exceptionHits[exceptionHits.Count - 1].Groups["releasegroup"].Value;
        }
    }

    var candidates = ReleaseGroupRegex.Matches(working);
    if (candidates.Count == 0)
    {
        return null;
    }

    var candidate = candidates[candidates.Count - 1].Groups["releasegroup"].Value;

    // Reject numeric-only names and patterns like S01, E05 or 8-digit hex hashes.
    var isRejected = int.TryParse(candidate, out _) || InvalidReleaseGroupRegex.IsMatch(candidate);

    return isRejected ? null : candidate;
}
Edition { get; set; } + public string? ImdbId { get; set; } + public int TmdbId { get; set; } + public string? ReleaseHash { get; set; } + public string? HardcodedSubs { get; set; } +} + +public static class TitleParser +{ + private static readonly Regex EditionRegex = new( + @"\(?\b(?<edition>(((Recut.|Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Extended|Despecialized|(Special|Rouge|Final|Assembly|Imperial|Diamond|Signature|Hunter|Rekall)(?=(.(Cut|Edition|Version)))|\d{2,3}(th)?.Anniversary)(?:.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|Open.?Matte|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|Open?.Matte|IMAX|Fan.?Edit|Restored|((2|3|4)in1))))))\b\)?", + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly Regex ReportEditionRegex = new( + @"^.+?" + EditionRegex, + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private static readonly Regex HardcodedSubsRegex = new( + @"\b((?<hcsub>(\w+(?<!SOFT|MULTI|HORRIBLE)SUBS?))|(?<hc>(HC|SUBBED)))\b", + RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace); + + private static readonly Regex[] ReportMovieTitleRegex = new[] + { + // Anime [Subgroup] and Year + new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*(?<year>(1(8|9)|20)\d{2}(?!p|i|x|\d+|\]|\W\d+)))+.*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase | RegexOptions.Compiled), + + // Anime [Subgroup] no year, versioned title, hash + new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)(?<title>(?![(\[]).+?)((v)(?:\d{1,2})(?:([-_. ])))(\[.*)?(?:[\[(][^])])?.*?(?<hash>\[\w{8}\])(?:$|\.)", RegexOptions.IgnoreCase | RegexOptions.Compiled), + + // Anime [Subgroup] no year, info in double sets of brackets, hash + new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. 
]?)(?<title>(?![(\[]).+?)(\[.*).*?(?<hash>\[\w{8}\])(?:$|\.)", RegexOptions.IgnoreCase | RegexOptions.Compiled), + + // Anime [Subgroup] no year, info in parentheses or brackets, hash + new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)(?<title>(?![(\[]).+)(?:[\[(][^])]).*?(?<hash>\[\w{8}\])(?:$|\.)", RegexOptions.IgnoreCase | RegexOptions.Compiled), + + // Some german or french tracker formats (missing year, ...) - see ParserFixture for examples and tests + new Regex(@"^(?<title>(?![(\[]).+?)((\W|_))(" + EditionRegex + @".{1,3})?(?:(?<!(19|20)\d{2}.*?)(?<!(?:Good|The)[_ .-])(German|TrueFrench))(.+?)(?=((19|20)\d{2}|$))(?<year>(19|20)\d{2}(?!p|i|\d+|\]|\W\d+))?(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), + + // Special, Despecialized, etc. Edition Movies, e.g: Mission.Impossible.3.Special.Edition.2011 + new Regex(@"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*" + EditionRegex + @".{1,3}(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)", + RegexOptions.IgnoreCase | RegexOptions.Compiled), + + // Normal movie format, e.g: Mission.Impossible.3.2011 + new Regex(@"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*(?<year>(1(8|9)|20)\d{2}(?!p|i|(1(8|9)|20)\d{2}|\]|\W(1(8|9)|20)\d{2})))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), + + // PassThePopcorn Torrent names: Star.Wars[PassThePopcorn] + new Regex(@"^(?<title>.+?)?(?:(?:[-_\W](?<![()\[!]))*(?<year>(\[\w *\])))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), + + // That did not work? Maybe some tool uses [] for years. Who would do that? + new Regex(@"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)!]))*(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\W\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), + + // As a last resort for movies that have ( or [ in their title. 
+ new Regex(@"^(?<title>.+?)?(?:(?:[-_\W](?<![)\[!]))*(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled) + }; + + private static readonly Regex[] ReportMovieTitleFolderRegex = new[] + { + // When year comes first. + new Regex(@"^(?:(?:[-_\W](?<![)!]))*(?<year>(19|20)\d{2}(?!p|i|\d+|\W\d+)))+(\W+|_|$)(?<title>.+?)?$") + }; + + private static readonly Regex[] RejectHashedReleasesRegex = new Regex[] + { + // Generic match for md5 and mixed-case hashes. + new Regex(@"^[0-9a-zA-Z]{32}", RegexOptions.Compiled), + + // Generic match for shorter lower-case hashes. + new Regex(@"^[a-z0-9]{24}$", RegexOptions.Compiled), + + // Format seen on some NZBGeek releases + new Regex(@"^[A-Z]{11}\d{3}$", RegexOptions.Compiled), + new Regex(@"^[a-z]{12}\d{3}$", RegexOptions.Compiled), + + // Backup filename (Unknown origins) + new Regex(@"^Backup_\d{5,}S\d{2}-\d{2}$", RegexOptions.Compiled), + + // 123 - Started appearing December 2014 + new Regex(@"^123$", RegexOptions.Compiled), + + // abc - Started appearing January 2015 + new Regex(@"^abc$", RegexOptions.Compiled | RegexOptions.IgnoreCase), + + // abc - Started appearing 2020 + new Regex(@"^abc[-_. ]xyz", RegexOptions.Compiled | RegexOptions.IgnoreCase), + + // b00bs - Started appearing January 2015 + new Regex(@"^b00bs$", RegexOptions.Compiled | RegexOptions.IgnoreCase) + }; + + // Regex to detect whether the title was reversed. + private static readonly Regex ReversedTitleRegex = new( + @"(?:^|[-._ ])(p027|p0801)[-._ ]", + RegexOptions.Compiled); + + // Regex to split movie titles that contain `AKA`. + private static readonly Regex AlternativeTitleRegex = new( + @"[ ]+(?:AKA|\/)[ ]+", + RegexOptions.IgnoreCase | RegexOptions.Compiled); + + // Regex to unbracket alternative titles. 
// Parses a movie release title into a ParsedMovieInfo.
//
// title: the release or folder name.
// isDir: when true, the folder-style patterns (year first) are tried after the regular ones.
//
// Returns the parsed info from the first pattern that yields a usable result, or null
// when validation fails, no pattern produces a result, or any step throws (parsing is
// best-effort, so exceptions are deliberately swallowed).
// NOTE(review): ValidateBeforeParsing and GetReleaseHash are declared outside this
// chunk; their exact behaviour is not described here.
public static ParsedMovieInfo? ParseMovieTitle(string title, bool isDir = false)
{
    // Hardcoded-sub detection runs on the untouched input, so keep a copy.
    var untouchedTitle = title;

    try
    {
        if (!ValidateBeforeParsing(title))
        {
            return null;
        }

        // Reversed releases contain "p027"/"p0801"; flip everything but the extension back.
        if (ReversedTitleRegex.IsMatch(title))
        {
            var reversedChars = ParserCommon.RemoveFileExtension(title).ToCharArray();
            Array.Reverse(reversedChars);
            title = $"{new string(reversedChars)}{title.Substring(reversedChars.Length)}";
        }

        // Extension removed, leading/trailing dashes and underscores trimmed,
        // full-width brackets normalised.
        var releaseTitle = ParserCommon.RemoveFileExtension(title)
            .Trim('-', '_')
            .Replace("【", "[")
            .Replace("】", "]");

        foreach (var substitution in ParserCommon.PreSubstitutionRegex)
        {
            if (substitution.TryReplace(ref releaseTitle))
            {
                break;
            }
        }

        // The simplified title has resolution/codec tokens, website prefixes and
        // postfixes, torrent suffixes and a trailing quality bracket removed.
        var simpleTitle = SimpleTitleRegex.Replace(releaseTitle);
        simpleTitle = ParserCommon.WebsitePrefixRegex.Replace(simpleTitle);
        simpleTitle = ParserCommon.WebsitePostfixRegex.Replace(simpleTitle);
        simpleTitle = ParserCommon.CleanTorrentSuffixRegex.Replace(simpleTitle);
        simpleTitle = CleanQualityBracketsRegex.Replace(simpleTitle, string.Empty);

        IEnumerable<Regex> candidatePatterns = isDir
            ? ReportMovieTitleRegex.Concat(ReportMovieTitleFolderRegex)
            : ReportMovieTitleRegex;

        foreach (var pattern in candidatePatterns)
        {
            var matches = pattern.Matches(simpleTitle);
            if (matches.Count == 0)
            {
                continue;
            }

            var parsed = ParseMovieMatchCollection(matches);
            if (parsed == null)
            {
                continue;
            }

            var simpleReleaseTitle = SimpleReleaseTitleRegex.Replace(releaseTitle, string.Empty);

            // Keep an edition captured by the title pattern; otherwise look for one.
            if (string.IsNullOrWhiteSpace(parsed.Edition))
            {
                parsed.Edition = ParseEdition(simpleReleaseTitle);
            }

            parsed.ReleaseHash = GetReleaseHash(matches);
            parsed.HardcodedSubs = ParseHardcodeSubs(untouchedTitle);
            parsed.ImdbId = ParseImdbId(simpleReleaseTitle);
            parsed.TmdbId = ParseTmdbId(simpleReleaseTitle);

            return parsed;
        }
    }
    catch
    {
        // Parsing failed
    }

    return null;
}
ParseMovieMatchCollection(MatchCollection matchCollection) + { + if (!matchCollection[0].Groups["title"].Success || matchCollection[0].Groups["title"].Value == "(") + { + return null; + } + + var movieName = matchCollection[0].Groups["title"].Value.Replace('_', ' '); + movieName = NormalizeAlternativeTitleRegex.Replace(movieName, " AKA "); + movieName = RequestInfoRegex.Replace(movieName, "").Trim(' '); + + // Handle dots in title - preserve acronyms + var parts = movieName.Split('.'); + movieName = ""; + var n = 0; + var previousAcronym = false; + var nextPart = ""; + + foreach (var part in parts) + { + if (parts.Length >= n + 2) + { + nextPart = parts[n + 1]; + } + else + { + nextPart = ""; + } + + if (part.Length == 1 && part.ToLower() != "a" && !int.TryParse(part, out _) && + (previousAcronym || n < parts.Length - 1) && + (previousAcronym || nextPart.Length != 1 || !int.TryParse(nextPart, out _))) + { + movieName += part + "."; + previousAcronym = true; + } + else if (part.ToLower() == "a" && (previousAcronym || nextPart.Length == 1)) + { + movieName += part + "."; + previousAcronym = true; + } + else if (part.ToLower() == "dr") + { + movieName += part + "."; + previousAcronym = true; + } + else + { + if (previousAcronym) + { + movieName += " "; + previousAcronym = false; + } + + movieName += part + " "; + } + + n++; + } + + movieName = movieName.Trim(' '); + + int.TryParse(matchCollection[0].Groups["year"].Value, out var airYear); + + var result = new ParsedMovieInfo { Year = airYear }; + + if (matchCollection[0].Groups["edition"].Success) + { + result.Edition = matchCollection[0].Groups["edition"].Value.Replace(".", " "); + } + + var movieTitles = new List<string> { movieName }; + + // Delete parentheses of the form (aka ...). + var unbracketedName = BracketedAlternativeTitleRegex.Replace(movieName, "$1 AKA $2"); + + // Split by AKA and filter out empty and duplicate names. 
+ var alternativeTitles = AlternativeTitleRegex + .Split(unbracketedName) + .Where(alternativeName => !string.IsNullOrWhiteSpace(alternativeName) && alternativeName != movieName); + + movieTitles.AddRange(alternativeTitles); + + result.MovieTitles = movieTitles; + + return result; + } + + private static bool ValidateBeforeParsing(string title) + { + if (title.ToLower().Contains("password") && title.ToLower().Contains("yenc")) + { + return false; + } + + if (!title.Any(char.IsLetterOrDigit)) + { + return false; + } + + var titleWithoutExtension = ParserCommon.RemoveFileExtension(title); + + if (RejectHashedReleasesRegex.Any(v => v.IsMatch(titleWithoutExtension))) + { + return false; + } + + return true; + } + + private static string? GetReleaseHash(MatchCollection matchCollection) + { + var hash = matchCollection[0].Groups["hash"]; + + if (hash.Success) + { + var hashValue = hash.Value.Trim('[', ']'); + + if (hashValue.Equals("1280x720")) + { + return null; + } + + return hashValue; + } + + return null; + } +} diff --git a/services/parser/Core/Types.cs b/services/parser/Core/Types.cs new file mode 100644 index 0000000..c25076c --- /dev/null +++ b/services/parser/Core/Types.cs @@ -0,0 +1,52 @@ +namespace Parser.Core; + +public enum QualitySource +{ + Unknown = 0, + Cam, + Telesync, + Telecine, + Workprint, + DVD, + TV, + WebDL, + WebRip, + Bluray +} + +public enum Resolution +{ + Unknown = 0, + R360p = 360, + R480p = 480, + R540p = 540, + R576p = 576, + R720p = 720, + R1080p = 1080, + R2160p = 2160 +} + +public enum QualityModifier +{ + None = 0, + Regional, + Screener, + RawHD, + BRDisk, + Remux +} + +public class Revision +{ + public int Version { get; set; } = 1; + public int Real { get; set; } + public bool IsRepack { get; set; } +} + +public class QualityResult +{ + public QualitySource Source { get; set; } = QualitySource.Unknown; + public Resolution Resolution { get; set; } = Resolution.Unknown; + public QualityModifier Modifier { get; set; } = 
QualityModifier.None; + public Revision Revision { get; set; } = new(); +} diff --git a/services/parser/Dockerfile b/services/parser/Dockerfile new file mode 100644 index 0000000..c41943a --- /dev/null +++ b/services/parser/Dockerfile @@ -0,0 +1,17 @@ +FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build +WORKDIR /src + +COPY *.csproj . +RUN dotnet restore + +COPY . . +RUN dotnet publish -c Release -o /app + +FROM mcr.microsoft.com/dotnet/aspnet:8.0 +WORKDIR /app +COPY --from=build /app . + +ENV ASPNETCORE_URLS=http://+:5000 +EXPOSE 5000 + +ENTRYPOINT ["dotnet", "Parser.dll"] diff --git a/services/parser/Parser.csproj b/services/parser/Parser.csproj new file mode 100644 index 0000000..1b28a01 --- /dev/null +++ b/services/parser/Parser.csproj @@ -0,0 +1,9 @@ +<Project Sdk="Microsoft.NET.Sdk.Web"> + + <PropertyGroup> + <TargetFramework>net8.0</TargetFramework> + <Nullable>enable</Nullable> + <ImplicitUsings>enable</ImplicitUsings> + </PropertyGroup> + +</Project> diff --git a/services/parser/Program.cs b/services/parser/Program.cs new file mode 100644 index 0000000..77b165c --- /dev/null +++ b/services/parser/Program.cs @@ -0,0 +1,142 @@ +using Parser.Core; + +var builder = WebApplication.CreateBuilder(args); +builder.Services.AddEndpointsApiExplorer(); + +var app = builder.Build(); + +app.MapPost("/parse", (ParseRequest request) => +{ + if (string.IsNullOrWhiteSpace(request.Title)) + { + return Results.BadRequest(new { error = "Title is required" }); + } + + if (string.IsNullOrWhiteSpace(request.Type) || + (request.Type != "movie" && request.Type != "series")) + { + return Results.BadRequest(new { error = "Type is required and must be 'movie' or 'series'" }); + } + + var qualityResult = QualityParser.ParseQuality(request.Title); + var languages = LanguageParser.ParseLanguages(request.Title); + var releaseGroup = ReleaseGroupParser.ParseReleaseGroup(request.Title); + + if (request.Type == "movie") + { + var titleInfo = TitleParser.ParseMovieTitle(request.Title); + return 
Results.Ok(new ParseResponse + { + Title = request.Title, + Type = "movie", + Source = qualityResult.Source.ToString(), + Resolution = (int)qualityResult.Resolution, + Modifier = qualityResult.Modifier.ToString(), + Revision = new RevisionResponse + { + Version = qualityResult.Revision.Version, + Real = qualityResult.Revision.Real, + IsRepack = qualityResult.Revision.IsRepack + }, + Languages = languages.Select(l => l.ToString()).ToList(), + ReleaseGroup = releaseGroup, + MovieTitles = titleInfo?.MovieTitles ?? new List<string>(), + Year = titleInfo?.Year ?? 0, + Edition = titleInfo?.Edition, + ImdbId = titleInfo?.ImdbId, + TmdbId = titleInfo?.TmdbId ?? 0, + HardcodedSubs = titleInfo?.HardcodedSubs, + ReleaseHash = titleInfo?.ReleaseHash, + Episode = null + }); + } + else // series + { + var episodeInfo = EpisodeParser.ParseTitle(request.Title); + return Results.Ok(new ParseResponse + { + Title = request.Title, + Type = "series", + Source = qualityResult.Source.ToString(), + Resolution = (int)qualityResult.Resolution, + Modifier = qualityResult.Modifier.ToString(), + Revision = new RevisionResponse + { + Version = qualityResult.Revision.Version, + Real = qualityResult.Revision.Real, + IsRepack = qualityResult.Revision.IsRepack + }, + Languages = languages.Select(l => l.ToString()).ToList(), + ReleaseGroup = releaseGroup, + MovieTitles = new List<string>(), + Year = 0, + Edition = null, + ImdbId = null, + TmdbId = 0, + HardcodedSubs = null, + ReleaseHash = null, + Episode = episodeInfo != null ? 
new EpisodeResponse + { + SeriesTitle = episodeInfo.SeriesTitle, + SeasonNumber = episodeInfo.SeasonNumber, + EpisodeNumbers = episodeInfo.EpisodeNumbers.ToList(), + AbsoluteEpisodeNumbers = episodeInfo.AbsoluteEpisodeNumbers.ToList(), + AirDate = episodeInfo.AirDate, + FullSeason = episodeInfo.FullSeason, + IsPartialSeason = episodeInfo.IsPartialSeason, + IsMultiSeason = episodeInfo.IsMultiSeason, + IsMiniSeries = episodeInfo.IsMiniSeries, + Special = episodeInfo.Special, + ReleaseType = episodeInfo.ReleaseType.ToString() + } : null + }); + } +}); + +app.MapGet("/health", () => Results.Ok(new { status = "healthy" })); + +app.Run(); + +public record ParseRequest(string Title, string? Type); + +public record ParseResponse +{ + public string Title { get; init; } = ""; + public string Type { get; init; } = ""; + public string Source { get; init; } = ""; + public int Resolution { get; init; } + public string Modifier { get; init; } = ""; + public RevisionResponse Revision { get; init; } = new(); + public List<string> Languages { get; init; } = new(); + public string? ReleaseGroup { get; init; } + public List<string> MovieTitles { get; init; } = new(); + public int Year { get; init; } + public string? Edition { get; init; } + public string? ImdbId { get; init; } + public int TmdbId { get; init; } + public string? HardcodedSubs { get; init; } + public string? ReleaseHash { get; init; } + public EpisodeResponse? Episode { get; init; } +} + +public record RevisionResponse +{ + public int Version { get; init; } = 1; + public int Real { get; init; } + public bool IsRepack { get; init; } +} + +public record EpisodeResponse +{ + public string? SeriesTitle { get; init; } + public int SeasonNumber { get; init; } + public List<int> EpisodeNumbers { get; init; } = new(); + public List<int> AbsoluteEpisodeNumbers { get; init; } = new(); + public string? 
AirDate { get; init; } + public bool FullSeason { get; init; } + public bool IsPartialSeason { get; init; } + public bool IsMultiSeason { get; init; } + public bool IsMiniSeries { get; init; } + public bool Special { get; init; } + public string ReleaseType { get; init; } = "Unknown"; +} diff --git a/services/parser/docker-compose.yml b/services/parser/docker-compose.yml new file mode 100644 index 0000000..a235f31 --- /dev/null +++ b/services/parser/docker-compose.yml @@ -0,0 +1,14 @@ +services: + parser: + container_name: profilarr_parser + build: . + ports: + - "5000:5000" + environment: + - ASPNETCORE_ENVIRONMENT=Development + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:5000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s diff --git a/src/lib/server/utils/arr/parser/client.ts b/src/lib/server/utils/arr/parser/client.ts new file mode 100644 index 0000000..56eb679 --- /dev/null +++ b/src/lib/server/utils/arr/parser/client.ts @@ -0,0 +1,136 @@ +/** + * Parser Service Client + * Calls the C# parser microservice + */ + +import { config } from '$config'; +import { + QualitySource, + QualityModifier, + Language, + ReleaseType, + type QualityInfo, + type ParseResult, + type EpisodeInfo, + type Resolution, + type MediaType +} from './types.ts'; + +interface EpisodeResponse { + seriesTitle: string | null; + seasonNumber: number; + episodeNumbers: number[]; + absoluteEpisodeNumbers: number[]; + airDate: string | null; + fullSeason: boolean; + isPartialSeason: boolean; + isMultiSeason: boolean; + isMiniSeries: boolean; + special: boolean; + releaseType: string; +} + +interface ParseResponse { + title: string; + type: MediaType; + source: string; + resolution: number; + modifier: string; + revision: { + version: number; + real: number; + isRepack: boolean; + }; + languages: string[]; + releaseGroup: string | null; + movieTitles: string[]; + year: number; + edition: string | null; + imdbId: string | null; + tmdbId: number; + 
hardcodedSubs: string | null; + releaseHash: string | null; + episode: EpisodeResponse | null; +} + +/** + * Parse a release title - returns quality, resolution, modifier, revision, and languages + * @param title - The release title to parse + * @param type - The media type: 'movie' or 'series' + */ +export async function parse(title: string, type: MediaType): Promise<ParseResult> { + const res = await fetch(`${config.parserUrl}/parse`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ title, type }) + }); + + if (!res.ok) { + throw new Error(`Parser error: ${res.status}`); + } + + const data: ParseResponse = await res.json(); + + return { + title: data.title, + type: data.type, + source: QualitySource[data.source as keyof typeof QualitySource] ?? QualitySource.Unknown, + resolution: data.resolution as Resolution, + modifier: + QualityModifier[data.modifier as keyof typeof QualityModifier] ?? QualityModifier.None, + revision: data.revision, + languages: data.languages.map( + (l) => Language[l as keyof typeof Language] ?? Language.Unknown + ), + releaseGroup: data.releaseGroup, + movieTitles: data.movieTitles, + year: data.year, + edition: data.edition, + imdbId: data.imdbId, + tmdbId: data.tmdbId, + hardcodedSubs: data.hardcodedSubs, + releaseHash: data.releaseHash, + episode: data.episode + ? { + seriesTitle: data.episode.seriesTitle, + seasonNumber: data.episode.seasonNumber, + episodeNumbers: data.episode.episodeNumbers, + absoluteEpisodeNumbers: data.episode.absoluteEpisodeNumbers, + airDate: data.episode.airDate, + fullSeason: data.episode.fullSeason, + isPartialSeason: data.episode.isPartialSeason, + isMultiSeason: data.episode.isMultiSeason, + isMiniSeries: data.episode.isMiniSeries, + special: data.episode.special, + releaseType: + ReleaseType[data.episode.releaseType as keyof typeof ReleaseType] ?? 
+ ReleaseType.Unknown + } + : null + }; +} + +/** + * Parse quality info from a release title (legacy - use parse() for full results) + */ +export async function parseQuality(title: string, type: MediaType): Promise<QualityInfo> { + const result = await parse(title, type); + return { + source: result.source, + resolution: result.resolution, + modifier: result.modifier, + revision: result.revision + }; +} + +/** + * Check parser service health + */ +export async function isParserHealthy(): Promise<boolean> { + try { + const res = await fetch(`${config.parserUrl}/health`); + return res.ok; + } catch { + return false; + } +} diff --git a/src/lib/server/utils/arr/parser/index.ts b/src/lib/server/utils/arr/parser/index.ts new file mode 100644 index 0000000..6bf4c5e --- /dev/null +++ b/src/lib/server/utils/arr/parser/index.ts @@ -0,0 +1,7 @@ +/** + * Release Title Parser + * Client for the C# parser microservice + */ + +export * from './types.ts'; +export { parse, parseQuality, isParserHealthy } from './client.ts'; diff --git a/src/lib/server/utils/arr/parser/types.ts b/src/lib/server/utils/arr/parser/types.ts new file mode 100644 index 0000000..08c251e --- /dev/null +++ b/src/lib/server/utils/arr/parser/types.ts @@ -0,0 +1,154 @@ +/** + * Parser Types + * Matches the C# parser microservice types + */ + +export enum QualitySource { + Unknown = 0, + Cam = 1, + Telesync = 2, + Telecine = 3, + Workprint = 4, + DVD = 5, + TV = 6, + WebDL = 7, + WebRip = 8, + Bluray = 9 +} + +export enum QualityModifier { + None = 0, + Regional = 1, + Screener = 2, + RawHD = 3, + BRDisk = 4, + Remux = 5 +} + +export enum Resolution { + Unknown = 0, + R360p = 360, + R480p = 480, + R540p = 540, + R576p = 576, + R720p = 720, + R1080p = 1080, + R2160p = 2160 +} + +export enum Language { + Unknown = 0, + English = 1, + French = 2, + Spanish = 3, + German = 4, + Italian = 5, + Danish = 6, + Dutch = 7, + Japanese = 8, + Icelandic = 9, + Chinese = 10, + Russian = 11, + Polish = 12, + Vietnamese = 13, 
+ Swedish = 14, + Norwegian = 15, + Finnish = 16, + Turkish = 17, + Portuguese = 18, + Flemish = 19, + Greek = 20, + Korean = 21, + Hungarian = 22, + Hebrew = 23, + Lithuanian = 24, + Czech = 25, + Hindi = 26, + Romanian = 27, + Thai = 28, + Bulgarian = 29, + PortugueseBR = 30, + Arabic = 31, + Ukrainian = 32, + Persian = 33, + Bengali = 34, + Slovak = 35, + Latvian = 36, + SpanishLatino = 37, + Catalan = 38, + Croatian = 39, + Serbian = 40, + Bosnian = 41, + Estonian = 42, + Tamil = 43, + Indonesian = 44, + Telugu = 45, + Macedonian = 46, + Slovenian = 47, + Malayalam = 48, + Kannada = 49, + Albanian = 50, + Afrikaans = 51, + Marathi = 52, + Tagalog = 53, + Urdu = 54, + Romansh = 55, + Mongolian = 56, + Georgian = 57, + Original = 58 +} + +export enum ReleaseType { + Unknown = 0, + SingleEpisode = 1, + MultiEpisode = 2, + SeasonPack = 3 +} + +export interface Revision { + version: number; + real: number; + isRepack: boolean; +} + +export interface QualityInfo { + source: QualitySource; + resolution: Resolution; + modifier: QualityModifier; + revision: Revision; +} + +export interface EpisodeInfo { + seriesTitle: string | null; + seasonNumber: number; + episodeNumbers: number[]; + absoluteEpisodeNumbers: number[]; + airDate: string | null; + fullSeason: boolean; + isPartialSeason: boolean; + isMultiSeason: boolean; + isMiniSeries: boolean; + special: boolean; + releaseType: ReleaseType; +} + +export type MediaType = 'movie' | 'series'; + +export interface ParseResult { + title: string; + type: MediaType; + source: QualitySource; + resolution: Resolution; + modifier: QualityModifier; + revision: Revision; + languages: Language[]; + releaseGroup: string | null; + movieTitles: string[]; + year: number; + edition: string | null; + imdbId: string | null; + tmdbId: number; + hardcodedSubs: string | null; + releaseHash: string | null; + episode: EpisodeInfo | null; +} diff --git a/src/lib/server/utils/config/config.ts b/src/lib/server/utils/config/config.ts index 
e68a5ab..bf56523 100644 --- a/src/lib/server/utils/config/config.ts +++ b/src/lib/server/utils/config/config.ts @@ -5,6 +5,7 @@ class Config { private basePath: string; public readonly timezone: string; + public readonly parserUrl: string; constructor() { // Default base path logic: @@ -24,6 +25,11 @@ class Config { // 1. Check TZ environment variable // 2. Fall back to system timezone this.timezone = Deno.env.get('TZ') || Intl.DateTimeFormat().resolvedOptions().timeZone; + + // Parser service configuration + const parserHost = Deno.env.get('PARSER_HOST') || 'localhost'; + const parserPort = Deno.env.get('PARSER_PORT') || '5000'; + this.parserUrl = `http://${parserHost}:${parserPort}`; } /**