From ef86fa251f8979917514af6a778bff72eb8bfc40 Mon Sep 17 00:00:00 2001 From: Sam Chau Date: Wed, 27 Aug 2025 01:34:10 +0930 Subject: [PATCH] feat(regex): add .NET regex validation via PowerShell and integrate into frontend --- Dockerfile | 19 ++++- backend/Dockerfile | 14 ++++ backend/app/data/__init__.py | 26 +++++++ backend/app/data/utils.py | 64 ++++++++++++++++ backend/scripts/validate.ps1 | 73 +++++++++++++++++++ frontend/src/api/data.js | 12 ++- .../src/components/regex/RegexGeneralTab.jsx | 58 ++++++++++++--- frontend/src/hooks/useRegexModal.js | 13 ++++ 8 files changed, 266 insertions(+), 13 deletions(-) create mode 100755 backend/scripts/validate.ps1 diff --git a/Dockerfile b/Dockerfile index 3edf8c5..6bb99f5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,28 @@ # Dockerfile FROM python:3.9-slim WORKDIR /app -# Install git and gosu for user switching -RUN apt-get update && apt-get install -y git gosu && rm -rf /var/lib/apt/lists/* +# Install git, gosu, and PowerShell Core +RUN apt-get update && apt-get install -y \ + git \ + gosu \ + wget \ + ca-certificates \ + libicu-dev \ + && wget -O /tmp/powershell.tar.gz https://github.com/PowerShell/PowerShell/releases/download/v7.4.0/powershell-7.4.0-linux-x64.tar.gz \ + && mkdir -p /opt/microsoft/powershell/7 \ + && tar zxf /tmp/powershell.tar.gz -C /opt/microsoft/powershell/7 \ + && chmod +x /opt/microsoft/powershell/7/pwsh \ + && ln -s /opt/microsoft/powershell/7/pwsh /usr/bin/pwsh \ + && rm /tmp/powershell.tar.gz \ + && rm -rf /var/lib/apt/lists/* # Copy pre-built files from dist directory COPY dist/backend/app ./app +COPY dist/backend/scripts ./app/scripts COPY dist/static ./app/static COPY dist/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt +# Ensure scripts are executable +RUN chmod +x /app/scripts/*.ps1 || true # Copy and setup entrypoint script COPY entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh diff --git a/backend/Dockerfile b/backend/Dockerfile index 6e6b528..453bc4f 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -1,7 +1,21 @@ FROM python:3.9 WORKDIR /app +# Install PowerShell Core +RUN apt-get update && apt-get install -y \ + wget \ + ca-certificates \ + libicu-dev \ + && wget -O /tmp/powershell.tar.gz https://github.com/PowerShell/PowerShell/releases/download/v7.4.0/powershell-7.4.0-linux-x64.tar.gz \ + && mkdir -p /opt/microsoft/powershell/7 \ + && tar zxf /tmp/powershell.tar.gz -C /opt/microsoft/powershell/7 \ + && chmod +x /opt/microsoft/powershell/7/pwsh \ + && ln -s /opt/microsoft/powershell/7/pwsh /usr/bin/pwsh \ + && rm /tmp/powershell.tar.gz \ + && rm -rf /var/lib/apt/lists/* COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt COPY . . +# Ensure scripts are executable +RUN chmod +x /app/scripts/*.ps1 || true # Use gunicorn with 10-minute timeout CMD ["python", "-m", "app.main"] \ No newline at end of file diff --git a/backend/app/data/__init__.py b/backend/app/data/__init__.py index 0267b9d..ab6bbdb 100644 --- a/backend/app/data/__init__.py +++ b/backend/app/data/__init__.py @@ -226,6 +226,32 @@ def handle_item(category, name): return jsonify({"error": "An unexpected error occurred"}), 500 +@bp.route('/regex/verify', methods=['POST']) +def verify_regex(): + """Verify a regex pattern using .NET regex engine via PowerShell""" + try: + data = request.get_json() + if not data: + return jsonify({"error": "No JSON data provided"}), 400 + + pattern = data.get('pattern') + if not pattern: + return jsonify({"error": "Pattern is required"}), 400 + + from .utils import verify_dotnet_regex + + success, message = verify_dotnet_regex(pattern) + + if success: + return jsonify({"valid": True, "message": "Pattern is valid"}), 200 + else: + return jsonify({"valid": False, "error": message}), 200 + + except Exception as e: + logger.exception("Error verifying regex pattern") + return jsonify({"valid": False, "error": str(e)}), 500 + + @bp.route('//test', methods=['POST']) def run_tests(category): logger.info(f"Received test request for category: {category}") diff --git a/backend/app/data/utils.py b/backend/app/data/utils.py index 41d53b8..fefc40c 100644 --- a/backend/app/data/utils.py +++ b/backend/app/data/utils.py @@ -7,6 +7,8 @@ from typing import Dict, List, Any, Tuple, Union import git import regex import logging +import subprocess +import json from ..db.queries.arr import update_arr_config_on_rename, update_arr_config_on_delete logger = logging.getLogger(__name__) @@ -360,6 +362,68 @@ def check_delete_constraints(category: str, name: str) -> Tuple[bool, str]: return False, f"Error checking references: {str(e)}" +def verify_dotnet_regex(pattern: str) -> Tuple[bool, str]: + """ + Verify a regex pattern using .NET regex engine via PowerShell. + Returns (success, message) tuple. + """ + try: + # Get the path to the validate.ps1 script + # In Docker, the structure is /app/app/data/utils.py and script is at /app/scripts/validate.ps1 + script_path = os.path.join('/app', 'scripts', 'validate.ps1') + if not os.path.exists(script_path): + # Fallback for local development + script_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'scripts', 'validate.ps1') + + # Run PowerShell script, passing pattern via stdin to avoid shell escaping issues + result = subprocess.run( + ['pwsh', '-File', script_path], + input=pattern, + capture_output=True, + text=True, + timeout=5 + ) + + if result.returncode != 0 and not result.stdout: + logger.error(f"PowerShell script failed: {result.stderr}") + return False, "Failed to validate pattern" + + # Log the raw output for debugging + logger.debug(f"PowerShell output: {result.stdout}") + + # Parse JSON output + try: + output = json.loads(result.stdout.strip()) + except json.JSONDecodeError: + # Try to find JSON in the output + lines = result.stdout.strip().split('\n') + for line in reversed(lines): + if line.strip(): + try: + output = json.loads(line) + break + except json.JSONDecodeError: + continue + else: + logger.error(f"No valid JSON found in output: {result.stdout}") + return False, "Failed to parse validation result" + + if output.get('valid'): + return True, output.get('message', 'Pattern is valid') + else: + return False, output.get('error', 'Invalid pattern') + + except subprocess.TimeoutExpired: + logger.error("Pattern validation timed out") + return False, "Pattern validation timed out" + except FileNotFoundError: + logger.error("PowerShell (pwsh) not found") + return False, "PowerShell is not available" + except Exception as e: + logger.error(f"Error validating pattern: {e}") + return False, f"Validation error: {str(e)}" + + def update_references(category: str, old_name: str, new_name: str) -> List[str]: """ diff --git a/backend/scripts/validate.ps1 b/backend/scripts/validate.ps1 new file mode 100755 index 0000000..07e9dc9 --- /dev/null +++ b/backend/scripts/validate.ps1 @@ -0,0 +1,73 @@ +#!/usr/bin/env pwsh +# Validate a .NET regex pattern + +param( + [Parameter(Mandatory=$false)] + [string]$Pattern +) + +# Set output encoding to UTF-8 +[Console]::OutputEncoding = [System.Text.Encoding]::UTF8 +$ErrorActionPreference = "Stop" + +# Read pattern from stdin if not provided as parameter +if (-not $Pattern) { + $Pattern = [System.Console]::In.ReadToEnd() +} + +# Ensure we have a pattern +if ([string]::IsNullOrWhiteSpace($Pattern)) { + $result = @{ + valid = $false + error = "No pattern provided" + } + Write-Output (ConvertTo-Json $result -Compress) + exit 0 +} + +try { + # Attempt to create a .NET Regex object with the pattern + # Using IgnoreCase option as per requirement + $regex = [System.Text.RegularExpressions.Regex]::new($Pattern, [System.Text.RegularExpressions.RegexOptions]::IgnoreCase) + + # If we get here, the pattern is valid + $result = @{ + valid = $true + message = "Pattern is valid .NET regex" + } + + Write-Output (ConvertTo-Json $result -Compress) + exit 0 +} +catch { + # Pattern is invalid, extract the meaningful part of the error message + $errorMessage = $_.Exception.Message + + # Try to extract just the useful part of .NET regex errors + if ($errorMessage -match "Invalid pattern '.*?' at offset (\d+)\. (.+)") { + $errorMessage = "At position $($matches[1]): $($matches[2])" + } + elseif ($errorMessage -match 'parsing ".*?" - (.+)') { + $errorMessage = $matches[1] + } + elseif ($errorMessage -match 'Exception calling .* with .* argument\(s\): "(.+)"') { + $innerError = $matches[1] + if ($innerError -match "Invalid pattern '.*?' at offset (\d+)\. (.+)") { + $errorMessage = "At position $($matches[1]): $($matches[2])" + } + else { + $errorMessage = $innerError + } + } + + # Remove any trailing quotes or periods followed by quotes + $errorMessage = $errorMessage -replace '\."$', '.' -replace '"$', '' + + $result = @{ + valid = $false + error = $errorMessage + } + + Write-Output (ConvertTo-Json $result -Compress) + exit 0 +} \ No newline at end of file diff --git a/frontend/src/api/data.js b/frontend/src/api/data.js index 526f670..1d9dfa5 100644 --- a/frontend/src/api/data.js +++ b/frontend/src/api/data.js @@ -301,5 +301,15 @@ export const RegexPatterns = { update: (name, data, newName) => updateItem('regex_pattern', name, data, newName), delete: name => deleteItem('regex_pattern', name), - runTests: createSpecialEndpoint('regex_pattern', 'test') + runTests: createSpecialEndpoint('regex_pattern', 'test'), + verify: async pattern => { + try { + const response = await axios.post(`${BASE_URL}/regex/verify`, { + pattern + }); + return response.data; + } catch (error) { + throw handleError(error, 'verify regex pattern'); + } + } }; diff --git a/frontend/src/components/regex/RegexGeneralTab.jsx b/frontend/src/components/regex/RegexGeneralTab.jsx index b18db9f..06ab2a9 100644 --- a/frontend/src/components/regex/RegexGeneralTab.jsx +++ b/frontend/src/components/regex/RegexGeneralTab.jsx @@ -2,7 +2,9 @@ import React, {useState} from 'react'; import PropTypes from 'prop-types'; import MarkdownEditor from '@ui/MarkdownEditor'; import AddButton from '@ui/DataBar/AddButton'; -import {InfoIcon} from 'lucide-react'; +import {Regex, Loader} from 'lucide-react'; +import {RegexPatterns} from '@api/data'; +import Alert from '@ui/Alert'; const RegexGeneralTab = ({ name, @@ -18,6 +20,7 @@ const RegexGeneralTab = ({ patternError }) => { const [newTag, setNewTag] = useState(''); + const [validating, setValidating] = useState(false); const handleAddTag = () => { if (newTag.trim() && !tags.includes(newTag.trim())) { @@ -33,6 +36,30 @@ const RegexGeneralTab = ({ } }; + const handleValidatePattern = async () => { + if (!pattern?.trim()) { + Alert.warning('Please enter a pattern to validate'); + return; + } + + setValidating(true); + + try { + const result = await RegexPatterns.verify(pattern); + + if (result.valid) { + Alert.success('Pattern is valid .NET regex'); + } else { + Alert.error(result.error || 'Invalid pattern'); + } + } catch (error) { + console.error('Validation error:', error); + Alert.error('Failed to validate pattern'); + } finally { + setValidating(false); + } + }; + return (
{error && ( @@ -89,17 +116,28 @@ const RegexGeneralTab = ({
- -
- - Case insensitive PCRE2 +
+ +

+ Enter your regular expression pattern (case-insensitive .NET) +

+
-

- Enter your regular expression pattern -

{patternError && (

diff --git a/frontend/src/hooks/useRegexModal.js b/frontend/src/hooks/useRegexModal.js index 2a730db..4ddb9c7 100644 --- a/frontend/src/hooks/useRegexModal.js +++ b/frontend/src/hooks/useRegexModal.js @@ -65,6 +65,19 @@ export const useRegexModal = (initialPattern, onSave) => { return; } + // Validate pattern with .NET regex engine + try { + const validationResult = await RegexPatterns.verify(patternValue); + if (!validationResult.valid) { + Alert.error(`Invalid regex pattern: ${validationResult.error || 'Pattern validation failed'}`); + return; + } + } catch (error) { + console.error('Pattern validation error:', error); + Alert.error('Failed to validate pattern. Please check the pattern and try again.'); + return; + } + try { const data = { name,