From c476775bc2aac6183384d69708123a7e963dad7d Mon Sep 17 00:00:00 2001 From: Sam Chau Date: Mon, 19 Jan 2026 10:41:21 +1030 Subject: [PATCH] feat: add SQL validation method to PCDCache and integrate it into writeOperation as final lod --- scripts/validate-condition-values.ts | 577 +++++++++++++++++++++++++++ src/lib/server/pcd/cache.ts | 66 +++ src/lib/server/pcd/writer.ts | 44 +- 3 files changed, 684 insertions(+), 3 deletions(-) create mode 100644 scripts/validate-condition-values.ts diff --git a/scripts/validate-condition-values.ts b/scripts/validate-condition-values.ts new file mode 100644 index 0000000..e2b627d --- /dev/null +++ b/scripts/validate-condition-values.ts @@ -0,0 +1,577 @@ +#!/usr/bin/env -S deno run --allow-read +/** + * Validates condition values in a PCD's 1.initial.sql against + * the expected values in conditionTypes.ts + * + * Usage: deno run --allow-read scripts/validate-condition-values.ts + */ + +import { + SOURCE_VALUES, + RESOLUTION_VALUES, + QUALITY_MODIFIER_VALUES, + RELEASE_TYPE_VALUES, + INDEXER_FLAG_VALUES, + CONDITION_TYPES +} from '../src/lib/shared/conditionTypes.ts'; + +// Build sets of valid values +const VALID_VALUES: Record> = { + source: new Set(SOURCE_VALUES.map((v) => v.value)), + resolution: new Set(RESOLUTION_VALUES.map((v) => v.value)), + quality_modifier: new Set(QUALITY_MODIFIER_VALUES.map((v) => v.value)), + release_type: new Set(RELEASE_TYPE_VALUES.map((v) => v.value)), + indexer_flag: new Set(INDEXER_FLAG_VALUES.map((v) => v.value)) +}; + +const VALID_CONDITION_TYPES = new Set(CONDITION_TYPES.map((t) => t.value)); + +// Table name to condition type mapping +const TABLE_TO_TYPE: Record = { + condition_sources: 'source', + condition_resolutions: 'resolution', + condition_quality_modifiers: 'quality_modifier', + condition_release_types: 'release_type', + condition_indexer_flags: 'indexer_flag' +}; + +interface Mismatch { + table: string; + type: string; + value: string; + line: number; + validValues: string[]; +} + +interface TypeMismatch { + value: string; + line: number; + validTypes: string[]; +} + +interface PatternMismatch { + formatName: string; + conditionName: string; + patternName: string; + line: number; +} + +interface LanguageMismatch { + formatName: string; + conditionName: string; + languageName: string; + line: number; +} + +interface FKMismatch { + table: string; + referencedTable: string; + referencedColumn: string; + value: string; + line: number; +} + +interface ConditionDataMismatch { + formatName: string; + conditionName: string; + conditionType: string; + expectedTable: string; + line: number; +} + +async function validateSqlContent(content: string) { + + const mismatches: Mismatch[] = []; + const typeMismatches: TypeMismatch[] = []; + const patternMismatches: PatternMismatch[] = []; + const languageMismatches: LanguageMismatch[] = []; + const fkMismatches: FKMismatch[] = []; + const conditionDataMismatches: ConditionDataMismatch[] = []; + + const foundValues: Record> = { + source: new Set(), + resolution: new Set(), + quality_modifier: new Set(), + release_type: new Set(), + indexer_flag: new Set() + }; + const foundConditionTypes = new Set(); + + // Track defined entities + const definedPatterns = new Set(); + const definedLanguages = new Set(); + const definedCustomFormats = new Set(); + const definedTags = new Set(); + + // Track references for validation + const referencedPatterns: { formatName: string; conditionName: string; patternName: string; line: number }[] = []; + const referencedLanguages: { formatName: string; conditionName: string; languageName: string; line: number }[] = []; + + // Track conditions and their types for data integrity check + const conditionDefinitions: Map = + new Map(); + const conditionDataEntries: Set = new Set(); // "formatName|conditionName" for conditions that have data + + // Condition type to table mapping + const CONDITION_TYPE_TO_TABLE: Record = { + release_title: 'condition_patterns', + release_group: 'condition_patterns', + edition: 'condition_patterns', + language: 'condition_languages', + source: 'condition_sources', + resolution: 'condition_resolutions', + quality_modifier: 'condition_quality_modifiers', + release_type: 'condition_release_types', + indexer_flag: 'condition_indexer_flags', + size: 'condition_sizes', + year: 'condition_years' + }; + + // Process content by joining INSERT lines with their VALUES/SELECT lines + // SQL format: INSERT INTO table (...)\nVALUES (...); or INSERT INTO table (...)\nSELECT ...; + let joinedContent = content.replace( + /INSERT INTO (\w+)\s*\([^)]+\)\s*\n\s*VALUES/gi, + 'INSERT INTO $1 VALUES' + ); + // Also join INSERT...SELECT statements + joinedContent = joinedContent.replace( + /INSERT INTO (\w+)\s*\([^)]+\)\s*\n\s*SELECT/gi, + 'INSERT INTO $1 SELECT' + ); + const joinedLines = joinedContent.split('\n'); + + // Patterns to match INSERT...VALUES statements (now on same line after joining) + // Format: INSERT INTO condition_sources VALUES ('format', 'condition', 'value'); + const patterns: Record = { + condition_sources: /INSERT INTO condition_sources VALUES\s*\('[^']+',\s*'[^']+',\s*'([^']+)'\)/i, + condition_resolutions: + /INSERT INTO condition_resolutions VALUES\s*\('[^']+',\s*'[^']+',\s*'([^']+)'\)/i, + condition_quality_modifiers: + /INSERT INTO condition_quality_modifiers VALUES\s*\('[^']+',\s*'[^']+',\s*'([^']+)'\)/i, + condition_release_types: + /INSERT INTO condition_release_types VALUES\s*\('[^']+',\s*'[^']+',\s*'([^']+)'\)/i, + condition_indexer_flags: + /INSERT INTO condition_indexer_flags VALUES\s*\('[^']+',\s*'[^']+',\s*'([^']+)'\)/i + }; + + // Pattern for condition types in custom_format_conditions + // Format: INSERT INTO custom_format_conditions (...)\nSELECT cf.name, 'name', 'type', 'arrType', negate, required + // After joining: INSERT INTO custom_format_conditions SELECT cf.name, 'name', 'type', ... + const conditionTypePattern = + /INSERT INTO custom_format_conditions\s+SELECT\s+cf\.name,\s*'[^']+',\s*'([^']+)'/i; + + // Pattern for regular_expressions definitions + // Format: INSERT INTO regular_expressions (name, pattern, description) VALUES ('name', 'pattern', 'desc'); + const regexDefPattern = /INSERT INTO regular_expressions.*VALUES\s*\('([^']+)'/i; + + // Pattern for languages INSERT start + const languageInsertStartPattern = /INSERT INTO languages.*VALUES/i; + // Pattern for language value rows: ('English'), + const languageValuePattern = /^\s*\('([^']+)'\)[,;]?\s*$/; + + // Pattern for custom_formats definitions + // Format: INSERT INTO custom_formats (name, ...) VALUES ('name', ...); + const customFormatDefPattern = /INSERT INTO custom_formats.*VALUES\s*\('([^']+)'/i; + + // Pattern for tags definitions + // Format: INSERT INTO tags (name) VALUES ('name'); + const tagDefPattern = /INSERT INTO tags.*VALUES\s*\('([^']+)'\)/i; + + // Pattern for condition_patterns references (multi-line, need to track context) + // Format: INSERT INTO condition_patterns ... SELECT 'format', 'condition', re.name ... WHERE re.name = 'pattern'; + const conditionPatternSelectPattern = + /INSERT INTO condition_patterns\s+SELECT\s+'([^']+)',\s*'([^']+)'/i; + const wherePatternPattern = /WHERE\s+re\.name\s*=\s*'([^']+)'/i; + + // Pattern for condition_languages references + // Format: INSERT INTO condition_languages ... SELECT 'format', 'condition', l.name ... WHERE l.name = 'lang'; + const conditionLanguageSelectPattern = + /INSERT INTO condition_languages\s+SELECT\s+'([^']+)',\s*'([^']+)'/i; + const whereLanguagePattern = /WHERE\s+l\.name\s*=\s*'([^']+)'/i; + + // Pattern for custom_format_conditions + // After joining: INSERT INTO custom_format_conditions SELECT cf.name, 'conditionName', 'type', 'arrType', ... + const conditionDefPattern = + /INSERT INTO custom_format_conditions\s+SELECT\s+cf\.name,\s*'([^']+)',\s*'([^']+)'/i; + // Also need to capture the format name from WHERE clause + const whereFormatPattern = /WHERE\s+cf\.name\s*=\s*'([^']+)'/i; + + // Patterns for condition data tables (to track which conditions have data) + const conditionDataPatterns: Record = { + condition_sources: /INSERT INTO condition_sources VALUES\s*\('([^']+)',\s*'([^']+)'/i, + condition_resolutions: /INSERT INTO condition_resolutions VALUES\s*\('([^']+)',\s*'([^']+)'/i, + condition_quality_modifiers: + /INSERT INTO condition_quality_modifiers VALUES\s*\('([^']+)',\s*'([^']+)'/i, + condition_release_types: /INSERT INTO condition_release_types VALUES\s*\('([^']+)',\s*'([^']+)'/i, + condition_indexer_flags: /INSERT INTO condition_indexer_flags VALUES\s*\('([^']+)',\s*'([^']+)'/i, + condition_sizes: /INSERT INTO condition_sizes VALUES\s*\('([^']+)',\s*'([^']+)'/i, + condition_years: /INSERT INTO condition_years VALUES\s*\('([^']+)',\s*'([^']+)'/i + }; + + let currentConditionPatternContext: { formatName: string; conditionName: string } | null = null; + let currentConditionLanguageContext: { formatName: string; conditionName: string } | null = null; + let currentConditionDefContext: { conditionName: string; type: string; line: number } | null = null; + let inLanguagesInsert = false; + + for (let i = 0; i < joinedLines.length; i++) { + const line = joinedLines[i]; + const lineNum = i + 1; + + // ========== ENTITY DEFINITIONS ========== + + // Check for regular_expressions definitions + const regexDefMatch = line.match(regexDefPattern); + if (regexDefMatch) { + definedPatterns.add(regexDefMatch[1]); + } + + // Check for languages definitions (multi-value INSERT format) + if (line.match(languageInsertStartPattern)) { + inLanguagesInsert = true; + } + if (inLanguagesInsert) { + const languageMatch = line.match(languageValuePattern); + if (languageMatch) { + definedLanguages.add(languageMatch[1]); + } + // End of INSERT block (line ends with semicolon or next INSERT starts) + if (line.includes(';') || (line.match(/^INSERT/i) && !line.match(languageInsertStartPattern))) { + inLanguagesInsert = false; + } + } + + // Check for custom_formats definitions + const customFormatDefMatch = line.match(customFormatDefPattern); + if (customFormatDefMatch) { + definedCustomFormats.add(customFormatDefMatch[1]); + } + + // Check for tags definitions + const tagDefMatch = line.match(tagDefPattern); + if (tagDefMatch) { + definedTags.add(tagDefMatch[1]); + } + + // ========== CONDITION DEFINITIONS ========== + + // Check for condition definition (to track type) + const conditionDefMatch = line.match(conditionDefPattern); + if (conditionDefMatch) { + currentConditionDefContext = { + conditionName: conditionDefMatch[1], + type: conditionDefMatch[2], + line: lineNum + }; + } + + // Capture format name for condition definition + const whereFormatMatch = line.match(whereFormatPattern); + if (whereFormatMatch && currentConditionDefContext) { + const key = `${whereFormatMatch[1]}|${currentConditionDefContext.conditionName}`; + conditionDefinitions.set(key, { + formatName: whereFormatMatch[1], + conditionName: currentConditionDefContext.conditionName, + type: currentConditionDefContext.type, + line: currentConditionDefContext.line + }); + currentConditionDefContext = null; + } + + // ========== PATTERN REFERENCES ========== + + // Check for condition_patterns SELECT (captures format and condition names) + const conditionPatternMatch = line.match(conditionPatternSelectPattern); + if (conditionPatternMatch) { + currentConditionPatternContext = { + formatName: conditionPatternMatch[1], + conditionName: conditionPatternMatch[2] + }; + // Mark that this condition has data + conditionDataEntries.add(`${conditionPatternMatch[1]}|${conditionPatternMatch[2]}`); + } + + // Check for WHERE re.name = 'pattern' (the actual pattern reference) + const wherePatternMatch = line.match(wherePatternPattern); + if (wherePatternMatch && currentConditionPatternContext) { + referencedPatterns.push({ + formatName: currentConditionPatternContext.formatName, + conditionName: currentConditionPatternContext.conditionName, + patternName: wherePatternMatch[1], + line: lineNum + }); + currentConditionPatternContext = null; + } + + // ========== LANGUAGE REFERENCES ========== + + // Check for condition_languages SELECT + const conditionLanguageMatch = line.match(conditionLanguageSelectPattern); + if (conditionLanguageMatch) { + currentConditionLanguageContext = { + formatName: conditionLanguageMatch[1], + conditionName: conditionLanguageMatch[2] + }; + // Mark that this condition has data + conditionDataEntries.add(`${conditionLanguageMatch[1]}|${conditionLanguageMatch[2]}`); + } + + // Check for WHERE l.name = 'language' + const whereLanguageMatch = line.match(whereLanguagePattern); + if (whereLanguageMatch && currentConditionLanguageContext) { + referencedLanguages.push({ + formatName: currentConditionLanguageContext.formatName, + conditionName: currentConditionLanguageContext.conditionName, + languageName: whereLanguageMatch[1], + line: lineNum + }); + currentConditionLanguageContext = null; + } + + // ========== CONDITION DATA TABLES ========== + + // Check condition data tables (to track which conditions have data) + for (const [_table, pattern] of Object.entries(conditionDataPatterns)) { + const match = line.match(pattern); + if (match) { + conditionDataEntries.add(`${match[1]}|${match[2]}`); + } + } + + // ========== CONDITION TYPES ========== + + // Check condition types + const typeMatch = line.match(conditionTypePattern); + if (typeMatch) { + const conditionType = typeMatch[1]; // type is the 1st capture group now + foundConditionTypes.add(conditionType); + if (!VALID_CONDITION_TYPES.has(conditionType)) { + typeMismatches.push({ + value: conditionType, + line: lineNum, + validTypes: Array.from(VALID_CONDITION_TYPES) + }); + } + } + + // ========== CONDITION VALUES ========== + + // Check value tables + for (const [table, pattern] of Object.entries(patterns)) { + const match = line.match(pattern); + if (match) { + const value = match[1]; + const type = TABLE_TO_TYPE[table]; + foundValues[type].add(value); + + if (!VALID_VALUES[type].has(value)) { + mismatches.push({ + table, + type, + value, + line: lineNum, + validValues: Array.from(VALID_VALUES[type]) + }); + } + } + } + } + + // ========== POST-LOOP VALIDATION ========== + + // Check for missing patterns + for (const ref of referencedPatterns) { + if (!definedPatterns.has(ref.patternName)) { + patternMismatches.push(ref); + } + } + + // Check for missing languages + for (const ref of referencedLanguages) { + if (!definedLanguages.has(ref.languageName)) { + languageMismatches.push(ref); + } + } + + // Check for conditions without corresponding data + for (const [key, def] of conditionDefinitions) { + const expectedTable = CONDITION_TYPE_TO_TABLE[def.type]; + if (expectedTable && !conditionDataEntries.has(key)) { + conditionDataMismatches.push({ + formatName: def.formatName, + conditionName: def.conditionName, + conditionType: def.type, + expectedTable, + line: def.line + }); + } + } + + return { + mismatches, + typeMismatches, + patternMismatches, + languageMismatches, + fkMismatches, + conditionDataMismatches, + foundValues, + foundConditionTypes, + definedPatterns, + definedLanguages, + definedCustomFormats, + definedTags, + referencedPatterns, + conditionDefinitions + }; +} + +// Find all SQL files in a database directory (including deps) +async function findAllSqlFiles(dbPath: string): Promise { + const files: string[] = []; + + // Get the database directory (parent of ops/) + const dbDir = dbPath.replace(/\/ops\/.*$/, ''); + + // Add deps schema files first (they define base entities like languages) + const depsDir = `${dbDir}/deps/schema/ops`; + try { + for await (const entry of Deno.readDir(depsDir)) { + if (entry.isFile && entry.name.endsWith('.sql')) { + files.push(`${depsDir}/${entry.name}`); + } + } + } catch { + // deps dir might not exist + } + + // Add the main file + files.push(dbPath); + + return files.sort(); +} + +async function main() { + const args = Deno.args; + if (args.length === 0) { + console.log('Usage: deno run --allow-read scripts/validate-condition-values.ts '); + console.log(''); + console.log('Example:'); + console.log( + ' deno run --allow-read scripts/validate-condition-values.ts dist/dev/data/databases/*/ops/1.initial.sql' + ); + console.log(''); + console.log('Note: The script automatically includes deps/schema/ops/*.sql files'); + Deno.exit(1); + } + + for (const filePath of args) { + console.log(`\n${'='.repeat(60)}`); + console.log(`Validating: ${filePath}`); + console.log('='.repeat(60)); + + try { + // Find all SQL files including deps + const allFiles = await findAllSqlFiles(filePath); + console.log(`Including ${allFiles.length} SQL file(s):`); + for (const f of allFiles) { + console.log(` - ${f.split('/').slice(-3).join('/')}`); + } + + // Concatenate all SQL content + let combinedContent = ''; + for (const f of allFiles) { + combinedContent += await Deno.readTextFile(f) + '\n'; + } + + const { + mismatches, + typeMismatches, + patternMismatches, + languageMismatches, + conditionDataMismatches, + foundValues, + foundConditionTypes, + definedPatterns, + definedLanguages, + definedCustomFormats, + referencedPatterns, + conditionDefinitions + } = await validateSqlContent(combinedContent); + + let hasErrors = false; + + // Report condition type mismatches + if (typeMismatches.length > 0) { + hasErrors = true; + console.log(`\n❌ Found ${typeMismatches.length} invalid condition type(s):`); + for (const m of typeMismatches) { + console.log(` Line ${m.line}: "${m.value}"`); + console.log(` Valid types: ${m.validTypes.join(', ')}`); + } + } + + // Report value mismatches + if (mismatches.length > 0) { + hasErrors = true; + console.log(`\n❌ Found ${mismatches.length} invalid value(s):`); + for (const m of mismatches) { + console.log(` Line ${m.line}: ${m.type} = "${m.value}"`); + console.log(` Valid values: ${m.validValues.join(', ')}`); + } + } + + // Report missing patterns + if (patternMismatches.length > 0) { + hasErrors = true; + console.log(`\n❌ Found ${patternMismatches.length} missing pattern(s):`); + for (const m of patternMismatches) { + console.log(` Line ${m.line}: Format "${m.formatName}" condition "${m.conditionName}"`); + console.log(` References missing pattern: "${m.patternName}"`); + } + } + + // Report missing languages + if (languageMismatches.length > 0) { + hasErrors = true; + console.log(`\n❌ Found ${languageMismatches.length} missing language(s):`); + for (const m of languageMismatches) { + console.log(` Line ${m.line}: Format "${m.formatName}" condition "${m.conditionName}"`); + console.log(` References missing language: "${m.languageName}"`); + } + } + + // Report conditions without data + if (conditionDataMismatches.length > 0) { + hasErrors = true; + console.log(`\n❌ Found ${conditionDataMismatches.length} condition(s) without data:`); + for (const m of conditionDataMismatches) { + console.log(` Line ${m.line}: Format "${m.formatName}" condition "${m.conditionName}"`); + console.log(` Type "${m.conditionType}" requires data in "${m.expectedTable}"`); + } + } + + if (!hasErrors) { + console.log('\n✅ All validations passed!'); + } + + // Summary + console.log('\n--- Summary ---'); + console.log(`Custom formats: ${definedCustomFormats.size}`); + console.log(`Conditions: ${conditionDefinitions.size}`); + console.log(`Patterns: ${definedPatterns.size} defined, ${referencedPatterns.length} referenced`); + console.log(`Languages: ${definedLanguages.size}`); + console.log(`Condition types: ${Array.from(foundConditionTypes).sort().join(', ')}`); + + // Detailed values found + console.log('\n--- Condition values used ---'); + for (const [type, values] of Object.entries(foundValues)) { + if (values.size > 0) { + console.log(`${type}: ${Array.from(values).sort().join(', ')}`); + } + } + } catch (error) { + console.error(`Error reading file: ${error}`); + } + } +} + +main(); diff --git a/src/lib/server/pcd/cache.ts b/src/lib/server/pcd/cache.ts index eddaaf1..ad194df 100644 --- a/src/lib/server/pcd/cache.ts +++ b/src/lib/server/pcd/cache.ts @@ -225,6 +225,72 @@ export class PCDCache { return this.db!.prepare(sql).get(...params) as T | undefined; } + + /** + * Validate SQL statements by doing a dry-run in a transaction + * Returns null if valid, or an error message if invalid + * + * This is a safety check before writing operations to files. + * It catches FK violations, constraint errors, etc. + */ + validateSql(sqlStatements: string[]): { valid: boolean; error?: string } { + if (!this.isBuilt()) { + return { valid: false, error: 'Cache not built' }; + } + + try { + // Start a savepoint (nested transaction) + this.db!.exec('SAVEPOINT validation_check'); + + try { + // Try to execute each statement + for (const sql of sqlStatements) { + this.db!.exec(sql); + } + + // All statements executed successfully + return { valid: true }; + } finally { + // Always rollback - this is just a validation check + this.db!.exec('ROLLBACK TO SAVEPOINT validation_check'); + this.db!.exec('RELEASE SAVEPOINT validation_check'); + } + } catch (error) { + // Parse the error to provide a helpful message + const errorStr = String(error); + + // Common SQLite constraint errors + if (errorStr.includes('FOREIGN KEY constraint failed')) { + return { + valid: false, + error: `Foreign key constraint failed - referenced entity does not exist. ${errorStr}` + }; + } + if (errorStr.includes('UNIQUE constraint failed')) { + return { + valid: false, + error: `Unique constraint failed - duplicate entry. ${errorStr}` + }; + } + if (errorStr.includes('NOT NULL constraint failed')) { + return { + valid: false, + error: `Required field is missing. ${errorStr}` + }; + } + if (errorStr.includes('CHECK constraint failed')) { + return { + valid: false, + error: `Value validation failed. ${errorStr}` + }; + } + + return { + valid: false, + error: `Database validation failed: ${errorStr}` + }; + } + } } // ============================================================================ diff --git a/src/lib/server/pcd/writer.ts b/src/lib/server/pcd/writer.ts index 9903b0a..a9d2d64 100644 --- a/src/lib/server/pcd/writer.ts +++ b/src/lib/server/pcd/writer.ts @@ -6,7 +6,7 @@ import type { CompiledQuery } from 'kysely'; import { getBaseOpsPath, getUserOpsPath } from './ops.ts'; import { databaseInstancesQueries } from '$db/queries/databaseInstances.ts'; import { logger } from '$logger/logger.ts'; -import { compile } from './cache.ts'; +import { compile, getCache } from './cache.ts'; import { isFileUncommitted } from '$utils/git/status.ts'; export type OperationLayer = 'base' | 'user'; @@ -280,6 +280,45 @@ export async function writeOperation(options: WriteOptions): Promise