Refactor SkipDetector category descriptions for clarity and conciseness, enhancing semantic classification accuracy

This commit is contained in:
mtayfur
2025-10-05 21:21:46 +03:00
parent c98809f807
commit 826cd6abf7

View File

@@ -343,116 +343,89 @@ class SkipDetector:
"""Semantic-based content classifier using zero-shot classification with category descriptions."""
# Free-text descriptions of the "technical" category: source code, error and
# stack-trace output, logs, shell commands, structured data, file paths, and
# impersonal technical questions. Many entries end in a "without ..." clause
# that lists first-person phrases (my job, my project, I am building).
# NOTE(review): several entries are near-identical rewordings of one another
# (e.g. the two "programming code with function definitions ..." strings) --
# confirm that both wordings are meant to be kept.
# Entry order is left untouched in case a consumer relies on list positions.
TECHNICAL_CATEGORY_DESCRIPTIONS = [
    "programming code with function definitions class declarations variable assignments import statements or syntax",
    "error messages with stack traces exception types file paths line numbers or codes without personal context",
    "system logs with timestamps severity levels status indicators like ERROR INFO WARN DEBUG",
    "shell terminal commands with dollar signs or sudo git npm docker apt-get pip curl",
    "programming code with function definitions class declarations variable assignments import statements syntax",
    "error messages stack traces exception types file paths line numbers codes NullPointerException SegmentationFault RuntimeError without personal context",
    "system logs with timestamps severity levels ERROR INFO WARN DEBUG entries without my application",
    "shell terminal commands with dollar signs sudo git npm docker apt-get pip curl jq grep sed awk pipe operators",
    "structured data in JSON XML YAML CSV format with nested objects arrays brackets key-value pairs",
    "technical documentation describing APIs configurations file formats system specifications protocols",
    "formatted tables lists outputs with multiple rows columns headers structured entries",
    "debugging output diagnostic information performance metrics system status build reports",
    "impersonal technical question about how technology works programming concepts algorithms data structures protocols without my job project career",
    "abstract question about technical comparisons theoretical computer science concepts without I am learning or personal development",
    "programming algorithm implementation with complexity analysis O notation time space complexity algorithmic approach without personal project",
    "technical explanation about software architecture design patterns microservices authentication systems backend without my work job",
    "API endpoints HTTP methods request response formats REST GraphQL WebSocket protocols without I am building or personal application",
    "code functionality behavior logic flow with technical terms without I am struggling I have trouble career anxiety",
    "Windows file paths with backslashes C colon backslash Program Files drive letters",
    "impersonal technical question about technology programming concepts algorithms data structures protocols without my job project career learning",
    "programming algorithm implementation with complexity analysis O notation time space complexity without personal project",
    "technical explanation about software architecture design patterns microservices authentication systems without my work job",
    "API endpoints HTTP methods request response formats REST GraphQL WebSocket cURL syntax without I am building my application",
    "code functionality behavior logic flow with technical terms without I am struggling trouble career anxiety",
    "Windows file paths with backslashes C colon Program Files drive letters",
    "deployment scripts configuration files with multiple technical components paths URLs commands without personal story",
    "error stack trace with file paths line numbers exception names NullPointerException SegmentationFault RuntimeError without personal debugging story",
    "system error messages with codes status codes HTTP codes 404 500 Connection refused Timeout Exception without personal context",
    "application logs with INFO WARN ERROR DEBUG levels timestamps entries without my application system",
    "API query syntax GraphQL REST endpoint cURL command HTTP request without I am building my API",
    "technical error output compilation errors runtime exceptions segmentation faults core dumps without personal project",
    "code snippet with syntax highlighting language markers python javascript java without personal implementation story",
    "React JSX component with angle brackets curly braces className props const Component equals arrow function return JSX",
    "HTTP error status codes 404 Not Found 500 Internal Server Error 403 Forbidden with status messages",
    "GraphQL query syntax with query mutation fragment type definitions curly brace field arguments",
    "debug logs with DEBUG prefix timestamp log level output without personal application debugging story",
    "command line with pipe operators curl pipe jq grep pipe sed awk without personal story",
    "stack trace with arrow notation function1 arrow function2 arrow function3 call sequence without debugging my code",
    "DEBUG log with request headers response details authorization content-type without my application debugging",
    "React JSX component with angle brackets curly braces className props const Component arrow function return",
    "HTTP error status codes 404 500 403 Not Found Internal Server Error Forbidden Connection refused Timeout",
    "GraphQL query mutation fragment type definitions curly brace field arguments",
]
# Free-text descriptions of the "meta conversation" category: acknowledgments,
# courtesy phrases, agreement, farewells, thanks, clarifications about earlier
# messages, and feedback on the conversation itself.
# NOTE(review): several entries are near-identical rewordings of one another
# (e.g. the two "polite courtesy phrase like ..." strings) -- confirm that
# both wordings are meant to be kept.
# Entry order is left untouched in case a consumer relies on list positions.
META_CONVERSATION_CATEGORY_DESCRIPTIONS = [
    "acknowledgment response like thanks got it I understand makes sense helpful appreciate it",
    "polite courtesy phrase like please excuse me sorry to bother you hope you are well no worries all good",
    "agreement confirmation like yes correct absolutely I agree exactly right indeed totally",
    "polite courtesy phrase like please excuse me sorry to bother hope you are well no worries all good",
    "agreement confirmation like yes correct absolutely I agree exactly right indeed totally completely",
    "farewell closing like goodbye see you later talk soon have a good day take care bye",
    "extended thanks like thank you so much appreciate your help grateful for assistance thanks again",
    "clarification about previous messages like sorry for confusion let me clarify what I meant I should have been more specific apologize for unclear question",
    "feedback about conversation like that was helpful your explanation was clear I appreciate the detailed response exceeded expectations",
    "meta discussion about asking questions like I will try to be more specific next time sorry for vague question I should provide more context",
    "simple agreement like absolutely exactly totally yes indeed right completely agree",
    "brief acknowledgment like got it understood makes sense I see okay cool sounds good",
    "simple strong agreement like absolutely agree totally agree exactly right yes indeed completely right",
    "brief positive acknowledgment like I hope so fingers crossed hopefully that works hope it helps",
    "extended thanks with multiple points like thank you for A B C appreciate detailed help grateful for assistance",
    "extended thanks with multiple points like thank you for detailed help grateful for assistance appreciate",
    "clarification about previous messages like sorry for confusion let me clarify what I meant should have been more specific",
    "feedback about conversation like that was helpful your explanation was clear exceeded expectations",
    "meta discussion about asking questions like I will try to be more specific next time sorry for vague question",
    "brief positive acknowledgment response like I hope so fingers crossed hopefully that works",
    "brief acknowledgment like got it understood okay cool sounds good I see",
]
# Free-text descriptions of the "factual query" category: definition questions,
# general how-to and explanation requests, comparisons, and general-knowledge
# questions. Many entries end in a "without ..." clause that lists personal
# context (personal relevance, my project, I am choosing).
# NOTE(review): several entries are near-identical rewordings of one another
# (e.g. the two "factual information request about dates events ..." strings)
# -- confirm that both wordings are meant to be kept.
# Entry order is left untouched in case a consumer relies on list positions.
FACTUAL_QUERY_CATEGORY_DESCRIPTIONS = [
    "definition question asking what is something what does term mean explain concept define word without personal context",
    "factual information request about dates events history geography science facts trivia without personal relevance application learning goal",
    "general how-to question asking for instructions steps process recipe procedure without personal needs circumstances projects",
    "theoretical explanation request about why how things work in general abstract concepts principles without personal application career",
    "comparison question asking differences between options technologies concepts products services without personal preference situation decision job requirement",
    "who what when where question about historical figures famous people events discoveries inventions without personal connection story",
    "multiple questions about same topic with numbered list several parts complex multi-part query with first second third bullet points",
    "academic theoretical question with multiple sub-questions breaking down topic into components analyzing from different angles without personal project work",
    "scientific explanation request about natural phenomena physics chemistry biology astronomy without personal research study career",
    "general knowledge query about capitals countries populations currencies geography facts without travel plans personal interest job relevance",
    "abstract technology comparison like difference between Python and Java without I am choosing or I work with",
    "theoretical programming question about best practices clean code principles without my project codebase work situation",
    "what is definition question like what is photosynthesis blockchain what does term mean explain concept",
    "when did historical question like when did event happen when was invention when did person live",
    "how to general instruction like how to tie tie change tire wash clothes without personal need",
    "explain how works question like explain how blockchain works neural networks quantum mechanics without personal learning goal",
    "abstract comparison without personal choice like difference between capitalism socialism what is better Python or Java",
    "short when question about past events dates like when did happen invention discovery",
    "factual information request about dates events history geography science facts trivia without personal relevance application",
    "general how-to question asking for instructions steps process recipe procedure without personal needs circumstances",
    "theoretical explanation request about why how things work abstract concepts principles without personal application career",
    "comparison question asking differences between options technologies concepts products services without personal preference situation decision",
    "who what when where question about historical figures famous people events discoveries inventions without personal connection",
    "multiple questions about same topic with numbered list several parts complex multi-part query with bullet points",
    "academic theoretical question with multiple sub-questions breaking down topic into components without personal project work",
    "scientific explanation request about natural phenomena physics chemistry biology astronomy without personal research study",
    "general knowledge query about capitals countries populations currencies geography facts without travel plans personal interest",
    "abstract technology comparison like difference between Python Java without I am choosing or I work with",
    "theoretical programming question about best practices clean code principles without my project codebase work",
    "explain how works question like explain blockchain neural networks quantum mechanics without personal learning goal",
]
# Free-text descriptions of the "output formatting" category: instructions to
# format, restyle, shorten or lengthen, rewrite, translate, summarize, or
# change the tone of a response.
# NOTE(review): several entries are near-identical rewordings of one another
# (e.g. the two "instruction to format output as JSON YAML CSV ..." strings)
# -- confirm that both wordings are meant to be kept.
# Entry order is left untouched in case a consumer relies on list positions.
OUTPUT_FORMATTING_CATEGORY_DESCRIPTIONS = [
    "instruction to format output as JSON YAML CSV table list markdown code block or specific data structure",
    "request to adjust response style length like make it shorter longer simpler more detailed use bullet points numbered list",
    "command to rewrite rephrase translate summarize previous response output or answer",
    "request to change tone presentation like be more formal casual technical professional explain like I am five years old",
    "tone adjustment like use professional tone formal language business corporate style",
    "instruction to format output as JSON YAML CSV table list markdown code block specific data structure",
    "request to adjust response style length like make it shorter longer simpler more detailed bullet points numbered list",
    "command to rewrite rephrase translate summarize previous response output",
    "request to change tone presentation like be more formal casual technical professional explain like I am five",
]
# Free-text descriptions of the "pure math calculation" category: arithmetic,
# expression evaluation, unit conversion, percentages, algebra, and geometry,
# each phrased with explicit example numbers.
# NOTE(review): several entries are near-identical rewordings of one another
# (e.g. the two "arithmetic calculation with explicit numbers ..." strings)
# -- confirm that both wordings are meant to be kept.
# Entry order is left untouched in case a consumer relies on list positions.
PURE_MATH_CALCULATION_CATEGORY_DESCRIPTIONS = [
    "arithmetic calculation with explicit numbers like calculate 15 percent of 250 or solve 45 times 67",
    "mathematical expression evaluation with operators like 2 plus 3 times 4 divided by 5 minus 6 or what is 123 times 456",
    "unit conversion with specific values like convert 100 kilometers to miles or 72 fahrenheit to celsius",
    "percentage calculation with explicit numbers like what is 25 percent of 800 or discount price of 120 minus 30 percent",
    "arithmetic calculation with explicit numbers like calculate 15 percent of 250 solve 45 times 67",
    "mathematical expression evaluation with operators like 2 plus 3 times 4 divided by 5 what is 123 times 456",
    "unit conversion with specific values like convert 100 kilometers to miles 72 fahrenheit to celsius",
    "percentage calculation with explicit numbers like what is 25 percent of 800 discount price",
    "algebra equation solving with explicit numbers like solve for x in equation 2x plus 5 equals 15",
    "geometry calculation with specific measurements like area of circle radius 5 or volume of cube side 10",
    "numerical computation like square root of 144 or 15 plus 23 minus 8 times 2",
    "geometry calculation with specific measurements like area of circle radius 5 volume of cube side 10",
]
# Free-text descriptions of the "explicit translation" category: translation
# requests that carry the text to translate (quoted, bracketed, or following a
# colon) and/or name a target language.
# NOTE(review): several entries are near-identical rewordings of one another
# (e.g. the two "translation instruction with text to translate ..." strings)
# -- confirm that both wordings are meant to be kept.
# Entry order is left untouched in case a consumer relies on list positions.
EXPLICIT_TRANSLATION_CATEGORY_DESCRIPTIONS = [
    "translation instruction with text to translate like translate this to Spanish colon Hello how are you in quotes or brackets",
    "translation request with provided phrase like how do you say quoted phrase good morning in French with explicit text",
    "language conversion with text block like convert this English text to Japanese here is the text followed by content",
    "translation instruction with text to translate like translate this to Spanish Hello how are you in quotes or brackets",
    "translation request with provided phrase like how do you say good morning in French with explicit text",
    "language conversion with text block like convert this English text to Japanese followed by content",
    "phrase translation with quoted or bracketed text like translate I am hungry to Mandarin with explicit phrase",
    "sentence translation with actual text like what is Spanish translation of quoted sentence or how to say specific phrase in Italian",
    "text conversion with source content like translate following paragraph to Portuguese colon followed by actual text",
    "translation with colon separator like Translate to German colon followed by English sentence or text to convert",
    "how do you say question with specific word or phrase like how do you say computer in Russian or what is hello in French",
    "translate to language instruction with target language like translate to Italian Japanese Portuguese",
    "sentence translation with actual text like what is Spanish translation of sentence how to say specific phrase in Italian",
    "translation with colon separator like Translate to German colon followed by English sentence or text",
    "language translation with explicit source text in quotes brackets or after colon separator",
]
# Free-text descriptions of the "grammar proofreading" category: requests to
# proofread, correct grammar, or fix spelling and punctuation in text that is
# supplied with the request. The misspelled and ungrammatical samples inside
# the strings ("Teh quick brown fox", "I has three book") are deliberate
# examples and must not be corrected.
# NOTE(review): several entries are near-identical rewordings of one another
# (e.g. the two "proofreading request with incorrect text ..." strings) --
# confirm that both wordings are meant to be kept.
# Entry order is left untouched in case a consumer relies on list positions.
GRAMMAR_PROOFREADING_CATEGORY_DESCRIPTIONS = [
    "proofreading request with incorrect text like fix grammar in this text here is my draft check for typos in quoted text",
    "grammar correction with specific wrong text or sentence like She don't like",
    "proofreading request with incorrect text like fix grammar in this here is my draft check for typos in quoted text",
    "grammar correction with specific wrong text or sentence like She don't like Their going too the store",
    "spelling punctuation check with specific text to review and fix errors in provided passage",
    "copy editing with text like proofread this paragraph correct errors in sentence fix mistakes in text block",
    "error correction like check this text for mistakes or review this sentence for grammar problems with text included",
    "correction with misspelled or grammatically incorrect text like Their going too the store or Me and him went",
    "typo fixing with text containing errors like Teh quick brown fox or check spelling in this paragraph",
    "sentence correction with wrong grammar like fix this I has three book or correct the punctuation in this text",
    "check grammar instruction with text to review like check grammar in this or correct grammar in following sentence",
    "proofreading with specific errors like multiple typos spelling mistakes punctuation errors in provided text passage",
    "copy editing with text like proofread this paragraph correct errors fix mistakes in text block",
    "error correction like check this text for mistakes review sentence for grammar problems with text included",
    "typo fixing with text containing errors like Teh quick brown fox check spelling in this paragraph",
    "sentence correction with wrong grammar like fix this I has three book correct the punctuation",
]
CONVERSATIONAL_CATEGORY_DESCRIPTIONS = [