♻️ Skip detection now binary (#4)

Author: M. Tayfur
Date: 2025-10-28 02:35:29 +03:00
Committed by: GitHub
Parent: e6e3f7ab99
Commit: 551b0c571b


@@ -54,17 +54,15 @@ class Constants:
 LLM_RERANKING_TRIGGER_MULTIPLIER = 0.8  # Multiplier for LLM reranking trigger threshold

 # Skip Detection
-SKIP_CATEGORY_MARGIN = 0.5  # Margin above conversational similarity for skip category classification
+SKIP_CATEGORY_MARGIN = 0.20  # Margin above personal similarity for skip category classification
+DEDUPLICATION_SIMILARITY_THRESHOLD = 0.90  # Similarity threshold for deduplication checks

 # Safety & Operations
 MAX_DELETE_OPERATIONS_RATIO = 0.6  # Maximum delete operations ratio for safety
 MIN_OPS_FOR_DELETE_RATIO_CHECK = 6  # Minimum operations to apply ratio check

 # Content Display
-CONTENT_PREVIEW_LENGTH = 80  # Maximum length for content preview display
+CONTENT_PREVIEW_LENGTH = 100  # Maximum length for content preview display
-
-# Default Models
-DEFAULT_LLM_MODEL = "google/gemini-2.5-flash-lite"

 class Prompts:
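The constants touched in this hunk imply concrete guards around memory operations. A minimal, hedged sketch of how they might be applied; the helper names (`is_duplicate`, `too_many_deletes`) are illustrative assumptions, not code from this commit:

```python
import numpy as np

DEDUPLICATION_SIMILARITY_THRESHOLD = 0.90  # introduced in this hunk
MAX_DELETE_OPERATIONS_RATIO = 0.6          # unchanged context above
MIN_OPS_FOR_DELETE_RATIO_CHECK = 6

def is_duplicate(candidate: np.ndarray, existing: np.ndarray) -> bool:
    """Cosine similarity between two embeddings; >= 0.90 counts as a duplicate."""
    sim = float(np.dot(candidate, existing) /
                (np.linalg.norm(candidate) * np.linalg.norm(existing)))
    return sim >= DEDUPLICATION_SIMILARITY_THRESHOLD

def too_many_deletes(ops: list) -> bool:
    """Reject a batch where DELETE ops dominate, once the batch is large enough."""
    if len(ops) < MIN_OPS_FOR_DELETE_RATIO_CHECK:
        return False
    deletes = sum(1 for op in ops if op.get("operation") == "DELETE")
    return deletes / len(ops) > MAX_DELETE_OPERATIONS_RATIO
```

Under these assumptions, a near-identical candidate memory would be dropped before a CREATE, and a batch of mostly DELETE operations would be rejected outright.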
@@ -73,7 +71,7 @@ class Prompts:
 MEMORY_CONSOLIDATION = f"""You are the Memory System Consolidator, a specialist in creating precise user memories.

 ## OBJECTIVE
-Build precise memories of the user's personal narrative with factual, temporal statements.
+Your goal is to build precise memories of the user's personal narrative with factual, temporal statements.

 ## AVAILABLE OPERATIONS
 - CREATE: For new, personal facts. Must be semantically and temporally enhanced.
@@ -83,6 +81,11 @@ Build precise memories of the user's personal narrative with factual, temporal s
 ## PROCESSING GUIDELINES
 - Personal Facts Only: Store only significant facts with lasting relevance to the user's life and identity. Exclude transient situations, questions, general knowledge, casual mentions, or momentary states.
+- **Filter for Intent:** You MUST SKIP if the user's primary intent is instructional, technical, or analytical, even if the message contains personal details. This includes requests to:
+- Rewrite, revise, translate, or proofread a block of text (e.g., "revise this review for me").
+- Answer a general knowledge, math, or technical question.
+- Explain a concept, perform a calculation, or act as a persona.
+**Only store facts when the user is *directly stating* them as part of a personal narrative, not when providing them as content for a task.**
 - Maintain Temporal Accuracy:
 - Capture Dates: Record temporal information when explicitly stated or clearly derivable. Convert relative references (last month, yesterday) to specific dates.
 - Preserve History: Transform superseded facts into past-tense statements with defined time boundaries.
@@ -93,14 +96,13 @@ Build precise memories of the user's personal narrative with factual, temporal s
 - Retroactive Enrichment: If a name is provided for prior entity, UPDATE only if substantially valuable.
 - Ensure Memory Quality:
 - High Bar for Creation: Only CREATE memories for significant life facts, relationships, events, or core personal attributes. Skip trivial details or passing interests.
-- Contextual Completeness: Combine related information into cohesive statements. Group connected facts (same topic, person, event, or timeframe) into single memories rather than fragmenting. Include supporting details while respecting boundaries. Only combine directly related facts. Avoid bare statements and never merge unrelated information.
 - Mandatory Semantic Enhancement: Enhance entities with descriptive categorical nouns for better retrieval.
 - Verify Nouns/Pronouns: Link pronouns (he, she, they) and nouns to specific entities.
 - First-Person Format: Write all memories in English from the user's perspective.

 ## DECISION FRAMEWORK
-- Selectivity: Verify the user is stating a direct, personally significant fact with lasting importance. If not, SKIP. Never create duplicate memories. Skip momentary events or casual mentions. Be conservative with CREATE and UPDATE operations.
+- Selectivity: Verify the user's *primary intent* is to state a direct, personally significant fact with lasting importance. If the intent is instructional, analytical, or a general question, SKIP. Never create duplicate memories. Skip momentary events or casual mentions. Be conservative with CREATE and UPDATE operations.
-- Strategy: Strongly prioritize enriching existing memories over creating new ones. Analyze the message holistically to identify naturally connected facts that should be captured together. When facts share connections (same person, event, situation, or causal relationship), combine them into a unified memory that preserves the complete picture. Each memory should be self-contained and meaningful.
+- Strategy: Strongly prioritize enriching existing memories over creating new ones. Analyze the message holistically to identify naturally connected facts (same person, event, or timeframe) and combine them into a unified, cohesive memory rather than fragmenting them. Each memory must be self-contained and **never** merge unrelated information.
 - Execution: For new significant facts, use CREATE. For simple attribute changes, use UPDATE only if it meaningfully improves the memory. For significant changes, use UPDATE to make the old memory historical, then CREATE the new one. For contradictions, use DELETE.

 ## EXAMPLES (Assumes Current Date: September 15, 2025)
@@ -115,37 +117,37 @@ Explanation: Multiple facts about the same person (Sarah's active lifestyle, lov
 Message: "My daughter Emma just turned 12. We adopted a dog named Max for her 11th birthday. What should I give her for her 12th birthday?"
 Memories: [id:mem-002] My daughter Emma is 10 years old [noted at March 20 2024] [id:mem-101] I have a golden retriever [noted at September 20 2024]
 Return: {{"ops": [{{"operation": "UPDATE", "id": "mem-002", "content": "My daughter Emma turned 12 years old in September 2025"}}, {{"operation": "UPDATE", "id": "mem-101", "content": "I have a golden retriever named Max that was adopted in September 2024 as a birthday gift for my daughter Emma when she turned 11"}}]}}
-Explanation: Dog memory enriched with related context (Emma, birthday gift, age 11) and temporal anchoring (September 2024) - all semantically connected to the same event and relationship.
+Explanation: Dog memory enriched with related context (Emma, birthday gift, age 11) and temporal anchoring (September 2024). The instructional question ("What should I give her...?") is ignored as per the 'Filter for Intent' rule.

 ### Example 3
 Message: "Can you recommend some good tapas restaurants in Barcelona? I moved here from Madrid last month."
 Memories: [id:mem-005] I live in Madrid Spain [noted at June 12 2025]
 Return: {{"ops": [{{"operation": "UPDATE", "id": "mem-005", "content": "I lived in Madrid Spain until August 2025"}}, {{"operation": "CREATE", "id": "", "content": "I moved to Barcelona Spain in August 2025"}}]}}
-Explanation: Relocation is a significant life event with lasting impact. "Exploring the city" and "adjusting" are transient states and excluded.
+Explanation: Relocation is a significant life event. The request for recommendations is instructional and is ignored.

 ### Example 4
 Message: "My wife Sofia and I just got married in August. What are some good honeymoon destinations?"
 Memories: [id:mem-008] I am single [noted at January 5 2025]
 Return: {{"ops": [{{"operation": "DELETE", "id": "mem-008", "content": ""}}, {{"operation": "CREATE", "id": "", "content": "I married Sofia in August 2025 and she is now my wife"}}]}}
-Explanation: Marriage is an enduring life event. Wife's name and marriage date are lasting facts combined naturally. "Planning honeymoon" is a transient activity and excluded.
+Explanation: Marriage is an enduring life event. The instructional question ("What are some good honeymoon destinations?") is ignored.

 ### Example 5
 Message: "¡Hola! Me mudé de Madrid a Barcelona el mes pasado y me casé con mi novia Sofía en agosto. ¿Me puedes recomendar un buen restaurante para celebrar?"
 Memories: [id:mem-005] I live in Madrid Spain [noted at June 12 2025] [id:mem-006] I am dating Sofia [noted at February 10 2025] [id:mem-008] I am single [noted at January 5 2025]
 Return: {{"ops": [{{"operation": "UPDATE", "id": "mem-005", "content": "I lived in Madrid Spain until August 2025"}}, {{"operation": "DELETE", "id": "mem-008", "content": ""}}, {{"operation": "UPDATE", "id": "mem-006", "content": "I moved to Barcelona Spain and married my girlfriend Sofia in August 2025, who is now my wife"}}]}}
-Explanation: The user's move and marriage are significant, related life events that occurred in the same month. They are consolidated into a single, cohesive memory that enriches the existing relationship context.
+Explanation: The user's move and marriage are significant, related life events. They are consolidated into a single memory. The request for a recommendation is ignored.

 ### Example 6
 Message: "I'm feeling stressed about work this week and looking for some relaxation tips. I have a big presentation coming up on Friday."
 Memories: []
 Return: {{"ops": []}}
-Explanation: Temporary stress, seeking tips, and upcoming presentation are all transient situations without lasting personal significance. Nothing to store.
+Explanation: Transient state (stress) and a request for information (relaxation tips). The primary intent is instructional/analytical, and the facts (presentation) are not significant, lasting personal narrative. Nothing to store.
 """

 MEMORY_RERANKING = f"""You are the Memory Relevance Analyzer.

 ## OBJECTIVE
-Select relevant memories to personalize the response, prioritizing direct connections and supporting context.
+Your goal is to analyze the user's message and select the most relevant memories to personalize the AI's response. Prioritize direct connections and supporting context.

 ## RELEVANCE CATEGORIES
 - Direct: Memories explicitly about the query topic, people, or domain.
@@ -154,9 +156,8 @@ Select relevant memories to personalize the response, prioritizing direct connec
 ## SELECTION FRAMEWORK
 - Prioritize Current Info: Give current facts higher relevance than historical ones unless the query is about the past or historical context directly informs the current situation.
-- Hierarchy: Prioritize Direct → Contextual → Background.
+- Hierarchy: Prioritize topic matches first (Direct), then context that enhances the response (Contextual), and finally general background (Background).
 - Ordering: Order IDs by relevance, most relevant first.
-- Standard: Prioritize topic matches, then context that enhances the response.
 - Maximum Limit: Return up to {Constants.MAX_MEMORIES_PER_RETRIEVAL} memory IDs.

 ## EXAMPLES (Assumes Current Date: September 15, 2025)
@@ -327,9 +328,9 @@ class UnifiedCacheManager:
 class SkipDetector:
-    """Semantic-based content classifier using zero-shot classification with category descriptions."""
+    """Binary content classifier: personal vs non-personal using semantic analysis."""

-    TECHNICAL_CATEGORY_DESCRIPTIONS = [
+    NON_PERSONAL_CATEGORY_DESCRIPTIONS = [
 "programming language syntax, data types like string or integer, algorithm logic, function, method, programming class, object-oriented paradigm, variable scope, control flow, import, module, package, library, framework, recursion, iteration",
 "software design patterns, creational: singleton, factory, builder; structural: adapter, decorator, facade, proxy; behavioral: observer, strategy, command, mediator, chain of responsibility; abstract interface, polymorphism, composition",
 "error handling, exception, stack trace, TypeError, NullPointerException, IndexError, segmentation fault, core dump, stack overflow, runtime vs compile-time error, assertion failed, syntax error, null pointer dereference, memory leak, bug",
@@ -350,9 +351,6 @@ class SkipDetector:
 "regex pattern, regular expression matching, groups, capturing, backslash escapes, metacharacters, wildcards, quantifiers, character classes, lookaheads, lookbehinds, alternation, anchors, word boundary, multiline flag, global search",
 "software testing, unit test, assertion, mock, stub, fixture, test suite, test case, verification, automated QA, validation framework, JUnit, pytest, Jest. Integration, end-to-end (E2E), functional, regression, acceptance testing",
 "cloud computing platforms, infrastructure as a service (IaaS), PaaS, AWS, Azure, GCP, compute instance, region, availability zone, elasticity, distributed system, virtual machine, container, serverless, Lambda, edge computing, CDN",
-]
-
-INSTRUCTION_CATEGORY_DESCRIPTIONS = [
 "format the output as structured data. Return the answer as JSON with specific keys and values, or as YAML. Organize information into a CSV file or a database-style table with columns and rows. Present as a list of objects or an array.",
 "style the text presentation. Use markdown formatting like bullet points, a numbered list, or a task list. Organize content into a grid or tabular layout with proper alignment. Create a hierarchical structure with nested elements for clarity.",
 "adjust the response length. Make the answer shorter, more concise, brief, or condensed. Summarize the key points. Trim down the text to reduce the overall word count or meet a specific character limit. Be less verbose and more direct.",
@@ -363,9 +361,6 @@ class SkipDetector:
 "continue the generated response. Keep going with the explanation or list. Provide more information and finish your thought. Complete the rest of the content or story. Proceed with the next steps. Do not stop until you have concluded.",
 "act as a specific persona or role. Respond as if you were a pirate, a scientist, or a travel guide. Adopt the character's voice, style, and knowledge base in your answer. Maintain the persona throughout the entire response.",
 "compare and contrast two or more topics. Explain the similarities and differences between A and B. Provide a detailed analysis of what they have in common and how they diverge. Create a table to highlight the key distinctions.",
-]
-
-PURE_MATH_CALCULATION_CATEGORY_DESCRIPTIONS = [
 "perform a pure arithmetic calculation with explicit numbers. Solve, multiply, add, subtract, and divide. Compute a numeric expression following the order of operations (PEMDAS/BODMAS). What is 23 plus 456 minus 78 times 9 divided by 3?",
 "evaluate a mathematical expression containing numbers and operators, such as 2 plus 3 times 4 divided by 5. Solve this numerical problem and compute the final result. Simplify the arithmetic and show the final answer. Calculate 123 * 456.",
 "convert units between measurement systems with numeric values. Convert 100 kilometers to miles, 72 fahrenheit to celsius, or 5 feet 9 inches to centimeters. Change between metric and imperial for distance, weight, volume, or temperature.",
@@ -376,9 +371,6 @@ class SkipDetector:
 "compute descriptive statistics for a dataset of numbers like 12, 15, 18, 20, 22. Calculate the mean, median, mode, average, and standard deviation. Find the variance, range, quartiles, and percentiles for a given sample distribution.",
 "calculate health and fitness metrics using a numeric formula. Compute the Body Mass Index (BMI) given a weight in pounds or kilograms and height in feet, inches, or meters. Find my basal metabolic rate (BMR) or target heart rate.",
 "calculate the time difference between two dates. How many days, hours, or minutes are between two points in time? Find the duration or elapsed time. Act as an age calculator for a birthday or find the time until a future anniversary.",
-]
-
-EXPLICIT_TRANSLATION_CATEGORY_DESCRIPTIONS = [
 "translate the explicitly quoted text 'Hello, how are you?' to a foreign language like Spanish, French, or German. This is a translation instruction that includes the word 'translate' and the source text in quotes for direct conversion.",
 "how do you say a specific word or phrase in another language? For example, how do you say 'thank you', 'computer', or 'goodbye' in Japanese, Chinese, or Korean? This is a request for a direct translation of a common expression or term.",
 "convert a block of text or a paragraph from a source language to a target language. Translate the following content to Italian, Arabic, Portuguese, or Russian. This is a language conversion request for a larger piece of text provided.",
@@ -389,9 +381,6 @@ class SkipDetector:
 "how do I say 'I am learning to code' in German? Convert this specific English phrase into its equivalent in another language. This is a request for a practical, conversational phrase translation for personal or professional use.",
 "translate this informal or slang expression to its colloquial equivalent in Spanish. How would you say 'What's up?' in Japanese in a casual context? This request focuses on capturing the correct tone and nuance of informal language.",
 "provide the formal and professional translation for 'Please find the attached document for your review' in French. Translate this business email phrase to German, ensuring the terminology and register are appropriate for a corporate context.",
-]
-
-GRAMMAR_PROOFREADING_CATEGORY_DESCRIPTIONS = [
 "proofread the following text for errors. Here is my draft, please check it for typos and mistakes: 'Teh quick brown fox jumpped'. Review, revise, and correct any misspellings or grammatical issues you find in the provided passage.",
 "correct the grammar in this sentence: 'She don't like it'. Resolve grammatical issues like subject-verb agreement, incorrect verb tense, pronoun reference errors, or misplaced modifiers in the provided text. Address faulty sentence structure.",
 "check the spelling and punctuation in this passage. Please review the following text and correct any textual errors: 'its a beautiful day, isnt it'. Amend mistakes with commas, periods, apostrophes, quotation marks, colons, or capitalization.",
@@ -404,7 +393,7 @@ class SkipDetector:
 "check my essay for conciseness and remove any redundancy. Help me edit this text to be more direct and to the point. Identify and eliminate wordiness, filler words, and repetitive phrases to strengthen the overall quality of the writing.",
 ]

-CONVERSATIONAL_CATEGORY_DESCRIPTIONS = [
+PERSONAL_CATEGORY_DESCRIPTIONS = [
 "discussing my family members, like my spouse, children, parents, or siblings. Mentioning relatives by name or role, such as my husband, wife, son, daughter, mother, or father. Sharing stories or asking questions about my family.",
 "expressing lasting personal feelings, core values, beliefs, or principles. My worldview, deeply held opinions, philosophy, or moral standards. Things I love, hate, or feel strongly about in life, such as my passion for animal welfare.",
 "describing my established personal hobbies, regular activities, or consistent interests. My passions and what I do in my leisure time, such as creative outlets like painting, sports like hiking, or other recreational pursuits I enjoy.",
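The category-description lists above support a zero-shot scheme: embed the message, compare it against the pre-embedded descriptions, and score each group by its best-matching description. A hedged sketch under the assumption of unit-normalized vectors; `group_score` is illustrative, not this project's real code:

```python
import numpy as np

def group_score(message_vec: np.ndarray, category_vecs: np.ndarray) -> float:
    """Max cosine similarity between the message and any description in the group.

    Assumes both the message vector and the rows of category_vecs are
    unit-normalized, so the dot product equals cosine similarity.
    """
    return float(np.max(category_vecs @ message_vec))
```

Taking the maximum (rather than the mean) lets a single strongly matching description carry the group, which suits long, keyword-dense descriptions like those above.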
@@ -434,25 +423,14 @@ class SkipDetector:
 class SkipReason(Enum):
 SKIP_SIZE = "SKIP_SIZE"
-SKIP_TECHNICAL = "SKIP_TECHNICAL"
+SKIP_NON_PERSONAL = "SKIP_NON_PERSONAL"
-SKIP_INSTRUCTION = "SKIP_INSTRUCTION"
-SKIP_PURE_MATH = "SKIP_PURE_MATH"
-SKIP_TRANSLATION = "SKIP_TRANSLATION"
-SKIP_GRAMMAR_PROOFREAD = "SKIP_GRAMMAR_PROOFREAD"

 STATUS_MESSAGES = {
 SkipReason.SKIP_SIZE: "📏 Message Length Out of Limits, skipping memory operations",
-SkipReason.SKIP_TECHNICAL: "💻 Technical Content Detected, skipping memory operations",
+SkipReason.SKIP_NON_PERSONAL: "🚫 Non-Personal Content Detected, skipping memory operations",
-SkipReason.SKIP_INSTRUCTION: "💬 Instruction Detected, skipping memory operations",
-SkipReason.SKIP_PURE_MATH: "🔢 Mathematical Calculation Detected, skipping memory operations",
-SkipReason.SKIP_TRANSLATION: "🌐 Translation Request Detected, skipping memory operations",
-SkipReason.SKIP_GRAMMAR_PROOFREAD: "📝 Grammar/Proofreading Request Detected, skipping memory operations",
 }

-def __init__(
-self,
-embedding_function: Callable[[Union[str, List[str]]], Union[np.ndarray, List[np.ndarray]]],
-):
+def __init__(self, embedding_function: Callable[[Union[str, List[str]]], Union[np.ndarray, List[np.ndarray]]]):
 """Initialize the skip detector with an embedding function and compute reference embeddings."""
 self.embedding_function = embedding_function
 self._reference_embeddings = None
@@ -461,37 +439,16 @@ class SkipDetector:
     def _initialize_reference_embeddings(self) -> None:
         """Compute and cache embeddings for category descriptions."""
         try:
-            technical_embeddings = self.embedding_function(self.TECHNICAL_CATEGORY_DESCRIPTIONS)
-            instruction_embeddings = self.embedding_function(self.INSTRUCTION_CATEGORY_DESCRIPTIONS)
-            pure_math_embeddings = self.embedding_function(self.PURE_MATH_CALCULATION_CATEGORY_DESCRIPTIONS)
-            translation_embeddings = self.embedding_function(self.EXPLICIT_TRANSLATION_CATEGORY_DESCRIPTIONS)
-            grammar_embeddings = self.embedding_function(self.GRAMMAR_PROOFREADING_CATEGORY_DESCRIPTIONS)
-            conversational_embeddings = self.embedding_function(self.CONVERSATIONAL_CATEGORY_DESCRIPTIONS)
+            non_personal_embeddings = self.embedding_function(self.NON_PERSONAL_CATEGORY_DESCRIPTIONS)
+            personal_embeddings = self.embedding_function(self.PERSONAL_CATEGORY_DESCRIPTIONS)

             self._reference_embeddings = {
-                "technical": np.array(technical_embeddings),
-                "instruction": np.array(instruction_embeddings),
-                "pure_math": np.array(pure_math_embeddings),
-                "translation": np.array(translation_embeddings),
-                "grammar": np.array(grammar_embeddings),
-                "conversational": np.array(conversational_embeddings),
+                "non_personal": np.array(non_personal_embeddings),
+                "personal": np.array(personal_embeddings),
             }

-            total_skip_categories = (
-                len(self.TECHNICAL_CATEGORY_DESCRIPTIONS)
-                + len(self.INSTRUCTION_CATEGORY_DESCRIPTIONS)
-                + len(self.PURE_MATH_CALCULATION_CATEGORY_DESCRIPTIONS)
-                + len(self.EXPLICIT_TRANSLATION_CATEGORY_DESCRIPTIONS)
-                + len(self.GRAMMAR_PROOFREADING_CATEGORY_DESCRIPTIONS)
-            )
             logger.info(
-                f"SkipDetector initialized with {total_skip_categories} skip categories and {len(self.CONVERSATIONAL_CATEGORY_DESCRIPTIONS)} personal categories"
+                f"SkipDetector initialized with {len(self.NON_PERSONAL_CATEGORY_DESCRIPTIONS)} non-personal categories and {len(self.PERSONAL_CATEGORY_DESCRIPTIONS)} personal categories"
             )
         except Exception as e:
             logger.error(f"Failed to initialize SkipDetector reference embeddings: {e}")
@@ -506,27 +463,27 @@ class SkipDetector:
             return SkipDetector.SkipReason.SKIP_SIZE.value
         return None

-    def _fast_path_skip_detection(self, message: str) -> Optional[str]:
+    def _fast_path_skip_detection(self, message: str) -> Optional[bool]:
         """Language-agnostic structural pattern detection with high confidence and low false positive rate."""
         msg_len = len(message)

         # Pattern 1: Multiple URLs (5+ full URLs indicates link lists or technical references)
         url_pattern_count = message.count("http://") + message.count("https://")
         if url_pattern_count >= 5:
-            return self.SkipReason.SKIP_TECHNICAL.value
+            return True

         # Pattern 2: Long unbroken alphanumeric strings (tokens, hashes, base64)
         words = message.split()
         for word in words:
             cleaned = word.strip('.,;:!?()[]{}"\'"')
             if len(cleaned) > 80 and cleaned.replace("-", "").replace("_", "").isalnum():
-                return self.SkipReason.SKIP_TECHNICAL.value
+                return True

         # Pattern 3: Markdown/text separators (repeated ---, ===, ___, ***)
         separator_patterns = ["---", "===", "___", "***"]
         for pattern in separator_patterns:
             if message.count(pattern) >= 2:
-                return self.SkipReason.SKIP_TECHNICAL.value
+                return True

         # Pattern 4: Command-line patterns with context-aware detection
         lines_stripped = [line.strip() for line in message.split("\n") if line.strip()]
@@ -551,9 +508,9 @@ class SkipDetector:
                     pass

         if actual_command_lines >= 1 and any(c in message for c in ["http://", "https://", " | "]):
-            return self.SkipReason.SKIP_TECHNICAL.value
+            return True
         if actual_command_lines >= 3:
-            return self.SkipReason.SKIP_TECHNICAL.value
+            return True

         # Pattern 5: High path/URL density (dots and slashes suggesting file paths or URLs)
         if msg_len > 30:
@@ -561,16 +518,16 @@ class SkipDetector:
             dot_count = message.count(".")
             path_chars = slash_count + dot_count
             if path_chars > 10 and (path_chars / msg_len) > 0.15:
-                return self.SkipReason.SKIP_TECHNICAL.value
+                return True

         # Pattern 6: Markup character density (structured data)
         markup_chars = sum(message.count(c) for c in "{}[]<>")
         if markup_chars >= 6:
             if markup_chars / msg_len > 0.10:
-                return self.SkipReason.SKIP_TECHNICAL.value
+                return True
             curly_count = message.count("{") + message.count("}")
             if curly_count >= 10:
-                return self.SkipReason.SKIP_TECHNICAL.value
+                return True

         # Pattern 7: Structured nested content with colons (key: value patterns)
         line_count = message.count("\n")
@@ -582,7 +539,7 @@ class SkipDetector:
             indented_lines = sum(1 for line in non_empty_lines if line.startswith((" ", "\t")))
             if colon_lines / len(non_empty_lines) > 0.4 and indented_lines / len(non_empty_lines) > 0.5:
-                return self.SkipReason.SKIP_TECHNICAL.value
+                return True

         # Pattern 8: Highly structured multi-line content (require markup chars for technical confidence)
         if line_count > 15:
@@ -593,7 +550,7 @@ class SkipDetector:
             structured_lines = sum(1 for line in non_empty_lines if line.startswith((" ", "\t")))
             if markup_in_lines / len(non_empty_lines) > 0.3:
-                return self.SkipReason.SKIP_TECHNICAL.value
+                return True
             elif structured_lines / len(non_empty_lines) > 0.6:
                 technical_keywords = [
                     "function",
@@ -606,7 +563,7 @@ class SkipDetector:
                     "def",
                 ]
                 if any(keyword in message.lower() for keyword in technical_keywords):
-                    return self.SkipReason.SKIP_TECHNICAL.value
+                    return True

         # Pattern 9: Code-like indentation pattern (require code indicators to avoid false positives from bullet lists)
         if line_count >= 3:
@@ -628,7 +585,7 @@ class SkipDetector:
                 "private ",
             ]
             if any(indicator in message.lower() for indicator in code_indicators):
-                return self.SkipReason.SKIP_TECHNICAL.value
+                return True

         # Pattern 10: Very high special character ratio (encoded data, technical output)
         if msg_len > 50:
@@ -637,7 +594,7 @@ class SkipDetector:
             if special_ratio > 0.35:
                 alphanumeric = sum(1 for c in message if c.isalnum())
                 if alphanumeric / msg_len < 0.50:
-                    return self.SkipReason.SKIP_TECHNICAL.value
+                    return True

         return None
@@ -645,7 +602,7 @@ class SkipDetector:
         """
         Detect if a message should be skipped using two-stage detection:
         1. Fast-path structural patterns (~95% confidence)
-        2. Semantic classification (for remaining cases)
+        2. Binary semantic classification (personal vs non-personal)

         Returns:
             Skip reason string if content should be skipped, None otherwise
         """
@@ -655,8 +612,8 @@ class SkipDetector:
         fast_skip = self._fast_path_skip_detection(message)
         if fast_skip:
-            logger.info(f"Fast-path skip: {fast_skip}")
-            return fast_skip
+            logger.info(f"Fast-path skip: {self.SkipReason.SKIP_NON_PERSONAL.value}")
+            return self.SkipReason.SKIP_NON_PERSONAL.value

         if self._reference_embeddings is None:
             logger.warning("SkipDetector reference embeddings not initialized, allowing message through")
@@ -665,53 +622,19 @@ class SkipDetector:
         try:
             message_embedding = np.array(self.embedding_function([message.strip()])[0])

-            conversational_similarities = np.dot(message_embedding, self._reference_embeddings["conversational"].T)
-            max_conversational_similarity = float(conversational_similarities.max())
+            personal_similarities = np.dot(message_embedding, self._reference_embeddings["personal"].T)
+            max_personal_similarity = float(personal_similarities.max())

-            skip_categories = [
-                (
-                    "instruction",
-                    self.SkipReason.SKIP_INSTRUCTION,
-                    self.INSTRUCTION_CATEGORY_DESCRIPTIONS,
-                ),
-                (
-                    "translation",
-                    self.SkipReason.SKIP_TRANSLATION,
-                    self.EXPLICIT_TRANSLATION_CATEGORY_DESCRIPTIONS,
-                ),
-                (
-                    "grammar",
-                    self.SkipReason.SKIP_GRAMMAR_PROOFREAD,
-                    self.GRAMMAR_PROOFREADING_CATEGORY_DESCRIPTIONS,
-                ),
-                (
-                    "technical",
-                    self.SkipReason.SKIP_TECHNICAL,
-                    self.TECHNICAL_CATEGORY_DESCRIPTIONS,
-                ),
-                (
-                    "pure_math",
-                    self.SkipReason.SKIP_PURE_MATH,
-                    self.PURE_MATH_CALCULATION_CATEGORY_DESCRIPTIONS,
-                ),
-            ]
+            non_personal_similarities = np.dot(message_embedding, self._reference_embeddings["non_personal"].T)
+            max_non_personal_similarity = float(non_personal_similarities.max())

-            qualifying_categories = []
-            margin_threshold = max_conversational_similarity + Constants.SKIP_CATEGORY_MARGIN
-            for cat_key, skip_reason, descriptions in skip_categories:
-                similarities = np.dot(message_embedding, self._reference_embeddings[cat_key].T)
-                max_similarity = float(similarities.max())
-                if max_similarity > margin_threshold:
-                    qualifying_categories.append((max_similarity, cat_key, skip_reason))
-            if qualifying_categories:
-                highest_similarity, highest_cat_key, highest_skip_reason = max(qualifying_categories, key=lambda x: x[0])
+            if (max_non_personal_similarity - max_personal_similarity) > Constants.SKIP_CATEGORY_MARGIN:
                 logger.info(
-                    f"🚫 Skipping message: {highest_skip_reason.value} (sim {highest_similarity:.3f} > conv {max_conversational_similarity:.3f} + {Constants.SKIP_CATEGORY_MARGIN:.3f})"
+                    f"🚫 Skipping message: non-personal content detected "
+                    f"(non-personal sim {max_non_personal_similarity:.3f} > "
+                    f"personal sim {max_personal_similarity:.3f} + {Constants.SKIP_CATEGORY_MARGIN:.3f})"
                 )
-                return highest_skip_reason.value
+                return self.SkipReason.SKIP_NON_PERSONAL.value

             return None
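The binary classification above boils down to one comparison: skip only when the best non-personal similarity beats the best personal similarity by the margin. A standalone sketch, with `SKIP_CATEGORY_MARGIN` inlined from `Constants` and unit-normalized toy vectors standing in for the real reference embeddings:

```python
import numpy as np

SKIP_CATEGORY_MARGIN = 0.20  # value from Constants.SKIP_CATEGORY_MARGIN


def classify_skip(message_embedding: np.ndarray,
                  personal_refs: np.ndarray,
                  non_personal_refs: np.ndarray) -> bool:
    """Return True when the message reads as non-personal by a clear margin.

    All vectors are assumed unit-normalized, so dot products are cosine similarities.
    """
    max_personal = float(np.dot(personal_refs, message_embedding).max())
    max_non_personal = float(np.dot(non_personal_refs, message_embedding).max())
    # Skip only when non-personal similarity wins by more than the margin
    return (max_non_personal - max_personal) > SKIP_CATEGORY_MARGIN


# Toy 2-D references: personal content along x, non-personal along y
personal = np.array([[1.0, 0.0]])
non_personal = np.array([[0.0, 1.0]])
print(classify_skip(np.array([0.0, 1.0]), personal, non_personal))  # → True
print(classify_skip(np.array([1.0, 0.0]), personal, non_personal))  # → False
```

Compared with the old per-category margin scan, the single subtraction makes the trade-off explicit: ambiguous messages (similar to both sides) fall through to consolidation instead of being skipped.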
@@ -834,6 +757,34 @@ class LLMConsolidationService:
     def __init__(self, memory_system):
         self.memory_system = memory_system

+    async def _check_semantic_duplicate(self, content: str, existing_memories: List, user_id: str) -> Optional[str]:
+        """
+        Check if content is semantically duplicate of existing memories using embeddings.
+        Returns the ID of duplicate memory if found, None otherwise.
+        """
+        if not existing_memories:
+            return None
+
+        try:
+            content_embedding = await self.memory_system._generate_embeddings(content, user_id)
+
+            for memory in existing_memories:
+                if not memory.content or len(memory.content.strip()) < Constants.MIN_MESSAGE_CHARS:
+                    continue
+
+                memory_embedding = await self.memory_system._generate_embeddings(memory.content, user_id)
+                similarity = float(np.dot(content_embedding, memory_embedding))
+
+                if similarity >= Constants.DEDUPLICATION_SIMILARITY_THRESHOLD:
+                    logger.info(f"🔍 Semantic duplicate detected: similarity={similarity:.3f} with memory {memory.id}")
+                    return str(memory.id)
+
+            return None
+        except Exception as e:
+            logger.warning(f"⚠️ Semantic duplicate check failed: {str(e)}")
+            return None
+
     def _filter_consolidation_candidates(self, similarities: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], str]:
         """Filter consolidation candidates by threshold and return candidates with threshold info."""
         consolidation_threshold = self.memory_system._get_retrieval_threshold(is_consolidation=True)
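Stripped of the async embedding calls, `_check_semantic_duplicate` reduces to a cosine-similarity threshold over unit vectors. A minimal synchronous sketch, with `DEDUPLICATION_SIMILARITY_THRESHOLD` inlined and `(memory_id, embedding)` pairs standing in for the real memory objects:

```python
import numpy as np

DEDUPLICATION_SIMILARITY_THRESHOLD = 0.90  # value from Constants


def find_duplicate(content_embedding, memories):
    """memories: iterable of (memory_id, unit-normalized embedding) pairs.

    Returns the first memory id whose cosine similarity clears the threshold.
    """
    for memory_id, memory_embedding in memories:
        similarity = float(np.dot(content_embedding, memory_embedding))
        if similarity >= DEDUPLICATION_SIMILARITY_THRESHOLD:
            return memory_id
    return None


a = np.array([1.0, 0.0])
b = np.array([0.96, 0.28])   # cosine with a = 0.96 → duplicate
c = np.array([0.0, 1.0])     # cosine with a = 0.0  → distinct
print(find_duplicate(a, [("m1", c), ("m2", b)]))  # → m2
```

Note the check is linear in the number of memories and re-embeds each one per call; that is fine at these memory counts but would argue for a cached embedding matrix if the store grows.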
@@ -936,7 +887,31 @@ class LLMConsolidationService:
             )
             return []

-        valid_operations = [op.model_dump() for op in operations if op.validate_operation(existing_memory_ids)]
+        deduplicated_operations = []
+        seen_contents = set()
+        seen_update_ids = set()
+        for op in operations:
+            if not op.validate_operation(existing_memory_ids):
+                continue
+            if op.operation == Models.MemoryOperationType.UPDATE and op.id in seen_update_ids:
+                logger.info(f"⏭️ Skipping duplicate UPDATE for memory {op.id} in LLM response")
+                continue
+            if op.operation in [Models.MemoryOperationType.CREATE, Models.MemoryOperationType.UPDATE]:
+                normalized_content = op.content.strip().lower()
+                if normalized_content in seen_contents:
+                    op_type = "CREATE" if op.operation == Models.MemoryOperationType.CREATE else f"UPDATE {op.id}"
+                    logger.info(f"⏭️ Skipping duplicate {op_type} in LLM response: {self.memory_system._truncate_content(op.content)}")
+                    continue
+                seen_contents.add(normalized_content)
+                if op.operation == Models.MemoryOperationType.UPDATE:
+                    seen_update_ids.add(op.id)
+            deduplicated_operations.append(op.model_dump())
+        valid_operations = deduplicated_operations

         if valid_operations:
             create_count = sum(1 for op in valid_operations if op.get("operation") == Models.MemoryOperationType.CREATE.value)
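The response-level dedup above hinges on one normalization: strip plus lowercase, first occurrence wins. A simplified sketch using hypothetical `(op_type, content)` tuples in place of the real operation models (it omits the per-id UPDATE check the full code also performs):

```python
def dedupe_by_content(operations):
    """operations: list of (op_type, content) tuples; DELETE ops pass through untouched."""
    seen_contents = set()
    kept = []
    for op_type, content in operations:
        if op_type in ("CREATE", "UPDATE"):
            normalized = content.strip().lower()
            if normalized in seen_contents:
                continue  # same wording already emitted by the LLM, drop it
            seen_contents.add(normalized)
        kept.append((op_type, content))
    return kept


ops = [("CREATE", "Likes hiking"), ("CREATE", "likes hiking  "), ("DELETE", "")]
print(dedupe_by_content(ops))  # keeps the first CREATE and the DELETE
```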
@@ -951,12 +926,42 @@ class LLMConsolidationService:
         return valid_operations

-    async def execute_memory_operations(
-        self,
-        operations: List[Dict[str, Any]],
-        user_id: str,
-        emitter: Optional[Callable] = None,
-    ) -> Tuple[int, int, int, int]:
+    async def _deduplicate_operations(
+        self, operations: List, current_memories: List, user_id: str, operation_type: str, delete_operations: Optional[List] = None
+    ) -> List:
+        """
+        Deduplicate operations against existing memories using semantic similarity.
+        For UPDATE operations, preserves enriched content and deletes the duplicate.
+        """
+        deduplicated = []
+        for operation in operations:
+            memories_to_check = current_memories
+            if operation_type == "UPDATE":
+                memories_to_check = [m for m in current_memories if str(m.id) != operation.id]
+            duplicate_id = await self._check_semantic_duplicate(operation.content, memories_to_check, user_id)
+            if duplicate_id:
+                if operation_type == "UPDATE" and delete_operations is not None:
+                    logger.info(
+                        f"🔄 UPDATE creates duplicate: keeping enriched content from memory {operation.id}, "
+                        f"deleting duplicate memory {duplicate_id}"
+                    )
+                    deduplicated.append(operation)
+                    delete_operations.append(Models.MemoryOperation(operation=Models.MemoryOperationType.DELETE, content="", id=duplicate_id))
+                else:
+                    logger.info(
+                        f"⏭️ Skipping duplicate {operation_type}: "
+                        f"{self.memory_system._truncate_content(operation.content)} "
+                        f"(matches memory {duplicate_id})"
+                    )
+                continue
+            deduplicated.append(operation)
+        return deduplicated
+
+    async def execute_memory_operations(self, operations: List[Dict[str, Any]], user_id: str, emitter: Optional[Callable] = None) -> Tuple[int, int, int, int]:
         """Execute consolidation operations with simplified tracking."""
         if not operations or not user_id:
             return 0, 0, 0, 0
@@ -1001,6 +1006,23 @@ class LLMConsolidationService:
             except Exception as e:
                 logger.warning(f"⚠️ Failed to fetch memories for DELETE preview: {str(e)}")

+        if operations_by_type["CREATE"] or operations_by_type["UPDATE"]:
+            try:
+                current_memories = await self.memory_system._get_user_memories(user_id)
+                if operations_by_type["CREATE"]:
+                    operations_by_type["CREATE"] = await self._deduplicate_operations(
+                        operations_by_type["CREATE"], current_memories, user_id, operation_type="CREATE"
+                    )
+                if operations_by_type["UPDATE"]:
+                    operations_by_type["UPDATE"] = await self._deduplicate_operations(
+                        operations_by_type["UPDATE"], current_memories, user_id, operation_type="UPDATE", delete_operations=operations_by_type["DELETE"]
+                    )
+            except Exception as e:
+                logger.warning(f"⚠️ Semantic deduplication check failed, proceeding with original operations: {str(e)}")
+
         for operation_type, ops in operations_by_type.items():
             if not ops:
                 continue
@@ -1115,17 +1137,9 @@ class Filter:
     class Valves(BaseModel):
         """Configuration valves for the Memory System."""

-        model: str = Field(
-            default=Constants.DEFAULT_LLM_MODEL,
-            description="Model name for LLM operations",
-        )
-        use_custom_model_for_memory: bool = Field(
-            default=False,
-            description="Use a custom model for memory operations instead of the current chat model",
-        )
-        custom_memory_model: str = Field(
-            default=Constants.DEFAULT_LLM_MODEL,
-            description="Custom model to use for memory operations when enabled",
+        memory_model: Optional[str] = Field(
+            default=None,
+            description="Custom model to use for memory operations. If Default, uses the current chat model",
         )
         max_memories_returned: int = Field(
             default=Constants.MAX_MEMORIES_PER_RETRIEVAL,
@@ -1164,6 +1178,7 @@ class Filter:
         self._shutdown_event = asyncio.Event()
         self._embedding_function = None
+        self._embedding_dimension = None
         self._skip_detector = None
         self._llm_reranking_service = LLMRerankingService(self)
@@ -1190,6 +1205,8 @@ class Filter:
         self._embedding_function = __request__.app.state.EMBEDDING_FUNCTION
         logger.info(f"✅ Using OpenWebUI's embedding function")

+        self._detect_embedding_dimension()
+
         if self._skip_detector is None:
             global _SHARED_SKIP_DETECTOR_CACHE, _SHARED_SKIP_DETECTOR_CACHE_LOCK
             embedding_engine = getattr(__request__.app.state.config, "RAG_EMBEDDING_ENGINE", "")
@@ -1245,9 +1262,6 @@ class Filter:
     def _validate_system_configuration(self) -> None:
         """Validate configuration and fail if invalid."""
-        if not self.valves.model or not self.valves.model.strip():
-            raise ValueError("🤖 Model not specified")
-
         if self.valves.max_memories_returned <= 0:
             raise ValueError(f"📊 Invalid max memories returned: {self.valves.max_memories_returned}")
@@ -1268,12 +1282,32 @@ class Filter:
         """Compute SHA256 hash for text caching."""
         return hashlib.sha256(text.encode()).hexdigest()

+    def _detect_embedding_dimension(self) -> None:
+        """Detect embedding dimension by generating a test embedding."""
+        try:
+            test_embedding = self._embedding_function(["dummy"], prefix=None, user=None)
+            if isinstance(test_embedding, list):
+                test_embedding = test_embedding[0]
+            self._embedding_dimension = np.squeeze(test_embedding).shape[0]
+            logger.info(f"🎯 Detected embedding dimension: {self._embedding_dimension}")
+        except Exception as e:
+            raise RuntimeError(f"Failed to detect embedding dimension: {str(e)}")
+
     def _normalize_embedding(self, embedding: Union[List[float], np.ndarray]) -> np.ndarray:
-        """Normalize embedding vector."""
+        """Normalize embedding vector and ensure 1D shape."""
         if isinstance(embedding, list):
             embedding = np.array(embedding, dtype=np.float16)
         else:
             embedding = embedding.astype(np.float16)
+
+        embedding = np.squeeze(embedding)
+        if embedding.ndim != 1:
+            raise ValueError(f"Embedding must be 1D after squeeze, got shape {embedding.shape}")
+        if self._embedding_dimension and embedding.shape[0] != self._embedding_dimension:
+            raise ValueError(f"Embedding must have {self._embedding_dimension} dimensions, got {embedding.shape[0]}")
+
         norm = np.linalg.norm(embedding)
         return embedding / norm if norm > 0 else embedding
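The hardened `_normalize_embedding` is squeeze → shape check → L2-normalize. A standalone sketch with the expected dimension passed as a parameter instead of instance state (float32 here for exact arithmetic; the plugin itself casts to float16 to shrink the cache):

```python
import numpy as np


def normalize_embedding(embedding, expected_dim=None):
    """Squeeze to 1D, optionally enforce a dimension, then L2-normalize."""
    embedding = np.squeeze(np.asarray(embedding, dtype=np.float32))
    if embedding.ndim != 1:
        raise ValueError(f"Embedding must be 1D after squeeze, got shape {embedding.shape}")
    if expected_dim and embedding.shape[0] != expected_dim:
        raise ValueError(f"Embedding must have {expected_dim} dimensions, got {embedding.shape[0]}")
    norm = np.linalg.norm(embedding)
    return embedding / norm if norm > 0 else embedding  # zero vectors pass through unchanged


v = normalize_embedding([[3.0, 4.0]])        # shape (1, 2) squeezes to (2,)
print(v, float(np.linalg.norm(v)))           # unit vector [0.6, 0.8], norm 1.0
```

The squeeze is what makes the `(list, np.ndarray)` branch in `_generate_embeddings` safe: backends that return a batch of shape `(1, d)` arrays now collapse to `(d,)` before the dot products, instead of silently broadcasting.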
@@ -1320,7 +1354,7 @@ class Filter:
             raw_embeddings = await loop.run_in_executor(None, self._embedding_function, uncached_texts, None, user)

             if isinstance(raw_embeddings, list) and len(raw_embeddings) > 0:
-                if isinstance(raw_embeddings[0], list):
+                if isinstance(raw_embeddings[0], (list, np.ndarray)):
                     new_embeddings = [self._normalize_embedding(emb) for emb in raw_embeddings]
                 else:
                     new_embeddings = [self._normalize_embedding(raw_embeddings)]
@@ -1334,7 +1368,8 @@ class Filter:
                 result_embeddings[original_idx] = embedding

         if is_single:
-            logger.info("📥 User message embedding: cache hit" if not uncached_texts else "💾 User message embedding: generated and cached")
+            if uncached_texts:
+                logger.info("💾 User message embedding: generated and cached")
             return result_embeddings[0]
         else:
             valid_count = sum(1 for emb in result_embeddings if emb is not None)
@@ -1623,25 +1658,15 @@ class Filter:
         body: Dict[str, Any],
         __event_emitter__: Optional[Callable] = None,
         __user__: Optional[Dict[str, Any]] = None,
-        __model__: Optional[str] = None,
         __request__: Optional[Request] = None,
-        **kwargs,
     ) -> Dict[str, Any]:
         """Simplified inlet processing for memory retrieval and injection."""
-        model_to_use = body.get("model") if isinstance(body, dict) else None
-        if not model_to_use:
-            model_to_use = __model__ or getattr(__request__.state, "model", None)
-        if not model_to_use:
-            model_to_use = Constants.DEFAULT_LLM_MODEL
-            logger.warning(f"⚠️ No model found, use default model : {model_to_use}")
-
-        if self.valves.use_custom_model_for_memory and self.valves.custom_memory_model:
-            model_to_use = self.valves.custom_memory_model
+        model_to_use = self.valves.memory_model or (body.get("model") if isinstance(body, dict) else None)
+
+        if self.valves.memory_model:
             logger.info(f"🧠 Using the custom model for memory : {model_to_use}")

-        self.valves.model = model_to_use
-
         await self._set_pipeline_context(__event_emitter__, __user__, model_to_use, __request__)

         user_id = __user__.get("id") if body and __user__ else None
@@ -1687,25 +1712,15 @@ class Filter:
         body: dict,
         __event_emitter__: Optional[Callable] = None,
         __user__: Optional[dict] = None,
-        __model__: Optional[str] = None,
         __request__: Optional[Request] = None,
-        **kwargs,
    ) -> dict:
         """Simplified outlet processing for background memory consolidation."""
-        model_to_use = body.get("model") if isinstance(body, dict) else None
-        if not model_to_use:
-            model_to_use = __model__ or getattr(__request__.state, "model", None)
-        if not model_to_use:
-            model_to_use = Constants.DEFAULT_LLM_MODEL
-            logger.warning(f"⚠️ No model found, use default model : {model_to_use}")
-
-        if self.valves.use_custom_model_for_memory and self.valves.custom_memory_model:
-            model_to_use = self.valves.custom_memory_model
+        model_to_use = self.valves.memory_model or (body.get("model") if isinstance(body, dict) else None)
+
+        if self.valves.memory_model:
             logger.info(f"🧠 Using the custom model for memory : {model_to_use}")

-        self.valves.model = model_to_use
-
         await self._set_pipeline_context(__event_emitter__, __user__, model_to_use, __request__)

         user_id = __user__.get("id") if body and __user__ else None
@@ -1869,7 +1884,7 @@ class Filter:
         if not hasattr(self, "__request__") or not hasattr(self, "__user__"):
             raise RuntimeError("🔧 Pipeline interface not properly initialized. __request__ and __user__ required.")

-        model_to_use = self.valves.model if self.valves.model else self.__model__
+        model_to_use = self.__model__
         if not model_to_use:
             raise ValueError("🤖 No model specified for LLM operations")