refactor(memory): remove redundant valve options and clarify reranking controls

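Remove three unused or redundant valve options to streamline configuration. max_message_chars and relaxed_semantic_threshold_multiplier are no longer user-configurable; the code now reads the fixed Constants.MAX_MESSAGE_CHARS and Constants.RELAXED_SEMANTIC_THRESHOLD_MULTIPLIER values instead. enable_llm_reranking is replaced by llm_reranking_trigger_multiplier, which disables LLM reranking when set to 0.0 (any value <= 0 is treated as disabled). Update the README valve list and the valve descriptions to reflect these changes, and document the skip_category_margin and status_emit_level valves in the README, for improved maintainability and user clarity.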
2026-01-22 06:51:01 +01:00 · 2025-11-09 15:55:27 +03:00
parent c77237a651
commit 6ae99d4778
2 changed files with 9 additions and 24 deletions
--- a/README.md
+++ b/README.md
@@ -67,12 +67,11 @@ Uses OpenWebUI's configured embedding model (supports Ollama, OpenAI, Azure Open

 Customize behavior through valves:
 - **model**: LLM for consolidation and reranking. Set to "Default" to use the current chat model, or specify a model ID to use that specific model
- **max_message_chars**: Maximum message length before skipping operations (default: 2500)
 - **max_memories_returned**: Context injection limit (default: 10)
 - **semantic_retrieval_threshold**: Minimum similarity score (default: 0.5)
- **relaxed_semantic_threshold_multiplier**: Adjusts threshold for consolidation (default: 0.9)
- **enable_llm_reranking**: Toggle smart reranking (default: true)
- **llm_reranking_trigger_multiplier**: When to activate LLM reranking (default: 0.5 = 50%)
+- **llm_reranking_trigger_multiplier**: Fraction of max_memories_returned used as the LLM reranking trigger threshold (0.0 = disabled, default: 0.5 = 50%)
+- **skip_category_margin**: Margin for skip detection classification (default: 0.20)
+- **status_emit_level**: Status message verbosity - Basic or Detailed (default: Detailed)

 ## Performance Optimizations

--- a/memory_system.py
+++ b/memory_system.py
@@ -67,7 +67,7 @@ class Constants:
    # Status Emit Levels
    STATUS_LEVEL_BASIC = 0  # Maps to "Basic" - Show only summary counts
    STATUS_LEVEL_DETAILED = 1  # Maps to "Detailed" - Show everything including full diagnostics
-    
+
    # Mapping from enum string values to numeric levels for comparison
    STATUS_LEVEL_MAP = {
        "Basic": 0,
@@ -663,7 +663,7 @@ class LLMRerankingService:
        self.memory_system = memory_system

    def _should_use_llm_reranking(self, memories: List[Dict]) -> Tuple[bool, str]:
-        if not self.memory_system.valves.enable_llm_reranking:
+        if self.memory_system.valves.llm_reranking_trigger_multiplier <= 0:
            return False, "LLM reranking disabled"

        llm_trigger_threshold = int(self.memory_system.valves.max_memories_returned * self.memory_system.valves.llm_reranking_trigger_multiplier)
@@ -748,9 +748,7 @@ CANDIDATE MEMORIES:

            if not selected_memories:
                logger.info("📭 No relevant memories after LLM analysis")
-                await self.memory_system._emit_status(
-                    emitter, f"📭 No Relevant Memories After LLM Analysis", done=True, level=Constants.STATUS_LEVEL_BASIC
-                )
+                await self.memory_system._emit_status(emitter, f"📭 No Relevant Memories After LLM Analysis", done=True, level=Constants.STATUS_LEVEL_BASIC)
                return selected_memories, analysis_info
        else:
            logger.info(f"Skipping LLM reranking: {decision_reason}")
@@ -1146,25 +1144,13 @@ class Filter:
            default=Constants.MAX_MEMORIES_PER_RETRIEVAL,
            description="Maximum number of memories to return in context",
        )
-        max_message_chars: int = Field(
-            default=Constants.MAX_MESSAGE_CHARS,
-            description="Maximum user message length before skipping memory operations",
-        )
        semantic_retrieval_threshold: float = Field(
            default=Constants.SEMANTIC_RETRIEVAL_THRESHOLD,
            description="Minimum similarity threshold for memory retrieval",
        )
-        relaxed_semantic_threshold_multiplier: float = Field(
-            default=Constants.RELAXED_SEMANTIC_THRESHOLD_MULTIPLIER,
-            description="Adjusts similarity threshold for memory consolidation (lower = more candidates)",
-        )
-        enable_llm_reranking: bool = Field(
-            default=True,
-            description="Enable LLM-based memory reranking for improved contextual selection",
-        )
        llm_reranking_trigger_multiplier: float = Field(
            default=Constants.LLM_RERANKING_TRIGGER_MULTIPLIER,
-            description="Controls when LLM reranking activates (lower = more aggressive)",
+            description="Controls when LLM reranking activates (0.0 = disabled, lower = more aggressive)",
        )
        skip_category_margin: float = Field(
            default=Constants.SKIP_CATEGORY_MARGIN,
@@ -1254,7 +1240,7 @@ class Filter:
    def _get_retrieval_threshold(self, is_consolidation: bool = False) -> float:
        """Calculate retrieval threshold for semantic similarity filtering."""
        if is_consolidation:
-            return self.valves.semantic_retrieval_threshold * self.valves.relaxed_semantic_threshold_multiplier
+            return self.valves.semantic_retrieval_threshold * Constants.RELAXED_SEMANTIC_THRESHOLD_MULTIPLIER
        return self.valves.semantic_retrieval_threshold

    def _extract_text_from_content(self, content) -> str:
@@ -1379,7 +1365,7 @@ class Filter:
            return result_embeddings

    def _should_skip_memory_operations(self, user_message: str) -> Tuple[bool, str]:
-        skip_reason = self._skip_detector.detect_skip_reason(user_message, self.valves.max_message_chars, memory_system=self)
+        skip_reason = self._skip_detector.detect_skip_reason(user_message, Constants.MAX_MESSAGE_CHARS, memory_system=self)
        if skip_reason:
            status_key = SkipDetector.SkipReason(skip_reason)
            return True, SkipDetector.STATUS_MESSAGES[status_key]