fix(memory_system): improve content extraction, hash handling, and memory filtering

Updates content extraction logic for robustness, ensures all hash operations handle non-string input safely, filters out empty memory content, and bumps required Open WebUI version for compatibility. These changes address edge cases in content processing, prevent potential errors, and ensure only valid memories are processed and embedded.
2026-01-22 06:51:01 +01:00 · 2025-11-26 01:04:48 +03:00
parent 4502e07fb3
commit cdedeee6ba
1 changed files with 12 additions and 15 deletions
--- a/memory_system.py
+++ b/memory_system.py
@@ -1,10 +1,10 @@
 """
 title: Memory System
 description: A semantic memory management system for Open WebUI that consolidates, deduplicates, and retrieves personalized user memories using LLM operations.
-version: 1.2.0
+version: 1.2.1
 authors: https://github.com/mtayfur
 license: Apache-2.0
-required_open_webui_version: 0.6.37
+required_open_webui_version: 0.6.40
 """

 import asyncio
@@ -1249,14 +1249,10 @@ class Filter:
    def _extract_text_from_content(self, content) -> str:
        if isinstance(content, str):
            return content
-        elif isinstance(content, list):
-            text_parts = []
-            for item in content:
-                if isinstance(item, dict) and item.get("type") == "text":
-                    text_parts.append(item.get("text", ""))
-            return " ".join(text_parts)
-        elif isinstance(content, dict) and "text" in content:
-            return content["text"]
+        if isinstance(content, list):
+            return " ".join(item.get("text", "") for item in content if isinstance(item, dict) and item.get("type") == "text")
+        if isinstance(content, dict) and "text" in content:
+            return content.get("text", "")
        return ""

    def _validate_system_configuration(self) -> None:
@@ -1279,7 +1275,7 @@ class Filter:

    def _compute_text_hash(self, text: str) -> str:
        """Compute SHA256 hash for text caching."""
-        return hashlib.sha256(text.encode()).hexdigest()
+        return hashlib.sha256(str(text).encode()).hexdigest()

    async def _detect_embedding_dimension(self) -> None:
        """Detect embedding dimension by generating a test embedding."""
@@ -1400,10 +1396,11 @@ class Filter:
        if timeout is None:
            timeout = Constants.DATABASE_OPERATION_TIMEOUT_SEC
        
-        return await asyncio.wait_for(
+        memories = await asyncio.wait_for(
            asyncio.to_thread(Memories.get_memories_by_user_id, user_id),
            timeout=timeout,
        )
+        return [m for m in (memories or []) if m.content]

    def _log_retrieved_memories(self, memories: List[Dict[str, Any]], context_type: str = "semantic") -> None:
        """Log retrieved memories with concise formatting showing key statistics and semantic values."""
@@ -1434,7 +1431,7 @@ class Filter:
    def _cache_key(self, cache_type: str, user_id: str, content: Optional[str] = None) -> str:
        """Unified cache key generation for all cache types."""
        if content:
-            content_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()[: Constants.CACHE_KEY_HASH_PREFIX_LENGTH]
+            content_hash = hashlib.sha256(str(content).encode("utf-8")).hexdigest()[: Constants.CACHE_KEY_HASH_PREFIX_LENGTH]
            return f"{cache_type}_{user_id}:{content_hash}"
        return f"{cache_type}_{user_id}"

@@ -1637,7 +1634,7 @@ class Filter:

        user_message, should_skip, skip_reason = await self._process_user_message(body)

-        skip_cache_key = self._cache_key(self._cache_manager.SKIP_STATE_CACHE, user_id, user_message or "")
+        skip_cache_key = self._cache_key(self._cache_manager.SKIP_STATE_CACHE, user_id, user_message)
        await self._cache_manager.put(
            user_id,
            self._cache_manager.SKIP_STATE_CACHE,
@@ -1771,7 +1768,7 @@ class Filter:
                user_memories,
            )

-            memory_contents = [memory.content for memory in user_memories if memory.content and len(memory.content.strip()) >= Constants.MIN_MESSAGE_CHARS]
+            memory_contents = [memory.content for memory in user_memories if len(memory.content.strip()) >= Constants.MIN_MESSAGE_CHARS]

            if memory_contents:
                await self._generate_embeddings(memory_contents, user_id)