From cdedeee6baee823575e0f3db90b4bf038b6da1e9 Mon Sep 17 00:00:00 2001
From: mtayfur
Date: Wed, 26 Nov 2025 01:04:48 +0300
Subject: [PATCH] fix(memory_system): improve content extraction, hash
 handling, and memory filtering

Updates the content extraction logic for robustness, ensures all hash
operations handle non-string input safely, filters out empty memory
content, and bumps the required Open WebUI version for compatibility.
These changes address edge cases in content processing, prevent
potential errors, and ensure only valid memories are processed and
embedded.
---
 memory_system.py | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/memory_system.py b/memory_system.py
index b5666d1..01b9684 100644
--- a/memory_system.py
+++ b/memory_system.py
@@ -1,10 +1,10 @@
 """
 title: Memory System
 description: A semantic memory management system for Open WebUI that consolidates, deduplicates, and retrieves personalized user memories using LLM operations.
-version: 1.2.0
+version: 1.2.1
 authors: https://github.com/mtayfur
 license: Apache-2.0
-required_open_webui_version: 0.6.37
+required_open_webui_version: 0.6.40
 """
 
 import asyncio
@@ -1249,14 +1249,10 @@ class Filter:
     def _extract_text_from_content(self, content) -> str:
         if isinstance(content, str):
             return content
-        elif isinstance(content, list):
-            text_parts = []
-            for item in content:
-                if isinstance(item, dict) and item.get("type") == "text":
-                    text_parts.append(item.get("text", ""))
-            return " ".join(text_parts)
-        elif isinstance(content, dict) and "text" in content:
-            return content["text"]
+        if isinstance(content, list):
+            return " ".join(item.get("text", "") for item in content if isinstance(item, dict) and item.get("type") == "text")
+        if isinstance(content, dict) and "text" in content:
+            return content.get("text", "")
         return ""
 
     def _validate_system_configuration(self) -> None:
@@ -1279,7 +1275,7 @@ class Filter:
 
     def _compute_text_hash(self, text: str) -> str:
         """Compute SHA256 hash for text caching."""
-        return hashlib.sha256(text.encode()).hexdigest()
+        return hashlib.sha256(str(text).encode()).hexdigest()
 
     async def _detect_embedding_dimension(self) -> None:
         """Detect embedding dimension by generating a test embedding."""
@@ -1400,10 +1396,11 @@ class Filter:
         if timeout is None:
             timeout = Constants.DATABASE_OPERATION_TIMEOUT_SEC
 
-        return await asyncio.wait_for(
+        memories = await asyncio.wait_for(
             asyncio.to_thread(Memories.get_memories_by_user_id, user_id),
             timeout=timeout,
         )
+        return [m for m in (memories or []) if m.content]
 
     def _log_retrieved_memories(self, memories: List[Dict[str, Any]], context_type: str = "semantic") -> None:
         """Log retrieved memories with concise formatting showing key statistics and semantic values."""
@@ -1434,7 +1431,7 @@ class Filter:
     def _cache_key(self, cache_type: str, user_id: str, content: Optional[str] = None) -> str:
         """Unified cache key generation for all cache types."""
         if content:
-            content_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()[: Constants.CACHE_KEY_HASH_PREFIX_LENGTH]
+            content_hash = hashlib.sha256(str(content).encode("utf-8")).hexdigest()[: Constants.CACHE_KEY_HASH_PREFIX_LENGTH]
             return f"{cache_type}_{user_id}:{content_hash}"
         return f"{cache_type}_{user_id}"
 
@@ -1637,7 +1634,7 @@ class Filter:
 
         user_message, should_skip, skip_reason = await self._process_user_message(body)
 
-        skip_cache_key = self._cache_key(self._cache_manager.SKIP_STATE_CACHE, user_id, user_message or "")
+        skip_cache_key = self._cache_key(self._cache_manager.SKIP_STATE_CACHE, user_id, user_message)
         await self._cache_manager.put(
             user_id,
             self._cache_manager.SKIP_STATE_CACHE,
@@ -1771,7 +1768,7 @@ class Filter:
             user_memories,
         )
 
-        memory_contents = [memory.content for memory in user_memories if memory.content and len(memory.content.strip()) >= Constants.MIN_MESSAGE_CHARS]
+        memory_contents = [memory.content for memory in user_memories if len(memory.content.strip()) >= Constants.MIN_MESSAGE_CHARS]
         if memory_contents:
             await self._generate_embeddings(memory_contents, user_id)
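
Illustrative sketch (not part of the patch): a minimal, standalone approximation of
the extraction and hashing behavior introduced above, assuming the message content
shapes Open WebUI typically passes (plain strings, lists of typed parts, or dicts
with a "text" key). The function names below are local stand-ins for the Filter
methods touched in the diff, not the actual methods.

# sketch.py -- hypothetical demo, mirrors the patched logic only approximately
import hashlib


def extract_text_from_content(content) -> str:
    """Mirror of the simplified extraction logic in the patch."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Join only the text parts of a multi-part message; skip images etc.
        return " ".join(
            item.get("text", "")
            for item in content
            if isinstance(item, dict) and item.get("type") == "text"
        )
    if isinstance(content, dict) and "text" in content:
        return content.get("text", "")
    return ""


def compute_text_hash(text) -> str:
    """The str() guard mirrors the patched hash helpers: None or other
    non-string input no longer raises AttributeError on .encode()."""
    return hashlib.sha256(str(text).encode()).hexdigest()


if __name__ == "__main__":
    print(extract_text_from_content("hello"))                # "hello"
    print(extract_text_from_content([
        {"type": "text", "text": "a"},
        {"type": "image_url", "url": "x"},                    # ignored
        {"type": "text", "text": "b"},
    ]))                                                       # "a b"
    print(extract_text_from_content({"text": "c"}))           # "c"
    print(extract_text_from_content(42))                      # ""
    print(compute_text_hash(None)[:12])                       # hash of "None", no crash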