mirror of
https://github.com/mtayfur/openwebui-memory-system.git
synced 2026-01-22 06:51:01 +01:00
Enhance SkipDetector with fast-path detection for technical messages and add confident margin for early skips
This commit is contained in:
@@ -54,6 +54,7 @@ class Constants:
|
||||
# Skip Detection Thresholds
|
||||
SKIP_DETECTION_SIMILARITY_THRESHOLD = 0.50 # Similarity threshold for skip category detection (tuned for zero-shot)
|
||||
SKIP_DETECTION_MARGIN = 0.05 # Minimum margin required between technical and conversational similarity to skip
|
||||
SKIP_DETECTION_CONFIDENT_MARGIN = 0.15 # Margin threshold for confident skips that trigger early exit
|
||||
|
||||
# Safety & Operations
|
||||
MAX_DELETE_OPERATIONS_RATIO = 0.6 # Maximum delete operations ratio for safety
|
||||
@@ -477,9 +478,52 @@ class SkipDetector:
|
||||
return SkipDetector.SkipReason.SKIP_SIZE.value
|
||||
return None
|
||||
|
||||
def _fast_path_skip_detection(self, message: str) -> Optional[str]:
|
||||
"""Language-agnostic structural pattern detection with ~95% confidence."""
|
||||
msg_len = len(message)
|
||||
|
||||
if '```' in message:
|
||||
return self.SkipReason.SKIP_TECHNICAL.value
|
||||
|
||||
lines_stripped = [line.strip() for line in message.split('\n') if line.strip()]
|
||||
if lines_stripped:
|
||||
command_lines = sum(1 for line in lines_stripped if line.startswith(('$', '#', '>', '%')))
|
||||
if command_lines >= 3 or (len(lines_stripped) <= 3 and command_lines >= 2):
|
||||
return self.SkipReason.SKIP_TECHNICAL.value
|
||||
|
||||
markup_chars = sum(message.count(c) for c in '{}[]<>')
|
||||
if markup_chars >= 6:
|
||||
if markup_chars / msg_len > 0.06:
|
||||
return self.SkipReason.SKIP_TECHNICAL.value
|
||||
|
||||
line_count = message.count('\n')
|
||||
if line_count > 12:
|
||||
lines = message.split('\n')
|
||||
non_empty_lines = [line for line in lines if line.strip()]
|
||||
if non_empty_lines:
|
||||
structured_lines = sum(1 for line in non_empty_lines if (
|
||||
line.startswith((' ', '\t')) or
|
||||
any(c in line for c in '{}[]<>') or
|
||||
(': ' in line or ':\n' in line)
|
||||
))
|
||||
if structured_lines / len(non_empty_lines) > 0.5:
|
||||
return self.SkipReason.SKIP_TECHNICAL.value
|
||||
|
||||
if msg_len > 50:
|
||||
special_chars = sum(1 for c in message if not c.isalnum() and not c.isspace())
|
||||
special_ratio = special_chars / msg_len
|
||||
if special_ratio > 0.35:
|
||||
alphanumeric = sum(1 for c in message if c.isalnum())
|
||||
if alphanumeric / msg_len < 0.50:
|
||||
return self.SkipReason.SKIP_TECHNICAL.value
|
||||
|
||||
return None
|
||||
|
||||
def detect_skip_reason(self, message: str, max_message_chars: int = Constants.MAX_MESSAGE_CHARS) -> Optional[str]:
|
||||
"""
|
||||
Detect if a message should be skipped using zero-shot semantic classification.
|
||||
Detect if a message should be skipped using two-stage detection:
|
||||
1. Fast-path structural patterns (~95% confidence)
|
||||
2. Semantic classification (for remaining cases)
|
||||
|
||||
Returns:
|
||||
Skip reason string if content should be skipped, None otherwise
|
||||
@@ -488,6 +532,11 @@ class SkipDetector:
|
||||
if size_issue:
|
||||
return size_issue
|
||||
|
||||
fast_skip = self._fast_path_skip_detection(message)
|
||||
if fast_skip:
|
||||
logger.info(f"Fast-path skip: {fast_skip}")
|
||||
return fast_skip
|
||||
|
||||
if self._reference_embeddings is None:
|
||||
logger.warning("SkipDetector reference embeddings not initialized, allowing message through")
|
||||
return None
|
||||
@@ -523,6 +572,11 @@ class SkipDetector:
|
||||
|
||||
if max_similarity > Constants.SKIP_DETECTION_SIMILARITY_THRESHOLD:
|
||||
margin = max_similarity - max_conversational_similarity
|
||||
|
||||
if margin > Constants.SKIP_DETECTION_CONFIDENT_MARGIN:
|
||||
logger.info(f"Skipping message - {skip_reason.value} ({cat_key}: {max_similarity:.3f}, conv: {max_conversational_similarity:.3f}, margin: {margin:.3f})")
|
||||
return skip_reason.value
|
||||
|
||||
if margin > Constants.SKIP_DETECTION_MARGIN:
|
||||
logger.info(f"Skipping message - {skip_reason.value} ({cat_key}: {max_similarity:.3f}, conv: {max_conversational_similarity:.3f}, margin: {margin:.3f})")
|
||||
return skip_reason.value
|
||||
|
||||
Reference in New Issue
Block a user