mirror of
https://github.com/mtayfur/openwebui-memory-system.git
synced 2026-01-22 06:51:01 +01:00
Enhance SkipDetector with fast-path detection for technical messages and add confident margin for early skips
This commit is contained in:
@@ -54,6 +54,7 @@ class Constants:
|
|||||||
# Skip Detection Thresholds
|
# Skip Detection Thresholds
|
||||||
SKIP_DETECTION_SIMILARITY_THRESHOLD = 0.50 # Similarity threshold for skip category detection (tuned for zero-shot)
|
SKIP_DETECTION_SIMILARITY_THRESHOLD = 0.50 # Similarity threshold for skip category detection (tuned for zero-shot)
|
||||||
SKIP_DETECTION_MARGIN = 0.05 # Minimum margin required between technical and conversational similarity to skip
|
SKIP_DETECTION_MARGIN = 0.05 # Minimum margin required between technical and conversational similarity to skip
|
||||||
|
SKIP_DETECTION_CONFIDENT_MARGIN = 0.15 # Margin threshold for confident skips that trigger early exit
|
||||||
|
|
||||||
# Safety & Operations
|
# Safety & Operations
|
||||||
MAX_DELETE_OPERATIONS_RATIO = 0.6 # Maximum delete operations ratio for safety
|
MAX_DELETE_OPERATIONS_RATIO = 0.6 # Maximum delete operations ratio for safety
|
||||||
@@ -477,9 +478,52 @@ class SkipDetector:
|
|||||||
return SkipDetector.SkipReason.SKIP_SIZE.value
|
return SkipDetector.SkipReason.SKIP_SIZE.value
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _fast_path_skip_detection(self, message: str) -> Optional[str]:
|
||||||
|
"""Language-agnostic structural pattern detection with ~95% confidence."""
|
||||||
|
msg_len = len(message)
|
||||||
|
|
||||||
|
if '```' in message:
|
||||||
|
return self.SkipReason.SKIP_TECHNICAL.value
|
||||||
|
|
||||||
|
lines_stripped = [line.strip() for line in message.split('\n') if line.strip()]
|
||||||
|
if lines_stripped:
|
||||||
|
command_lines = sum(1 for line in lines_stripped if line.startswith(('$', '#', '>', '%')))
|
||||||
|
if command_lines >= 3 or (len(lines_stripped) <= 3 and command_lines >= 2):
|
||||||
|
return self.SkipReason.SKIP_TECHNICAL.value
|
||||||
|
|
||||||
|
markup_chars = sum(message.count(c) for c in '{}[]<>')
|
||||||
|
if markup_chars >= 6:
|
||||||
|
if markup_chars / msg_len > 0.06:
|
||||||
|
return self.SkipReason.SKIP_TECHNICAL.value
|
||||||
|
|
||||||
|
line_count = message.count('\n')
|
||||||
|
if line_count > 12:
|
||||||
|
lines = message.split('\n')
|
||||||
|
non_empty_lines = [line for line in lines if line.strip()]
|
||||||
|
if non_empty_lines:
|
||||||
|
structured_lines = sum(1 for line in non_empty_lines if (
|
||||||
|
line.startswith((' ', '\t')) or
|
||||||
|
any(c in line for c in '{}[]<>') or
|
||||||
|
(': ' in line or ':\n' in line)
|
||||||
|
))
|
||||||
|
if structured_lines / len(non_empty_lines) > 0.5:
|
||||||
|
return self.SkipReason.SKIP_TECHNICAL.value
|
||||||
|
|
||||||
|
if msg_len > 50:
|
||||||
|
special_chars = sum(1 for c in message if not c.isalnum() and not c.isspace())
|
||||||
|
special_ratio = special_chars / msg_len
|
||||||
|
if special_ratio > 0.35:
|
||||||
|
alphanumeric = sum(1 for c in message if c.isalnum())
|
||||||
|
if alphanumeric / msg_len < 0.50:
|
||||||
|
return self.SkipReason.SKIP_TECHNICAL.value
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
def detect_skip_reason(self, message: str, max_message_chars: int = Constants.MAX_MESSAGE_CHARS) -> Optional[str]:
|
def detect_skip_reason(self, message: str, max_message_chars: int = Constants.MAX_MESSAGE_CHARS) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Detect if a message should be skipped using zero-shot semantic classification.
|
Detect if a message should be skipped using two-stage detection:
|
||||||
|
1. Fast-path structural patterns (~95% confidence)
|
||||||
|
2. Semantic classification (for remaining cases)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Skip reason string if content should be skipped, None otherwise
|
Skip reason string if content should be skipped, None otherwise
|
||||||
@@ -488,6 +532,11 @@ class SkipDetector:
|
|||||||
if size_issue:
|
if size_issue:
|
||||||
return size_issue
|
return size_issue
|
||||||
|
|
||||||
|
fast_skip = self._fast_path_skip_detection(message)
|
||||||
|
if fast_skip:
|
||||||
|
logger.info(f"Fast-path skip: {fast_skip}")
|
||||||
|
return fast_skip
|
||||||
|
|
||||||
if self._reference_embeddings is None:
|
if self._reference_embeddings is None:
|
||||||
logger.warning("SkipDetector reference embeddings not initialized, allowing message through")
|
logger.warning("SkipDetector reference embeddings not initialized, allowing message through")
|
||||||
return None
|
return None
|
||||||
@@ -523,6 +572,11 @@ class SkipDetector:
|
|||||||
|
|
||||||
if max_similarity > Constants.SKIP_DETECTION_SIMILARITY_THRESHOLD:
|
if max_similarity > Constants.SKIP_DETECTION_SIMILARITY_THRESHOLD:
|
||||||
margin = max_similarity - max_conversational_similarity
|
margin = max_similarity - max_conversational_similarity
|
||||||
|
|
||||||
|
if margin > Constants.SKIP_DETECTION_CONFIDENT_MARGIN:
|
||||||
|
logger.info(f"Skipping message - {skip_reason.value} ({cat_key}: {max_similarity:.3f}, conv: {max_conversational_similarity:.3f}, margin: {margin:.3f})")
|
||||||
|
return skip_reason.value
|
||||||
|
|
||||||
if margin > Constants.SKIP_DETECTION_MARGIN:
|
if margin > Constants.SKIP_DETECTION_MARGIN:
|
||||||
logger.info(f"Skipping message - {skip_reason.value} ({cat_key}: {max_similarity:.3f}, conv: {max_conversational_similarity:.3f}, margin: {margin:.3f})")
|
logger.info(f"Skipping message - {skip_reason.value} ({cat_key}: {max_similarity:.3f}, conv: {max_conversational_similarity:.3f}, margin: {margin:.3f})")
|
||||||
return skip_reason.value
|
return skip_reason.value
|
||||||
|
|||||||
Reference in New Issue
Block a user