From 9fbcaea709ceaf59b0c6a41f9774b6661581350a Mon Sep 17 00:00:00 2001 From: santiagosayshey Date: Fri, 16 Aug 2024 15:11:29 +0930 Subject: [PATCH] refactor(sanitization): centralize and apply input sanitization --- backend/app/utils/file_utils.py | 11 ++++++++++- backend/app/utils/format_operations.py | 18 +----------------- backend/app/utils/regex_operations.py | 13 +++++++------ 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/backend/app/utils/file_utils.py b/backend/app/utils/file_utils.py index 002a11e..732417a 100644 --- a/backend/app/utils/file_utils.py +++ b/backend/app/utils/file_utils.py @@ -12,4 +12,13 @@ def generate_filename(directory, id, name): return os.path.join(directory, f"{id}_{sanitized_name}.yml") def get_current_timestamp(): - return datetime.datetime.now().isoformat() \ No newline at end of file + return datetime.datetime.now().isoformat() + +import re + +def sanitize_input(input_str): + sanitized_str = input_str.strip() + sanitized_str = re.sub(r'[:#\-\*>\|&]', '', sanitized_str) + sanitized_str = sanitized_str.replace('\t', ' ') + sanitized_str = re.sub(r'\s+', ' ', sanitized_str) + return sanitized_str \ No newline at end of file diff --git a/backend/app/utils/format_operations.py b/backend/app/utils/format_operations.py index 843a7e6..a9b728b 100644 --- a/backend/app/utils/format_operations.py +++ b/backend/app/utils/format_operations.py @@ -1,9 +1,8 @@ import os import yaml -import re import logging from collections import OrderedDict -from .file_utils import get_next_id, generate_filename, get_current_timestamp +from .file_utils import get_next_id, generate_filename, get_current_timestamp, sanitize_input FORMAT_DIR = 'custom_formats' @@ -16,21 +15,6 @@ def represent_ordereddict(dumper, data): yaml.add_representer(OrderedDict, represent_ordereddict, Dumper=yaml.SafeDumper) -def sanitize_input(input_str): - # Trim leading/trailing whitespace - sanitized_str = input_str.strip() - - # Replace special characters that could affect YAML formatting - sanitized_str = re.sub(r'[:#\-\*>\|&]', '', sanitized_str) - - # Ensure there are no tabs (which can cause issues in YAML) - sanitized_str = sanitized_str.replace('\t', ' ') - - # Optionally: Collapse multiple spaces into a single space - sanitized_str = re.sub(r'\s+', ' ', sanitized_str) - - return sanitized_str - def save_format(data): # Log the received data logger.info("Received data for saving format: %s", data) diff --git a/backend/app/utils/regex_operations.py b/backend/app/utils/regex_operations.py index b9beefc..4c90c20 100644 --- a/backend/app/utils/regex_operations.py +++ b/backend/app/utils/regex_operations.py @@ -1,7 +1,7 @@ import os import yaml from collections import OrderedDict -from .file_utils import get_next_id, generate_filename, get_current_timestamp +from .file_utils import get_next_id, generate_filename, get_current_timestamp, sanitize_input REGEX_DIR = 'regex_patterns' @@ -12,10 +12,10 @@ def save_regex(data): else: ordered_data['id'] = get_next_id(REGEX_DIR) - ordered_data['name'] = data.get('name', '') - ordered_data['description'] = data.get('description', '') - ordered_data['pattern'] = data.get('pattern', '') - ordered_data['regex101Link'] = data.get('regex101Link', '') + ordered_data['name'] = sanitize_input(data.get('name', '')) + ordered_data['description'] = sanitize_input(data.get('description', '')) + ordered_data['pattern'] = sanitize_input(data.get('pattern', '')) + ordered_data['regex101Link'] = sanitize_input(data.get('regex101Link', '')) if ordered_data['id'] != 0: # Existing regex existing_data = load_regex(ordered_data['id']) @@ -27,7 +27,7 @@ def save_regex(data): ordered_data['date_created'] = get_current_timestamp() ordered_data['date_modified'] = get_current_timestamp() - ordered_data['tags'] = data.get('tags', []) + ordered_data['tags'] = [sanitize_input(tag) for tag in data.get('tags', [])] filename = generate_filename(REGEX_DIR, ordered_data['id'], ordered_data['name']) with open(filename, 'w') as file: @@ -43,6 +43,7 @@ def save_regex(data): return ordered_data + def load_regex(id): files = [f for f in os.listdir(REGEX_DIR) if f.startswith(f"{id}_") and f.endswith('.yml')] if files: