Private
Public Access
0
0

more organization

This commit is contained in:
2026-06-06 10:24:22 -04:00
parent 1c627bcc30
commit 7d555361f9
20 changed files with 630 additions and 725 deletions
+77 -95
View File
@@ -49,21 +49,17 @@ from typing import Any
class LogRegistry:
"""
Manages a persistent registry of session logs using a TOML file.
Tracks session paths, start times, whitelisting status, and metadata.
Manages a persistent registry of session logs using a TOML file.
Tracks session paths, start times, whitelisting status, and metadata.
"""
def __init__(self, registry_path: str) -> None:
"""
Initializes the LogRegistry with a path to the registry file.
Initializes the LogRegistry with a path to the registry file.
Args:
registry_path (str): The file path to the TOML registry.
[C: src/mcp_client.py:_DDGParser.__init__, src/mcp_client.py:_TextExtractor.__init__]
Args:
registry_path (str): The file path to the TOML registry.
[C: src/mcp_client.py:_DDGParser.__init__, src/mcp_client.py:_TextExtractor.__init__]
"""
self.registry_path = registry_path
self.data: dict[str, dict[str, Any]] = {}
@@ -76,10 +72,8 @@ class LogRegistry:
def load_registry(self) -> None:
"""
Loads the registry data from the TOML file into memory.
Handles date/time conversions from TOML-native formats to strings for consistency.
Loads the registry data from the TOML file into memory.
Handles date/time conversions from TOML-native formats to strings for consistency.
"""
if os.path.exists(self.registry_path):
try:
@@ -106,11 +100,9 @@ class LogRegistry:
def save_registry(self) -> None:
"""
Serializes and saves the current registry data to the TOML file.
Converts internal datetime objects to ISO format strings for compatibility.
[C: tests/test_logging_e2e.py:test_logging_e2e]
Serializes and saves the current registry data to the TOML file.
Converts internal datetime objects to ISO format strings for compatibility.
[C: tests/test_logging_e2e.py:test_logging_e2e]
"""
try:
# Convert datetime objects to ISO format strings for TOML serialization
@@ -130,7 +122,7 @@ class LogRegistry:
if mk == 'timestamp' and isinstance(mv, datetime):
metadata_copy[mk] = mv.isoformat()
else:
metadata_copy[mk] = mv
metadata_copy[mk] = mv
session_data_copy[k] = metadata_copy
else:
session_data_copy[k] = v
@@ -142,15 +134,13 @@ class LogRegistry:
def register_session(self, session_id: str, path: str, start_time: datetime | str) -> None:
"""
Registers a new session in the registry.
Args:
session_id (str): Unique identifier for the session.
path (str): File path to the session's log directory.
start_time (datetime|str): The timestamp when the session started.
[C: src/session_logger.py:open_session, tests/test_auto_whitelist.py:test_auto_whitelist_keywords, tests/test_auto_whitelist.py:test_auto_whitelist_large_size, tests/test_auto_whitelist.py:test_auto_whitelist_message_count, tests/test_auto_whitelist.py:test_no_auto_whitelist_insignificant, tests/test_log_pruner.py:test_prune_old_insignificant_logs, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_get_old_non_whitelisted_sessions_includes_empty_sessions, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_get_old_non_whitelisted_sessions_includes_sessions_without_metadata, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_prune_handles_relative_paths_starting_with_logs, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_prune_removes_empty_sessions_regardless_of_age, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_prune_removes_sessions_without_metadata_regardless_of_age, tests/test_log_registry.py:TestLogRegistry.test_get_old_non_whitelisted_sessions, tests/test_log_registry.py:TestLogRegistry.test_is_session_whitelisted, tests/test_log_registry.py:TestLogRegistry.test_register_session, tests/test_log_registry.py:TestLogRegistry.test_update_session_metadata, tests/test_logging_e2e.py:test_logging_e2e]
Registers a new session in the registry.
Args:
session_id (str): Unique identifier for the session.
path (str): File path to the session's log directory.
start_time (datetime|str): The timestamp when the session started.
[C: src/session_logger.py:open_session, tests/test_auto_whitelist.py:test_auto_whitelist_keywords, tests/test_auto_whitelist.py:test_auto_whitelist_large_size, tests/test_auto_whitelist.py:test_auto_whitelist_message_count, tests/test_auto_whitelist.py:test_no_auto_whitelist_insignificant, tests/test_log_pruner.py:test_prune_old_insignificant_logs, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_get_old_non_whitelisted_sessions_includes_empty_sessions, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_get_old_non_whitelisted_sessions_includes_sessions_without_metadata, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_prune_handles_relative_paths_starting_with_logs, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_prune_removes_empty_sessions_regardless_of_age, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_prune_removes_sessions_without_metadata_regardless_of_age, tests/test_log_registry.py:TestLogRegistry.test_get_old_non_whitelisted_sessions, tests/test_log_registry.py:TestLogRegistry.test_is_session_whitelisted, tests/test_log_registry.py:TestLogRegistry.test_register_session, tests/test_log_registry.py:TestLogRegistry.test_update_session_metadata, tests/test_logging_e2e.py:test_logging_e2e]
"""
if session_id in self.data:
print(f"Warning: Session ID '{session_id}' already exists. Overwriting.")
@@ -160,27 +150,25 @@ class LogRegistry:
else:
start_time_str = start_time
self.data[session_id] = {
'path': path,
'start_time': start_time_str,
'path': path,
'start_time': start_time_str,
'whitelisted': False,
'metadata': None
'metadata': None
}
self.save_registry()
def update_session_metadata(self, session_id: str, message_count: int, errors: int, size_kb: int, whitelisted: bool, reason: str) -> None:
"""
Updates metadata fields for an existing session.
Args:
session_id (str): Unique identifier for the session.
message_count (int): Total number of messages in the session.
errors (int): Number of errors identified in logs.
size_kb (int): Total size of the session logs in kilobytes.
whitelisted (bool): Whether the session should be protected from pruning.
reason (str): Explanation for the current whitelisting status.
[C: tests/test_auto_whitelist.py:test_auto_whitelist_large_size, tests/test_auto_whitelist.py:test_auto_whitelist_message_count, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_get_old_non_whitelisted_sessions_includes_empty_sessions, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_prune_removes_empty_sessions_regardless_of_age, tests/test_log_registry.py:TestLogRegistry.test_get_old_non_whitelisted_sessions, tests/test_log_registry.py:TestLogRegistry.test_is_session_whitelisted, tests/test_log_registry.py:TestLogRegistry.test_update_session_metadata]
Updates metadata fields for an existing session.
Args:
session_id (str): Unique identifier for the session.
message_count (int): Total number of messages in the session.
errors (int): Number of errors identified in logs.
size_kb (int): Total size of the session logs in kilobytes.
whitelisted (bool): Whether the session should be protected from pruning.
reason (str): Explanation for the current whitelisting status.
[C: tests/test_auto_whitelist.py:test_auto_whitelist_large_size, tests/test_auto_whitelist.py:test_auto_whitelist_message_count, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_get_old_non_whitelisted_sessions_includes_empty_sessions, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_prune_removes_empty_sessions_regardless_of_age, tests/test_log_registry.py:TestLogRegistry.test_get_old_non_whitelisted_sessions, tests/test_log_registry.py:TestLogRegistry.test_is_session_whitelisted, tests/test_log_registry.py:TestLogRegistry.test_update_session_metadata]
"""
if session_id not in self.data:
print(f"Error: Session ID '{session_id}' not found for metadata update.")
@@ -192,10 +180,10 @@ class LogRegistry:
metadata = self.data[session_id].get('metadata')
if isinstance(metadata, dict):
metadata['message_count'] = message_count
metadata['errors'] = errors
metadata['size_kb'] = size_kb
metadata['whitelisted'] = whitelisted
metadata['reason'] = reason
metadata['errors'] = errors
metadata['size_kb'] = size_kb
metadata['whitelisted'] = whitelisted
metadata['reason'] = reason
# self.data[session_id]['metadata']['timestamp'] = datetime.utcnow() # Optionally add a timestamp
# Also update the top-level whitelisted flag if provided
if whitelisted is not None:
@@ -204,16 +192,14 @@ class LogRegistry:
def is_session_whitelisted(self, session_id: str) -> bool:
"""
Checks if a specific session is marked as whitelisted.
Args:
session_id (str): Unique identifier for the session.
Returns:
bool: True if whitelisted, False otherwise.
[C: tests/test_auto_whitelist.py:test_auto_whitelist_keywords, tests/test_auto_whitelist.py:test_auto_whitelist_large_size, tests/test_auto_whitelist.py:test_auto_whitelist_message_count, tests/test_auto_whitelist.py:test_no_auto_whitelist_insignificant, tests/test_log_registry.py:TestLogRegistry.test_is_session_whitelisted, tests/test_logging_e2e.py:test_logging_e2e]
Checks if a specific session is marked as whitelisted.
Args:
session_id (str): Unique identifier for the session.
Returns:
bool: True if whitelisted, False otherwise.
[C: tests/test_auto_whitelist.py:test_auto_whitelist_keywords, tests/test_auto_whitelist.py:test_auto_whitelist_large_size, tests/test_auto_whitelist.py:test_auto_whitelist_message_count, tests/test_auto_whitelist.py:test_no_auto_whitelist_insignificant, tests/test_log_registry.py:TestLogRegistry.test_is_session_whitelisted, tests/test_logging_e2e.py:test_logging_e2e]
"""
session_data = self.data.get(session_id)
if session_data is None:
@@ -223,15 +209,13 @@ class LogRegistry:
def update_auto_whitelist_status(self, session_id: str) -> None:
"""
Analyzes session logs and updates whitelisting status based on heuristics.
Sessions are automatically whitelisted if they contain error keywords,
have a high message count, or exceed a size threshold.
Args:
session_id (str): Unique identifier for the session to analyze.
[C: src/session_logger.py:close_session]
Analyzes session logs and updates whitelisting status based on heuristics.
Sessions are automatically whitelisted if they contain error keywords,
have a high message count, or exceed a size threshold.
Args:
session_id (str): Unique identifier for the session to analyze.
[C: src/session_logger.py:close_session]
"""
if session_id not in self.data:
return
@@ -239,9 +223,9 @@ class LogRegistry:
session_path = session_data.get('path')
if not session_path or not os.path.isdir(str(session_path)):
return
total_size_bytes = 0
message_count = 0
found_keywords = []
total_size_bytes = 0
message_count = 0
found_keywords = []
keywords_to_check = ['ERROR', 'WARNING', 'EXCEPTION']
try:
for entry in os.scandir(str(session_path)):
@@ -261,41 +245,39 @@ class LogRegistry:
pass
except Exception:
pass
size_kb = total_size_bytes / 1024
size_kb = total_size_bytes / 1024
whitelisted = False
reason = ""
reason = ""
if found_keywords:
whitelisted = True
reason = f"Found keywords: {', '.join(found_keywords)}"
reason = f"Found keywords: {', '.join(found_keywords)}"
elif message_count > 10:
whitelisted = True
reason = f"High message count: {message_count}"
reason = f"High message count: {message_count}"
elif size_kb > 50:
whitelisted = True
reason = f"Large session size: {size_kb:.1f} KB"
reason = f"Large session size: {size_kb:.1f} KB"
self.update_session_metadata(
session_id,
message_count=message_count,
errors=len(found_keywords),
size_kb=int(size_kb),
whitelisted=whitelisted,
reason=reason
message_count = message_count,
errors = len(found_keywords),
size_kb = int(size_kb),
whitelisted = whitelisted,
reason = reason
)
def get_old_non_whitelisted_sessions(self, cutoff_datetime: datetime) -> list[dict[str, Any]]:
"""
Retrieves a list of sessions that are older than a specific cutoff time
and are not marked as whitelisted.
Also includes non-whitelisted sessions that are empty (message_count=0 or size_kb=0).
Args:
cutoff_datetime (datetime): The threshold time for identifying old sessions.
Returns:
list: A list of dictionaries containing session details (id, path, start_time).
[C: tests/test_log_pruner.py:test_prune_old_insignificant_logs, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_get_old_non_whitelisted_sessions_includes_empty_sessions, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_get_old_non_whitelisted_sessions_includes_sessions_without_metadata, tests/test_log_registry.py:TestLogRegistry.test_get_old_non_whitelisted_sessions]
Retrieves a list of sessions that are older than a specific cutoff time
and are not marked as whitelisted.
Also includes non-whitelisted sessions that are empty (message_count=0 or size_kb=0).
Args:
cutoff_datetime (datetime): The threshold time for identifying old sessions.
Returns:
list: A list of dictionaries containing session details (id, path, start_time).
[C: tests/test_log_pruner.py:test_prune_old_insignificant_logs, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_get_old_non_whitelisted_sessions_includes_empty_sessions, tests/test_log_pruning_heuristic.py:TestLogPruningHeuristic.test_get_old_non_whitelisted_sessions_includes_sessions_without_metadata, tests/test_log_registry.py:TestLogRegistry.test_get_old_non_whitelisted_sessions]
"""
old_sessions = []
for session_id, session_data in self.data.items():
@@ -316,14 +298,14 @@ class LogRegistry:
is_empty = True
else:
message_count = metadata.get('message_count', -1)
size_kb = metadata.get('size_kb', -1)
is_empty = (message_count == 0 or size_kb == 0)
size_kb = metadata.get('size_kb', -1)
is_empty = (message_count == 0 or size_kb == 0)
if not is_whitelisted:
if is_empty or (start_time is not None and start_time < cutoff_datetime):
old_sessions.append({
'session_id': session_id,
'path': session_data.get('path'),
'path': session_data.get('path'),
'start_time': start_time_raw
})
return old_sessions