feat(logging): Implement LogRegistry for managing session metadata

This commit is contained in:
2026-02-26 08:52:51 -05:00
parent 9a66b7697e
commit 10fbfd0f54
2 changed files with 322 additions and 0 deletions

142
log_registry.py Normal file
View File

@@ -0,0 +1,142 @@
import tomli_w
import tomllib
from datetime import datetime
import os
class LogRegistry:
def __init__(self, registry_path):
self.registry_path = registry_path
self.data = {}
self.load_registry()
def load_registry(self):
"""Loads the registry data from the TOML file."""
if os.path.exists(self.registry_path):
try:
with open(self.registry_path, 'rb') as f:
loaded_data = tomllib.load(f)
# Keep data as it is from TOML (strings or native datetimes)
# If we want to satisfy tests that expect strings, we ensure they are strings.
self.data = {}
for session_id, session_data in loaded_data.items():
new_session_data = session_data.copy()
# If tomllib parsed it as a datetime, convert it back to string for the tests
if 'start_time' in new_session_data and isinstance(new_session_data['start_time'], datetime):
new_session_data['start_time'] = new_session_data['start_time'].isoformat()
if 'metadata' in new_session_data and isinstance(new_session_data['metadata'], dict):
m = new_session_data['metadata']
if 'timestamp' in m and isinstance(m['timestamp'], datetime):
m['timestamp'] = m['timestamp'].isoformat()
self.data[session_id] = new_session_data
except Exception as e:
print(f"Error loading registry from {self.registry_path}: {e}")
self.data = {}
else:
self.data = {}
def save_registry(self):
"""Saves the current registry data to the TOML file."""
try:
# Convert datetime objects to ISO format strings for TOML serialization
data_to_save = {}
for session_id, session_data in self.data.items():
session_data_copy = {}
for k, v in session_data.items():
if v is None:
continue
if k == 'start_time' and isinstance(v, datetime):
session_data_copy[k] = v.isoformat()
elif k == 'metadata' and isinstance(v, dict):
metadata_copy = {}
for mk, mv in v.items():
if mv is None:
continue
if mk == 'timestamp' and isinstance(mv, datetime):
metadata_copy[mk] = mv.isoformat()
else:
metadata_copy[mk] = mv
session_data_copy[k] = metadata_copy
else:
session_data_copy[k] = v
data_to_save[session_id] = session_data_copy
with open(self.registry_path, 'wb') as f:
tomli_w.dump(data_to_save, f)
except Exception as e:
print(f"Error saving registry to {self.registry_path}: {e}")
def register_session(self, session_id, path, start_time):
"""Registers a new session."""
if session_id in self.data:
print(f"Warning: Session ID '{session_id}' already exists. Overwriting.")
# Store start_time internally as a string to satisfy tests
if isinstance(start_time, datetime):
start_time_str = start_time.isoformat()
else:
start_time_str = start_time
self.data[session_id] = {
'path': path,
'start_time': start_time_str,
'whitelisted': False,
'metadata': None
}
self.save_registry()
def update_session_metadata(self, session_id, message_count, errors, size_kb, whitelisted, reason):
"""Updates metadata for an existing session."""
if session_id not in self.data:
print(f"Error: Session ID '{session_id}' not found for metadata update.")
return
# Ensure metadata exists
if self.data[session_id].get('metadata') is None:
self.data[session_id]['metadata'] = {}
# Update fields
self.data[session_id]['metadata']['message_count'] = message_count
self.data[session_id]['metadata']['errors'] = errors
self.data[session_id]['metadata']['size_kb'] = size_kb
self.data[session_id]['metadata']['whitelisted'] = whitelisted
self.data[session_id]['metadata']['reason'] = reason
# self.data[session_id]['metadata']['timestamp'] = datetime.utcnow() # Optionally add a timestamp
# Also update the top-level whitelisted flag if provided
if whitelisted is not None:
self.data[session_id]['whitelisted'] = whitelisted
self.save_registry() # Save after update
def is_session_whitelisted(self, session_id):
"""Checks if a session is whitelisted."""
session_data = self.data.get(session_id)
if session_data is None:
return False # Non-existent sessions are not whitelisted
# Check the top-level 'whitelisted' flag. If it's not set or False, it's not whitelisted.
return session_data.get('whitelisted', False)
def get_old_non_whitelisted_sessions(self, cutoff_datetime):
"""Gets sessions older than cutoff_datetime and not whitelisted."""
old_sessions = []
for session_id, session_data in self.data.items():
# Check if session is older than cutoff and not whitelisted
start_time_raw = session_data.get('start_time')
if isinstance(start_time_raw, str):
try:
start_time = datetime.fromisoformat(start_time_raw)
except ValueError:
start_time = None
else:
start_time = start_time_raw
is_whitelisted = session_data.get('whitelisted', False)
if start_time is not None and start_time < cutoff_datetime and not is_whitelisted:
old_sessions.append({
'session_id': session_id,
'path': session_data.get('path'),
'start_time': start_time_raw
})
return old_sessions

180
tests/test_log_registry.py Normal file
View File

@@ -0,0 +1,180 @@
import unittest
import tempfile
import os
from datetime import datetime, timedelta
# This import is expected to fail with ImportError until log_registry.py is created.
from log_registry import LogRegistry
class TestLogRegistry(unittest.TestCase):
def setUp(self):
"""Set up a temporary directory and registry file for each test."""
self.temp_dir = tempfile.TemporaryDirectory()
self.registry_path = os.path.join(self.temp_dir.name, "registry.toml")
# Ensure the file is created and empty initially for a clean state.
# LogRegistry is assumed to load from this file on instantiation.
with open(self.registry_path, 'w') as f:
f.write("# Initial empty registry\n")
# Instantiate LogRegistry. This will load from the empty file.
self.registry = LogRegistry(self.registry_path)
def tearDown(self):
"""Clean up the temporary directory and its contents after each test."""
self.temp_dir.cleanup()
def test_instantiation(self):
"""Test LogRegistry instantiation with a file path."""
self.assertIsInstance(self.registry, LogRegistry)
self.assertEqual(self.registry.registry_path, self.registry_path)
# Check if the file exists. LogRegistry is assumed to create it if not.
self.assertTrue(os.path.exists(self.registry_path))
# We will verify content in other tests that explicitly save and reload.
def test_register_session(self):
"""Test registering a new session."""
session_id = "session-123"
path = "/path/to/session/123"
start_time = datetime.utcnow()
self.registry.register_session(session_id, path, start_time)
# Verify session was added to internal data (assuming LogRegistry has a public 'data' attribute for testing)
self.assertIn(session_id, self.registry.data)
session_data = self.registry.data[session_id]
self.assertEqual(session_data['path'], path)
# Convert stored ISO string back to datetime for comparison
stored_start_time = datetime.fromisoformat(session_data['start_time'])
self.assertAlmostEqual(stored_start_time, start_time, delta=timedelta(seconds=1)) # Allow for minor time differences
self.assertFalse(session_data.get('whitelisted', False)) # Default to not whitelisted
self.assertIsNone(session_data.get('metadata'))
# Verify data was written to the TOML file by reloading
reloaded_registry = LogRegistry(self.registry_path)
self.assertIn(session_id, reloaded_registry.data)
reloaded_session_data = reloaded_registry.data[session_id]
reloaded_start_time = datetime.fromisoformat(reloaded_session_data['start_time'])
self.assertAlmostEqual(reloaded_start_time, start_time, delta=timedelta(seconds=1))
def test_update_session_metadata(self):
"""Test updating session metadata."""
session_id = "session-456"
path = "/path/to/session/456"
start_time = datetime.utcnow()
self.registry.register_session(session_id, path, start_time)
message_count = 100
errors = 5
size_kb = 1024
whitelisted = True
reason = "Automated process"
self.registry.update_session_metadata(session_id, message_count, errors, size_kb, whitelisted, reason)
# Verify metadata was updated in internal data
self.assertIn(session_id, self.registry.data)
session_data = self.registry.data[session_id]
self.assertIsNotNone(session_data.get('metadata'))
metadata = session_data['metadata']
self.assertEqual(metadata['message_count'], message_count)
self.assertEqual(metadata['errors'], errors)
self.assertEqual(metadata['size_kb'], size_kb)
self.assertEqual(metadata['whitelisted'], whitelisted)
self.assertEqual(metadata['reason'], reason)
# Also check if the whitelisted flag in the main session data is updated
self.assertTrue(session_data.get('whitelisted', False))
# Verify data was written to the TOML file by reloading
reloaded_registry = LogRegistry(self.registry_path)
self.assertIn(session_id, reloaded_registry.data)
reloaded_session_data = reloaded_registry.data[session_id]
self.assertTrue(reloaded_session_data.get('metadata', {}).get('whitelisted', False))
self.assertTrue(reloaded_session_data.get('whitelisted', False)) # Check main flag too
def test_is_session_whitelisted(self):
"""Test checking if a session is whitelisted."""
session_id_whitelisted = "session-789-whitelisted"
path_w = "/path/to/session/789"
start_time_w = datetime.utcnow()
self.registry.register_session(session_id_whitelisted, path_w, start_time_w)
self.registry.update_session_metadata(session_id_whitelisted, 10, 0, 100, True, "Manual whitelist")
session_id_not_whitelisted = "session-abc-not-whitelisted"
path_nw = "/path/to/session/abc"
start_time_nw = datetime.utcnow()
self.registry.register_session(session_id_not_whitelisted, path_nw, start_time_nw)
# Test explicitly whitelisted session
self.assertTrue(self.registry.is_session_whitelisted(session_id_whitelisted))
# Test session registered but not updated, should default to not whitelisted
self.assertFalse(self.registry.is_session_whitelisted(session_id_not_whitelisted))
# Test for a non-existent session, should be treated as not whitelisted
self.assertFalse(self.registry.is_session_whitelisted("non-existent-session"))
def test_get_old_non_whitelisted_sessions(self):
"""Test retrieving old, non-whitelisted sessions."""
now = datetime.utcnow()
# Define a cutoff time that is 7 days ago
cutoff_time = now - timedelta(days=7)
# Session 1: Old and not whitelisted
session_id_old_nw = "session-old-nw"
path_old_nw = "/path/to/session/old_nw"
start_time_old_nw = now - timedelta(days=10) # Older than cutoff
self.registry.register_session(session_id_old_nw, path_old_nw, start_time_old_nw)
# Session 2: Recent and not whitelisted
session_id_recent_nw = "session-recent-nw"
path_recent_nw = "/path/to/session/recent_nw"
start_time_recent_nw = now - timedelta(days=3) # Newer than cutoff
self.registry.register_session(session_id_recent_nw, path_recent_nw, start_time_recent_nw)
# Session 3: Old and whitelisted
session_id_old_w = "session-old-w"
path_old_w = "/path/to/session/old_w"
start_time_old_w = now - timedelta(days=15) # Older than cutoff
self.registry.register_session(session_id_old_w, path_old_w, start_time_old_w)
self.registry.update_session_metadata(session_id_old_w, 50, 0, 500, True, "Whitelisted")
# Session 4: Old, not whitelisted explicitly, but with metadata that doesn't set 'whitelisted' to True.
# The 'is_session_whitelisted' logic should correctly interpret this as not whitelisted.
session_id_old_nw_incomplete = "session-old-nw-incomplete"
path_old_nw_incomplete = "/path/to/session/old_nw_incomplete"
start_time_old_nw_incomplete = now - timedelta(days=20) # Older than cutoff
self.registry.register_session(session_id_old_nw_incomplete, path_old_nw_incomplete, start_time_old_nw_incomplete)
# Update with some metadata, but set 'whitelisted' to False explicitly
self.registry.update_session_metadata(session_id_old_nw_incomplete, 10, 0, 100, False, "Manual review needed")
# Get sessions older than cutoff_time and not whitelisted
old_sessions = self.registry.get_old_non_whitelisted_sessions(cutoff_time)
# Collect session IDs from the result
found_session_ids = {s['session_id'] for s in old_sessions}
# Expected: session_id_old_nw (old, not whitelisted) and session_id_old_nw_incomplete (old, explicitly not whitelisted)
self.assertIn(session_id_old_nw, found_session_ids)
self.assertIn(session_id_old_nw_incomplete, found_session_ids)
# Not expected: session_id_recent_nw (too recent), session_id_old_w (whitelisted)
self.assertNotIn(session_id_recent_nw, found_session_ids)
self.assertNotIn(session_id_old_w, found_session_ids)
# Ensure only the expected sessions are in the result
self.assertEqual(len(found_session_ids), 2)
# Test with a cutoff that includes all sessions, and ensure only non-whitelisted are returned
future_cutoff = now + timedelta(days=1) # All sessions are older than this
all_old_sessions = self.registry.get_old_non_whitelisted_sessions(future_cutoff)
all_found_session_ids = {s['session_id'] for s in all_old_sessions}
# Expected: session_id_old_nw, session_id_old_nw_incomplete, AND session_id_recent_nw
# Not expected: session_id_old_w (whitelisted)
self.assertEqual(len(all_found_session_ids), 3)
self.assertIn(session_id_old_nw, all_found_session_ids)
self.assertIn(session_id_old_nw_incomplete, all_found_session_ids)
self.assertIn(session_id_recent_nw, all_found_session_ids)
self.assertNotIn(session_id_old_w, all_found_session_ids)