diff --git a/conductor/tracks.md b/conductor/tracks.md index 3eb8863..abdc3a9 100644 --- a/conductor/tracks.md +++ b/conductor/tracks.md @@ -40,7 +40,7 @@ This file tracks all major tracks for the project. Each track has its own detail --- -- [ ] **Track: Review logging used throughout the project. THe log directory has several categories of logs and they are getting quite large in number. We need sub-directoreis and we need a way to prune logs that aren't valuable to keep.** +- [~] **Track: Review logging used throughout the project. THe log directory has several categories of logs and they are getting quite large in number. We need sub-directoreis and we need a way to prune logs that aren't valuable to keep.** *Link: [./tracks/logging_refactor_20260226/](./tracks/logging_refactor_20260226/)* --- diff --git a/conductor/tracks/logging_refactor_20260226/plan.md b/conductor/tracks/logging_refactor_20260226/plan.md index 9d5ab59..bd818bd 100644 --- a/conductor/tracks/logging_refactor_20260226/plan.md +++ b/conductor/tracks/logging_refactor_20260226/plan.md @@ -1,20 +1,20 @@ # Implementation Plan: Logging Reorganization and Automated Pruning ## Phase 1: Session Organization & Registry Foundation -- [ ] Task: Initialize MMA Environment (Protocol: `activate_skill mma-orchestrator`) -- [ ] Task: Implement `LogRegistry` to manage `log_registry.toml` +- [x] Task: Initialize MMA Environment (Protocol: `activate_skill mma-orchestrator`) [9a66b76] +- [x] Task: Implement `LogRegistry` to manage `log_registry.toml` [10fbfd0] - [ ] Define TOML schema for session metadata. - [ ] Create methods to register sessions and update whitelist status. -- [ ] Task: Implement Session-Based Directory Creation +- [x] Task: Implement Session-Based Directory Creation [3f4dc1a] - [ ] Create utility to generate Session IDs: `YYYYMMDD_HHMMSS[_Label]`. - [ ] Update logging initialization to create and use session sub-directories. 
def update_auto_whitelist_status(
    self,
    session_id: str,
    *,
    keywords=('ERROR', 'WARNING', 'EXCEPTION'),
    message_threshold: int = 10,
    size_threshold_kb: float = 50,
) -> None:
    """
    Analyze a session's log directory and record auto-whitelisting results.

    The session directory (the ``path`` entry of ``self.data[session_id]``)
    is scanned non-recursively. The sizes of all regular files are summed,
    and ``comms.log`` is read line by line to count lines and to detect
    ``keywords``. The session is whitelisted by the first rule that matches:

    1. any keyword occurs in ``comms.log``;
    2. ``comms.log`` has more than ``message_threshold`` lines;
    3. the directory's files total more than ``size_threshold_kb`` KiB.

    If no rule matches, an already-set ``whitelisted`` flag (and its stored
    reason) is kept rather than reset, so that a re-scan never exposes a
    previously protected session to pruning.

    The outcome is handed to ``self.update_session_metadata`` as the keyword
    arguments ``message_count``, ``errors``, ``size_kb``, ``whitelisted``
    and ``reason``. Nothing happens when the session is unknown or its
    directory does not exist.

    Args:
        session_id: Key of the session inside ``self.data``.
        keywords: Case-sensitive substrings searched for in ``comms.log``.
        message_threshold: Line count that must be exceeded for rule 2.
        size_threshold_kb: Total size in KiB that must be exceeded for rule 3.
    """
    if session_id not in self.data:
        return

    session_data = self.data[session_id]
    session_path = session_data.get('path')
    if not session_path or not os.path.isdir(session_path):
        return

    # Materialize once so a one-shot iterable can be re-used for every line.
    keywords = tuple(keywords)

    total_size_bytes = 0
    message_count = 0
    found_keywords = []  # kept in order of first appearance for the reason text

    # The scan is deliberately best-effort: an unreadable entry must not
    # prevent the remaining files from being measured, and only genuine
    # filesystem errors (OSError) are tolerated.
    try:
        with os.scandir(session_path) as entries:
            for entry in entries:
                try:
                    if not entry.is_file():
                        continue
                    total_size_bytes += entry.stat().st_size
                except OSError:
                    continue

                if entry.name != "comms.log":
                    continue

                try:
                    with open(entry.path, 'r', encoding='utf-8', errors='ignore') as f:
                        for line in f:
                            message_count += 1
                            for kw in keywords:
                                if kw not in found_keywords and kw in line:
                                    found_keywords.append(kw)
                except OSError:
                    # Keep whatever was counted before the read failed.
                    pass
    except OSError:
        # The directory vanished or became unreadable; report what we have.
        pass

    size_kb = total_size_bytes / 1024

    # Start from the current state so an existing whitelist entry survives
    # a scan in which no heuristic matches.
    whitelisted = bool(session_data.get('whitelisted', False))
    previous_metadata = session_data.get('metadata') or {}
    reason = previous_metadata.get('reason', "") if whitelisted else ""

    if found_keywords:
        whitelisted = True
        reason = f"Found keywords: {', '.join(found_keywords)}"
    elif message_count > message_threshold:
        whitelisted = True
        reason = f"High message count: {message_count}"
    elif size_kb > size_threshold_kb:
        whitelisted = True
        reason = f"Large session size: {size_kb:.1f} KB"

    self.update_session_metadata(
        session_id,
        message_count=message_count,
        # NOTE: this is the number of distinct keywords seen, not the
        # number of matching lines.
        errors=len(found_keywords),
        size_kb=int(size_kb),
        whitelisted=whitelisted,
        reason=reason
    )
@pytest.fixture
def registry_setup(tmp_path):
    """Yield-free fixture: a LogRegistry backed by tmp_path and an empty logs dir."""
    log_root = tmp_path / "logs"
    log_root.mkdir()
    return LogRegistry(str(tmp_path / "log_registry.toml")), log_root


def _register_and_scan(registry, log_root, session_id, file_name, content):
    """Create a one-file session directory, register it and run the auto-whitelist scan."""
    session_dir = log_root / session_id
    session_dir.mkdir()
    (session_dir / file_name).write_text(content)

    registry.register_session(session_id, str(session_dir), datetime.now())
    registry.update_auto_whitelist_status(session_id)


def test_auto_whitelist_keywords(registry_setup):
    """A comms.log mentioning ERROR whitelists the session and names the keyword."""
    registry, log_root = registry_setup
    sid = "test_kw"

    _register_and_scan(
        registry, log_root, sid,
        "comms.log", "Some message\nAN ERROR OCCURRED\nMore text",
    )

    assert registry.is_session_whitelisted(sid)
    assert "ERROR" in registry.data[sid]["metadata"]["reason"]


def test_auto_whitelist_message_count(registry_setup):
    """More than ten comms.log lines whitelists the session."""
    registry, log_root = registry_setup
    sid = "test_msg_count"

    _register_and_scan(
        registry, log_root, sid,
        "comms.log", "\n".join(["msg"] * 15),
    )

    assert registry.is_session_whitelisted(sid)
    assert registry.data[sid]["metadata"]["message_count"] == 15


def test_auto_whitelist_large_size(registry_setup):
    """A session directory holding more than 50 KB whitelists the session."""
    registry, log_root = registry_setup
    sid = "test_large"

    _register_and_scan(registry, log_root, sid, "large.log", "x" * 60000)

    assert registry.is_session_whitelisted(sid)
    assert "Large session size" in registry.data[sid]["metadata"]["reason"]


def test_no_auto_whitelist_insignificant(registry_setup):
    """A tiny, keyword-free comms.log leaves the session un-whitelisted."""
    registry, log_root = registry_setup
    sid = "test_insignificant"

    _register_and_scan(registry, log_root, sid, "comms.log", "hello\nworld")

    assert not registry.is_session_whitelisted(sid)
    assert registry.data[sid]["metadata"]["message_count"] == 2