feat(logging): Implement auto-whitelisting heuristics for log sessions

This commit is contained in:
2026-02-26 09:05:15 -05:00
parent ff98a63450
commit 4e9c47f081
4 changed files with 143 additions and 7 deletions

View File

@@ -40,7 +40,7 @@ This file tracks all major tracks for the project. Each track has its own detail
---
- [ ] **Track: Review logging used throughout the project. The log directory has several categories of logs and they are getting quite large in number. We need sub-directories and we need a way to prune logs that aren't valuable to keep.**
- [~] **Track: Review logging used throughout the project. The log directory has several categories of logs and they are getting quite large in number. We need sub-directories and we need a way to prune logs that aren't valuable to keep.**
*Link: [./tracks/logging_refactor_20260226/](./tracks/logging_refactor_20260226/)*
---

View File

@@ -1,20 +1,20 @@
# Implementation Plan: Logging Reorganization and Automated Pruning
## Phase 1: Session Organization & Registry Foundation
- [ ] Task: Initialize MMA Environment (Protocol: `activate_skill mma-orchestrator`)
- [ ] Task: Implement `LogRegistry` to manage `log_registry.toml`
- [x] Task: Initialize MMA Environment (Protocol: `activate_skill mma-orchestrator`) [9a66b76]
- [x] Task: Implement `LogRegistry` to manage `log_registry.toml` [10fbfd0]
- [ ] Define TOML schema for session metadata.
- [ ] Create methods to register sessions and update whitelist status.
- [ ] Task: Implement Session-Based Directory Creation
- [x] Task: Implement Session-Based Directory Creation [3f4dc1a]
- [ ] Create utility to generate Session IDs: `YYYYMMDD_HHMMSS[_Label]`.
- [ ] Update logging initialization to create and use session sub-directories.
- [ ] Task: Conductor - User Manual Verification 'Phase 1: Foundation' (Protocol in workflow.md)
- [x] Task: Conductor - User Manual Verification 'Phase 1: Foundation' (Protocol in workflow.md) [3f4dc1a]
## Phase 2: Pruning Logic & Heuristics
- [ ] Task: Implement `LogPruner` Core Logic
- [x] Task: Implement `LogPruner` Core Logic [bd2a79c]
- [ ] Implement time-based filtering (older than 24h).
- [ ] Implement size-based heuristic for "insignificance" (~2 KB).
- [ ] Task: Implement Auto-Whitelisting Heuristics
- [~] Task: Implement Auto-Whitelisting Heuristics
- [ ] Implement content scanning for `ERROR`, `WARNING`, `EXCEPTION`.
- [ ] Implement complexity detection (message count > 10).
- [ ] Task: Integrate Pruning into App Startup

View File

@@ -117,6 +117,66 @@ class LogRegistry:
# Check the top-level 'whitelisted' flag. If it's not set or False, it's not whitelisted.
return session_data.get('whitelisted', False)
def update_auto_whitelist_status(self, session_id: str):
    """
    Analyzes session logs and updates whitelisting status based on heuristics.

    The session directory recorded under ``self.data[session_id]['path']`` is
    scanned (non-recursively). A session is whitelisted when, in priority
    order:

    1. ``comms.log`` contains any of ``ERROR``, ``WARNING`` or ``EXCEPTION``
       (case-sensitive substring match), or
    2. ``comms.log`` has more than 10 lines, or
    3. the combined size of all regular files in the directory exceeds 50 KB.

    The result is stored through ``self.update_session_metadata`` with the
    keys ``message_count``, ``errors``, ``size_kb``, ``whitelisted`` and
    ``reason``.

    Nothing happens when the session is unknown, has no recorded path, or
    the path is not an existing directory. Filesystem errors while scanning
    are tolerated: the affected file is skipped and the heuristics run on
    whatever could be read.

    Args:
        session_id: Key of the session in ``self.data``.
    """
    if session_id not in self.data:
        return

    session_path = self.data[session_id].get('path')
    if not session_path or not os.path.isdir(session_path):
        return

    keywords_to_check = ('ERROR', 'WARNING', 'EXCEPTION')
    total_size_bytes = 0
    message_count = 0
    found_keywords = []  # kept as a list so the reason lists keywords in discovery order

    try:
        # The context manager releases the directory handle deterministically.
        with os.scandir(session_path) as entries:
            for entry in entries:
                try:
                    if not entry.is_file():
                        continue
                    total_size_bytes += entry.stat().st_size
                except OSError:
                    # One unreadable entry must not truncate the whole scan.
                    continue

                # Only comms.log is analyzed for messages and keywords.
                if entry.name != "comms.log":
                    continue

                try:
                    with open(entry.path, 'r', encoding='utf-8', errors='ignore') as f:
                        for line in f:
                            # Each line of comms.log is counted as one message.
                            message_count += 1
                            for kw in keywords_to_check:
                                if kw not in found_keywords and kw in line:
                                    found_keywords.append(kw)
                except OSError:
                    # Best effort: keep whatever was counted before the failure.
                    pass
    except OSError:
        # The directory vanished or became unreadable; use what was gathered.
        pass

    size_kb = total_size_bytes / 1024
    whitelisted = False
    reason = ""

    if found_keywords:
        whitelisted = True
        reason = f"Found keywords: {', '.join(found_keywords)}"
    elif message_count > 10:
        whitelisted = True
        reason = f"High message count: {message_count}"
    elif size_kb > 50:
        whitelisted = True
        reason = f"Large session size: {size_kb:.1f} KB"

    # NOTE(review): 'whitelisted' is written even when False; if
    # update_session_metadata propagates it to the session's top-level flag,
    # this could clear a manual whitelist entry -- confirm against that method.
    self.update_session_metadata(
        session_id,
        message_count=message_count,
        # Number of distinct keyword kinds seen, not the number of occurrences.
        errors=len(found_keywords),
        size_kb=int(size_kb),
        whitelisted=whitelisted,
        reason=reason
    )
def get_old_non_whitelisted_sessions(self, cutoff_datetime):
"""Gets sessions older than cutoff_datetime and not whitelisted."""
old_sessions = []

View File

@@ -0,0 +1,76 @@
import os
import pytest
from datetime import datetime
from log_registry import LogRegistry
@pytest.fixture
def registry_setup(tmp_path):
    """Return a ``(registry, logs_dir)`` pair rooted in pytest's temp directory.

    ``logs_dir`` is a freshly created ``logs`` folder; the registry is a
    ``LogRegistry`` pointed at ``log_registry.toml`` beside it.
    """
    session_root = tmp_path / "logs"
    session_root.mkdir()
    toml_location = tmp_path / "log_registry.toml"
    return LogRegistry(str(toml_location)), session_root
def test_auto_whitelist_keywords(registry_setup):
    """A comms.log containing ERROR whitelists the session and names the keyword."""
    registry, logs_dir = registry_setup
    sid = "test_kw"

    # Session folder whose comms.log carries an ERROR line.
    folder = logs_dir / sid
    folder.mkdir()
    (folder / "comms.log").write_text("Some message\nAN ERROR OCCURRED\nMore text")

    registry.register_session(sid, str(folder), datetime.now())
    registry.update_auto_whitelist_status(sid)

    assert registry.is_session_whitelisted(sid)
    recorded_reason = registry.data[sid]["metadata"]["reason"]
    assert "ERROR" in recorded_reason
def test_auto_whitelist_message_count(registry_setup):
    """More than ten comms.log lines whitelists the session and records the count."""
    registry, logs_dir = registry_setup
    sid = "test_msg_count"

    # Fifteen keyword-free lines: only the message-count heuristic can fire.
    folder = logs_dir / sid
    folder.mkdir()
    payload = "\n".join(["msg"] * 15)
    (folder / "comms.log").write_text(payload)

    registry.register_session(sid, str(folder), datetime.now())
    registry.update_auto_whitelist_status(sid)

    assert registry.is_session_whitelisted(sid)
    recorded_count = registry.data[sid]["metadata"]["message_count"]
    assert recorded_count == 15
def test_auto_whitelist_large_size(registry_setup):
    """A session exceeding 50 KB on disk is whitelisted with a size-based reason."""
    registry, logs_dir = registry_setup
    sid = "test_large"

    # 60000 bytes in a non-comms file, so only the size heuristic can fire.
    folder = logs_dir / sid
    folder.mkdir()
    (folder / "large.log").write_text("x" * 60000)

    registry.register_session(sid, str(folder), datetime.now())
    registry.update_auto_whitelist_status(sid)

    assert registry.is_session_whitelisted(sid)
    recorded_reason = registry.data[sid]["metadata"]["reason"]
    assert "Large session size" in recorded_reason
def test_no_auto_whitelist_insignificant(registry_setup):
    """A tiny, keyword-free, two-line session must stay off the whitelist."""
    registry, logs_dir = registry_setup
    sid = "test_insignificant"

    # Nothing here should trigger a heuristic: small, short, no keywords.
    folder = logs_dir / sid
    folder.mkdir()
    (folder / "comms.log").write_text("hello\nworld")

    registry.register_session(sid, str(folder), datetime.now())
    registry.update_auto_whitelist_status(sid)

    assert not registry.is_session_whitelisted(sid)
    recorded_count = registry.data[sid]["metadata"]["message_count"]
    assert recorded_count == 2