feat(logging): Implement auto-whitelisting heuristics for log sessions
This commit is contained in:
@@ -40,7 +40,7 @@ This file tracks all major tracks for the project. Each track has its own detail
|
||||
|
||||
---
|
||||
|
||||
- [ ] **Track: Review logging used throughout the project. The log directory has several categories of logs and they are getting quite large in number. We need sub-directories and we need a way to prune logs that aren't valuable to keep.**
|
||||
- [~] **Track: Review logging used throughout the project. The log directory has several categories of logs and they are getting quite large in number. We need sub-directories and we need a way to prune logs that aren't valuable to keep.**
|
||||
*Link: [./tracks/logging_refactor_20260226/](./tracks/logging_refactor_20260226/)*
|
||||
|
||||
---
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
# Implementation Plan: Logging Reorganization and Automated Pruning
|
||||
|
||||
## Phase 1: Session Organization & Registry Foundation
|
||||
- [ ] Task: Initialize MMA Environment (Protocol: `activate_skill mma-orchestrator`)
|
||||
- [ ] Task: Implement `LogRegistry` to manage `log_registry.toml`
|
||||
- [x] Task: Initialize MMA Environment (Protocol: `activate_skill mma-orchestrator`) [9a66b76]
|
||||
- [x] Task: Implement `LogRegistry` to manage `log_registry.toml` [10fbfd0]
|
||||
- [ ] Define TOML schema for session metadata.
|
||||
- [ ] Create methods to register sessions and update whitelist status.
|
||||
- [ ] Task: Implement Session-Based Directory Creation
|
||||
- [x] Task: Implement Session-Based Directory Creation [3f4dc1a]
|
||||
- [ ] Create utility to generate Session IDs: `YYYYMMDD_HHMMSS[_Label]`.
|
||||
- [ ] Update logging initialization to create and use session sub-directories.
|
||||
- [ ] Task: Conductor - User Manual Verification 'Phase 1: Foundation' (Protocol in workflow.md)
|
||||
- [x] Task: Conductor - User Manual Verification 'Phase 1: Foundation' (Protocol in workflow.md) [3f4dc1a]
|
||||
|
||||
## Phase 2: Pruning Logic & Heuristics
|
||||
- [ ] Task: Implement `LogPruner` Core Logic
|
||||
- [x] Task: Implement `LogPruner` Core Logic [bd2a79c]
|
||||
- [ ] Implement time-based filtering (older than 24h).
|
||||
- [ ] Implement size-based heuristic for "insignificance" (~2 KB).
|
||||
- [ ] Task: Implement Auto-Whitelisting Heuristics
|
||||
- [~] Task: Implement Auto-Whitelisting Heuristics
|
||||
- [ ] Implement content scanning for `ERROR`, `WARNING`, `EXCEPTION`.
|
||||
- [ ] Implement complexity detection (message count > 10).
|
||||
- [ ] Task: Integrate Pruning into App Startup
|
||||
|
||||
@@ -117,6 +117,66 @@ class LogRegistry:
|
||||
# Check the top-level 'whitelisted' flag. If it's not set or False, it's not whitelisted.
|
||||
return session_data.get('whitelisted', False)
|
||||
|
||||
def update_auto_whitelist_status(self, session_id: str):
    """
    Analyzes session logs and updates whitelisting status based on heuristics.

    The session directory (the 'path' entry of the session's record) is
    scanned non-recursively. The sizes of all regular files are summed, and
    a file named "comms.log" is read line by line to count lines and to look
    for the keywords ERROR, WARNING and EXCEPTION.

    A session is whitelisted by the first matching rule:
      1. any keyword was found in comms.log,
      2. comms.log has more than 10 lines,
      3. the directory's files total more than 50 KB.

    The result is stored through ``self.update_session_metadata``. The call
    is made even when no rule matches (whitelisted=False, empty reason).
    Nothing happens if the session is unknown or its path is not a directory.

    File-system errors are tolerated: an unreadable entry is skipped and the
    heuristics run on whatever could be collected.
    """
    if session_id not in self.data:
        return

    session_path = self.data[session_id].get('path')
    if not session_path or not os.path.isdir(session_path):
        return

    keywords_to_check = ('ERROR', 'WARNING', 'EXCEPTION')
    total_size_bytes = 0
    message_count = 0
    found_keywords = []

    try:
        # The context manager closes the directory handle even if the scan
        # is interrupted part-way through.
        with os.scandir(session_path) as entries:
            for entry in entries:
                try:
                    if not entry.is_file():
                        continue
                    total_size_bytes += entry.stat().st_size
                except OSError:
                    # One entry that cannot be stat'ed must not hide the rest.
                    continue

                # Only comms.log is analyzed for messages and keywords.
                if entry.name != "comms.log":
                    continue

                try:
                    with open(entry.path, 'r', encoding='utf-8', errors='ignore') as f:
                        for line in f:
                            message_count += 1
                            for kw in keywords_to_check:
                                if kw not in found_keywords and kw in line:
                                    found_keywords.append(kw)
                except OSError:
                    # Best effort: keep whatever was counted before the failure.
                    pass
    except OSError:
        # The directory vanished or became unreadable; fall through with
        # the partial (possibly zero) totals.
        pass

    size_kb = total_size_bytes / 1024
    whitelisted = False
    reason = ""

    if found_keywords:
        whitelisted = True
        reason = f"Found keywords: {', '.join(found_keywords)}"
    elif message_count > 10:
        whitelisted = True
        reason = f"High message count: {message_count}"
    elif size_kb > 50:
        whitelisted = True
        reason = f"Large session size: {size_kb:.1f} KB"

    self.update_session_metadata(
        session_id,
        message_count=message_count,
        # Number of distinct keywords seen, not the number of matching lines.
        errors=len(found_keywords),
        size_kb=int(size_kb),
        whitelisted=whitelisted,
        reason=reason,
    )
|
||||
|
||||
def get_old_non_whitelisted_sessions(self, cutoff_datetime):
|
||||
"""Gets sessions older than cutoff_datetime and not whitelisted."""
|
||||
old_sessions = []
|
||||
|
||||
76
tests/test_auto_whitelist.py
Normal file
76
tests/test_auto_whitelist.py
Normal file
@@ -0,0 +1,76 @@
|
||||
import os
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
from log_registry import LogRegistry
|
||||
|
||||
@pytest.fixture
def registry_setup(tmp_path):
    """Return a ``(registry, logs_dir)`` pair rooted in pytest's ``tmp_path``.

    ``logs_dir`` is a newly created ``logs`` directory; the registry is built
    from the path ``tmp_path / "log_registry.toml"``.
    """
    toml_file = tmp_path / "log_registry.toml"
    logs_root = tmp_path / "logs"
    logs_root.mkdir()
    return LogRegistry(str(toml_file)), logs_root
|
||||
|
||||
def test_auto_whitelist_keywords(registry_setup):
    """A comms.log containing ERROR whitelists the session and names the keyword."""
    reg, logs_root = registry_setup
    sid = "test_kw"
    target = logs_root / sid
    target.mkdir()

    # comms.log whose second line carries the ERROR keyword.
    (target / "comms.log").write_text("Some message\nAN ERROR OCCURRED\nMore text")

    reg.register_session(sid, str(target), datetime.now())
    reg.update_auto_whitelist_status(sid)

    assert reg.is_session_whitelisted(sid)
    recorded_reason = reg.data[sid]["metadata"]["reason"]
    assert "ERROR" in recorded_reason
|
||||
|
||||
def test_auto_whitelist_message_count(registry_setup):
    """A comms.log with more than 10 lines whitelists the session."""
    reg, logs_root = registry_setup
    sid = "test_msg_count"
    target = logs_root / sid
    target.mkdir()

    # Fifteen keyword-free lines, above the 10-message threshold.
    body = "\n".join("msg" for _ in range(15))
    (target / "comms.log").write_text(body)

    reg.register_session(sid, str(target), datetime.now())
    reg.update_auto_whitelist_status(sid)

    assert reg.is_session_whitelisted(sid)
    meta = reg.data[sid]["metadata"]
    assert meta["message_count"] == 15
|
||||
|
||||
def test_auto_whitelist_large_size(registry_setup):
    """A session directory holding more than 50 KB is whitelisted for its size."""
    reg, logs_root = registry_setup
    sid = "test_large"
    target = logs_root / sid
    target.mkdir()

    # A single 60000-character file; no comms.log, so only the size rule can fire.
    payload = "x" * 60000
    (target / "large.log").write_text(payload)

    reg.register_session(sid, str(target), datetime.now())
    reg.update_auto_whitelist_status(sid)

    assert reg.is_session_whitelisted(sid)
    recorded_reason = reg.data[sid]["metadata"]["reason"]
    assert "Large session size" in recorded_reason
|
||||
|
||||
def test_no_auto_whitelist_insignificant(registry_setup):
    """A tiny, keyword-free, two-line session is not whitelisted."""
    reg, logs_root = registry_setup
    sid = "test_insignificant"
    target = logs_root / sid
    target.mkdir()

    # Two short lines: below every heuristic threshold.
    (target / "comms.log").write_text("hello\nworld")

    reg.register_session(sid, str(target), datetime.now())
    reg.update_auto_whitelist_status(sid)

    assert not reg.is_session_whitelisted(sid)
    meta = reg.data[sid]["metadata"]
    assert meta["message_count"] == 2
|
||||
Reference in New Issue
Block a user