From 3c9e03dd3ced0bcdd127a66e472e796ec863dcfd Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sat, 7 Mar 2026 12:35:37 -0500 Subject: [PATCH] fix(logs): Make empty log pruning more robust by including sessions with missing metadata --- src/log_registry.py | 17 +++++++++------ tests/test_log_pruning_heuristic.py | 33 +++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/src/log_registry.py b/src/log_registry.py index 24382ce..c458194 100644 --- a/src/log_registry.py +++ b/src/log_registry.py @@ -243,14 +243,17 @@ class LogRegistry: start_time = start_time_raw is_whitelisted = session_data.get('whitelisted', False) - # Heuristic: also include non-whitelisted sessions that have 0 messages or 0 KB size - metadata = session_data.get('metadata', {}) or {} - message_count = metadata.get('message_count', -1) - size_kb = metadata.get('size_kb', -1) - is_empty = (message_count == 0 or size_kb == 0) + # Heuristic: also include non-whitelisted sessions that have 0 messages or 0 KB size, or missing metadata + metadata = session_data.get('metadata') + if metadata is None: + is_empty = True + else: + message_count = metadata.get('message_count', -1) + size_kb = metadata.get('size_kb', -1) + is_empty = (message_count == 0 or size_kb == 0) - if start_time is not None and not is_whitelisted: - if start_time < cutoff_datetime or is_empty: + if not is_whitelisted: + if is_empty or (start_time is not None and start_time < cutoff_datetime): old_sessions.append({ 'session_id': session_id, 'path': session_data.get('path'), diff --git a/tests/test_log_pruning_heuristic.py b/tests/test_log_pruning_heuristic.py index 1f1218b..624aa95 100644 --- a/tests/test_log_pruning_heuristic.py +++ b/tests/test_log_pruning_heuristic.py @@ -64,6 +64,19 @@ class TestLogPruningHeuristic(unittest.TestCase): self.assertNotIn("recent_not_empty", session_ids) self.assertNotIn("old_w", session_ids) + def test_get_old_non_whitelisted_sessions_includes_sessions_without_metadata(self) -> None: + now = datetime.now() + cutoff_time = now - timedelta(days=7) + + # Recent, not whitelisted, NO metadata (should be included) + # _create_session without message_count/size_kb will leave metadata=None + self._create_session("recent_no_metadata", now - timedelta(days=1)) + + sessions = self.registry.get_old_non_whitelisted_sessions(cutoff_time) + session_ids = {s['session_id'] for s in sessions} + + self.assertIn("recent_no_metadata", session_ids) + def test_prune_removes_empty_sessions_regardless_of_age(self) -> None: now = datetime.now() @@ -86,5 +99,25 @@ class TestLogPruningHeuristic(unittest.TestCase): self.assertFalse(os.path.exists(session_path)) self.assertNotIn(session_id, self.registry.data) + def test_prune_removes_sessions_without_metadata_regardless_of_age(self) -> None: + now = datetime.now() + session_id = "recent_no_metadata_to_prune" + session_path = os.path.join(self.logs_dir, session_id) + os.makedirs(session_path, exist_ok=True) + # Actual file size 0 + with open(os.path.join(session_path, "comms.log"), "w") as f: + pass + + self.registry.register_session(session_id, session_path, now - timedelta(hours=1)) + # NO metadata update + + self.assertTrue(os.path.exists(session_path)) + + # Prune with max_age_days=30 + self.pruner.prune(max_age_days=30, min_size_kb=1) + + self.assertFalse(os.path.exists(session_path)) + self.assertNotIn(session_id, self.registry.data) + if __name__ == '__main__': unittest.main()