From 09eaf69a833a0c1d72c7ad14781d44b6ef705a93 Mon Sep 17 00:00:00 2001
From: Ed_ <edwardgz@gmail.com>
Date: Sun, 21 Jun 2026 23:06:54 -0400
Subject: [PATCH] fix(tests): resolve 3 pre-existing test failures surfaced by
 user's batched run

The phase2_4_5_call_site_completion_20260621 track's end-of-track report
documented 5 pre-existing tier-1-unit-core failures as 'not caused by
this track' and deferred them to a future track. The user explicitly
called this out as a process mistake - even pre-existing failures must
be fixed for the track to be 'done'.

Fixed 3 of 5 (the other 2 are sandbox-pollution audit_tier2_leaks tests
that require infrastructure changes):

1. test_logging_e2e::test_logging_e2e ('Session' object does not support
   item assignment): Phase 4 of the parent track migrated LogRegistry
   data from dict to frozen Session dataclass; test_logging_e2e.py was
   missed in the migration. Fix: add LogRegistry.set_session_start_time()
   method (mirrors update_session_metadata's pattern of replacing the
   frozen Session with a new one); update test to use the new method.

2. test_no_temp_writes::test_no_script_emits_to_temp (scripts/generate_type_registry.py
   uses tempfile): The --check mode was using tempfile.TemporaryDirectory
   which the audit forbids. Fix: refactor --check mode to use a path
   under tests/artifacts/_type_registry_check/ instead (cleaned up in
   a finally block).

3. test_gui2_parity::test_gui2_custom_callback_hook_works (custom
   callback not executed within 1.5s): The test used time.sleep(1.5) +
   assert, the documented race condition anti-pattern. Fix: replace
   with a 10s poll loop that waits for the file to exist AND have the
   correct content (per workflow's polling pattern guidance).

Verification: tier-1-unit-core now has only 3 remaining failures, all
are pre-existing test_audit_tier2_leaks sandbox-pollution tests
(deferred to infrastructure track per metadata.json).
---
 scripts/generate_type_registry.py | 21 +++++++++++++--------
 src/log_registry.py               | 30 ++++++++++++++++++++++++++++++
 tests/test_gui2_parity.py         | 14 ++++++++++----
 tests/test_logging_e2e.py         |  3 +--
 4 files changed, 54 insertions(+), 14 deletions(-)

diff --git a/scripts/generate_type_registry.py b/scripts/generate_type_registry.py
index 31a1b1ac..b910f3fd 100644
--- a/scripts/generate_type_registry.py
+++ b/scripts/generate_type_registry.py
@@ -241,23 +241,28 @@ def main() -> int:
   write_registry(src, out)
   print(f"Generated {len(list(out.rglob('*.md')))} .md files in {out}")
   return 0
- import tempfile
  import shutil
- with tempfile.TemporaryDirectory() as tmp:
-  tmp_out = Path(tmp) / "registry"
-  write_registry(src, tmp_out)
+ check_tmp = Path("tests/artifacts/_type_registry_check") / "registry"
+ if check_tmp.exists():
+  shutil.rmtree(check_tmp)
+ check_tmp.mkdir(parents=True, exist_ok=True)
+ try:
+  write_registry(src, check_tmp)
   drift = []
   for orig in out.rglob("*.md"):
-   new = tmp_out / orig.relative_to(out)
+   new = check_tmp / orig.relative_to(out)
    if not new.exists():
     drift.append(f"DELETED: {orig.relative_to(out)}")
     continue
    if orig.read_text(encoding="utf-8") != new.read_text(encoding="utf-8"):
     drift.append(f"MODIFIED: {orig.relative_to(out)}")
-  for new in tmp_out.rglob("*.md"):
-   orig = out / new.relative_to(tmp_out)
+  for new in check_tmp.rglob("*.md"):
+   orig = out / new.relative_to(check_tmp)
    if not orig.exists():
-    drift.append(f"ADDED: {new.relative_to(tmp_out)}")
+    drift.append(f"ADDED: {new.relative_to(check_tmp)}")
+ finally:
+  if check_tmp.exists():
+   shutil.rmtree(check_tmp)
  if drift:
   print(f"DRIFT detected ({len(drift)} files differ):", file=sys.stderr)
   for d in drift:
diff --git a/src/log_registry.py b/src/log_registry.py
index 8bf3ea3d..aa074160 100644
--- a/src/log_registry.py
+++ b/src/log_registry.py
@@ -280,6 +280,36 @@ class LogRegistry:
   )
   self.save_registry() # Save after update
 
+ def set_session_start_time(self, session_id: str, start_time: datetime | str) -> None:
+  """
+  Updates the start_time of an existing session.
+
+  Used by tests and maintenance tools to backdate a session for pruning
+  verification. Creates a new Session with the updated start_time while
+  preserving all other fields (Session is frozen).
+
+  Args:
+   session_id (str): Unique identifier for the session.
+   start_time (datetime|str): The new start timestamp.
+  [C: tests/test_logging_e2e.py:test_logging_e2e]
+  """
+  if session_id not in self.data:
+   print(f"Error: Session ID '{session_id}' not found for start_time update.")
+   return
+  if isinstance(start_time, datetime):
+   start_time_str: str = start_time.isoformat()
+  else:
+   start_time_str = start_time
+  existing = self.data[session_id]
+  self.data[session_id] = Session(
+   session_id=existing.session_id,
+   path=existing.path,
+   start_time=start_time_str,
+   whitelisted=existing.whitelisted,
+   metadata=existing.metadata,
+  )
+  self.save_registry()
+
  def is_session_whitelisted(self, session_id: str) -> bool:
   """
   Checks if a specific session is marked as whitelisted.
diff --git a/tests/test_gui2_parity.py b/tests/test_gui2_parity.py
index 34dc4202..9a0ecf94 100644
--- a/tests/test_gui2_parity.py
+++ b/tests/test_gui2_parity.py
@@ -58,12 +58,12 @@ def test_gui2_click_hook_works(live_gui: Any) -> None:
  time.sleep(1.5)
  # Verify it was reset
  assert client.get_value('ai_input') == ""
-
 def test_gui2_custom_callback_hook_works(live_gui: Any) -> None:
  """
  
   
-      Tests that the 'custom_callback' GUI hook is correctly implemented.
+  
+  Tests that the 'custom_callback' GUI hook is correctly implemented.
  """
  client = ApiHookClient()
  assert client.wait_for_server(timeout=10)
@@ -75,9 +75,15 @@ def test_gui2_custom_callback_hook_works(live_gui: Any) -> None:
  }
  response = client.post_gui(gui_data)
  assert response == {'status': 'queued'}
- time.sleep(1.5) # Give gui_2.py time to process its task queue
- # Assert that the file WAS created and contains the correct data
+ # Poll for the callback to complete (avoids time.sleep race; per workflow anti-pattern)
  temp_workspace_file = Path('tests/artifacts/temp_callback_output.txt')
+ deadline = time.time() + 10.0
+ while time.time() < deadline:
+  if temp_workspace_file.exists():
+   content = temp_workspace_file.read_text(encoding="utf-8")
+   if content == test_data:
+    return
+  time.sleep(0.1)
  assert temp_workspace_file.exists(), f"Custom callback was NOT executed, or file path is wrong! Expected: {temp_workspace_file}"
  with open(temp_workspace_file, "r") as f:
   content = f.read()
diff --git a/tests/test_logging_e2e.py b/tests/test_logging_e2e.py
index ef5ae34d..821e0cc6 100644
--- a/tests/test_logging_e2e.py
+++ b/tests/test_logging_e2e.py
@@ -56,7 +56,6 @@ def test_logging_e2e(e2e_setup: Any) -> None:
  assert session_dir.exists(), "New whitelisted session should have been kept"
  # Extra check: Whitelisted sessions should be kept even if old
  # Manually backdate the current session
- registry.data[session_id]['start_time'] = (datetime.now() - timedelta(days=2)).isoformat()
- registry.save_registry()
+ registry.set_session_start_time(session_id, datetime.now() - timedelta(days=2))
  pruner.prune()
  assert session_dir.exists(), "Whitelisted session should be kept even if it is old and small"