From 67ca680a05672d9c99baccd8b7dd3b6ba45163e1 Mon Sep 17 00:00:00 2001
From: Ed_
Date: Mon, 22 Jun 2026 09:48:56 -0400
Subject: [PATCH] feat(audit): per-aggregate cross_audit mapping via PCG file-index

The aggregate_findings function now does 3-tier mapping:
1. Function lookup (find_enclosing_function) -> exact match
2. File-level fallback: if the finding's file has any producer/consumer of the aggregate, bucket it there
3. Unbucketed (the file has no aggregate refs)

Handles both 'file' and 'filename' keys (v1 audit scripts use 'filename'; spec fixtures use 'file'). Path normalization for Windows paths.

Generated the 6 real audit_inputs from scripts/audit_*.py against real src/. The Metadata aggregate now shows:
- 1 unique weak_types finding (1 site, from ai_client.py:159)
- 1 unique exception_handling finding (76 sites from PARAM_OPTIONAL)

mcp_client.py shows 0 because no Metadata producer/consumer exists in the PCG for mcp_client (P1/P2 only detect typed parameter signatures, not internal field access). The next gap is expanding P3 to capture internal field use.
--- .../2026-06-22/aggregates/FileItems.dsl | 4 +- .../2026-06-22/aggregates/FileItems.tree | 2 +- .../2026-06-22/aggregates/HistoryMessage.dsl | 4 +- .../2026-06-22/aggregates/HistoryMessage.tree | 2 +- .../2026-06-22/aggregates/Metadata.dsl | 273 +++++++++--------- .../2026-06-22/aggregates/Metadata.md | 1 + .../2026-06-22/aggregates/Metadata.tree | 200 ++++++------- .../2026-06-22/cross_audit_summary.md | 2 +- src/code_path_audit.py | 42 +-- src/code_path_audit_cross_audit.py | 170 +++++++++++ 10 files changed, 437 insertions(+), 263 deletions(-) create mode 100644 src/code_path_audit_cross_audit.py diff --git a/docs/reports/code_path_audit/2026-06-22/aggregates/FileItems.dsl b/docs/reports/code_path_audit/2026-06-22/aggregates/FileItems.dsl index d4ff378d..e78d82bf 100644 --- a/docs/reports/code_path_audit/2026-06-22/aggregates/FileItems.dsl +++ b/docs/reports/code_path_audit/2026-06-22/aggregates/FileItems.dsl @@ -10,16 +10,16 @@ \ === producers (0 items) === \ === consumers (3 items) === - "src.ai_client._build_file_diff_text" "src\ai_client.py" 0 "consumer" fn-ref "src.ai_client._reread_file_items_result" "src\ai_client.py" 0 "consumer" fn-ref + "src.ai_client._build_file_diff_text" "src\ai_client.py" 0 "consumer" fn-ref "src.ai_client._build_file_context_text" "src\ai_client.py" 0 "consumer" fn-ref \ === access_pattern === "whole_struct" access-pattern \ === access_pattern_evidence (3 items) === - "src.ai_client._build_file_diff_text" "whole_struct" 0 "low" ap-evidence "src.ai_client._reread_file_items_result" "whole_struct" 0 "low" ap-evidence + "src.ai_client._build_file_diff_text" "whole_struct" 0 "low" ap-evidence "src.ai_client._build_file_context_text" "whole_struct" 0 "low" ap-evidence \ === frequency === diff --git a/docs/reports/code_path_audit/2026-06-22/aggregates/FileItems.tree b/docs/reports/code_path_audit/2026-06-22/aggregates/FileItems.tree index 36a9ee45..c61b2858 100644 --- 
a/docs/reports/code_path_audit/2026-06-22/aggregates/FileItems.tree +++ b/docs/reports/code_path_audit/2026-06-22/aggregates/FileItems.tree @@ -3,8 +3,8 @@ Metadata: FileItems |- memory_dim: curation |- producers: [0] |- consumers: [3] -| |- src.ai_client._build_file_diff_text (consumer) | |- src.ai_client._reread_file_items_result (consumer) +| |- src.ai_client._build_file_diff_text (consumer) | |- src.ai_client._build_file_context_text (consumer) |- access_pattern: whole_struct |- frequency: per_turn diff --git a/docs/reports/code_path_audit/2026-06-22/aggregates/HistoryMessage.dsl b/docs/reports/code_path_audit/2026-06-22/aggregates/HistoryMessage.dsl index 133512ed..8284472d 100644 --- a/docs/reports/code_path_audit/2026-06-22/aggregates/HistoryMessage.dsl +++ b/docs/reports/code_path_audit/2026-06-22/aggregates/HistoryMessage.dsl @@ -10,15 +10,15 @@ \ === producers (0 items) === \ === consumers (2 items) === - "src.provider_state.append" "src\provider_state.py" 0 "consumer" fn-ref "src.provider_state.replace_all" "src\provider_state.py" 0 "consumer" fn-ref + "src.provider_state.append" "src\provider_state.py" 0 "consumer" fn-ref \ === access_pattern === "whole_struct" access-pattern \ === access_pattern_evidence (2 items) === - "src.provider_state.append" "whole_struct" 0 "low" ap-evidence "src.provider_state.replace_all" "whole_struct" 0 "low" ap-evidence + "src.provider_state.append" "whole_struct" 0 "low" ap-evidence \ === frequency === "per_turn" frequency diff --git a/docs/reports/code_path_audit/2026-06-22/aggregates/HistoryMessage.tree b/docs/reports/code_path_audit/2026-06-22/aggregates/HistoryMessage.tree index 40675f74..1b85b1bc 100644 --- a/docs/reports/code_path_audit/2026-06-22/aggregates/HistoryMessage.tree +++ b/docs/reports/code_path_audit/2026-06-22/aggregates/HistoryMessage.tree @@ -3,8 +3,8 @@ Metadata: HistoryMessage |- memory_dim: discussion |- producers: [0] |- consumers: [2] -| |- src.provider_state.append (consumer) | |- 
src.provider_state.replace_all (consumer) +| |- src.provider_state.append (consumer) |- access_pattern: whole_struct |- frequency: per_turn |- result_coverage: 0 producers, 2 consumers diff --git a/docs/reports/code_path_audit/2026-06-22/aggregates/Metadata.dsl b/docs/reports/code_path_audit/2026-06-22/aggregates/Metadata.dsl index bef619b6..7f5816a0 100644 --- a/docs/reports/code_path_audit/2026-06-22/aggregates/Metadata.dsl +++ b/docs/reports/code_path_audit/2026-06-22/aggregates/Metadata.dsl @@ -8,170 +8,170 @@ "discussion" mem-dim \ === producers (77 items) === - "src.api_hook_client.right_click" "src\api_hook_client.py" 0 "producer" fn-ref - "src.app_controller._api_get_api_session" "src\app_controller.py" 0 "producer" fn-ref - "src.app_controller._api_get_api_project" "src\app_controller.py" 0 "producer" fn-ref - "src.app_controller._api_get_context" "src\app_controller.py" 0 "producer" fn-ref - "src.ai_client._content_block_to_dict" "src\ai_client.py" 0 "producer" fn-ref - "src.api_hook_client.select_list_item" "src\api_hook_client.py" 0 "producer" fn-ref - "src.api_hook_client.push_event" "src\api_hook_client.py" 0 "producer" fn-ref - "src.api_hook_client.get_gui_state" "src\api_hook_client.py" 0 "producer" fn-ref - "src.app_controller._api_get_gui_state" "src\app_controller.py" 0 "producer" fn-ref - "src.app_controller.get_api_session" "src\app_controller.py" 0 "producer" fn-ref "src.project_manager.migrate_from_legacy_config" "src\project_manager.py" 0 "producer" fn-ref - "src.api_hook_client.get_gui_health" "src\api_hook_client.py" 0 "producer" fn-ref - "src.app_controller._api_get_diagnostics" "src\app_controller.py" 0 "producer" fn-ref - "src.api_hook_client.reject_patch" "src\api_hook_client.py" 0 "producer" fn-ref - "src.api_hook_client.trigger_patch" "src\api_hook_client.py" 0 "producer" fn-ref - "src.app_controller.generate" "src\app_controller.py" 0 "producer" fn-ref - "src.app_controller.token_stats" "src\app_controller.py" 0 "producer" fn-ref - 
"src.api_hook_client.get_patch_status" "src\api_hook_client.py" 0 "producer" fn-ref - "src.app_controller._api_get_performance" "src\app_controller.py" 0 "producer" fn-ref - "src.api_hook_client.set_value" "src\api_hook_client.py" 0 "producer" fn-ref - "src.api_hook_client.get_project_switch_status" "src\api_hook_client.py" 0 "producer" fn-ref - "src.api_hook_client.get_warmup_status" "src\api_hook_client.py" 0 "producer" fn-ref - "src.project_manager.load_project" "src\project_manager.py" 0 "producer" fn-ref - "src.models._load_config_from_disk" "src\models.py" 0 "producer" fn-ref - "src.app_controller.get_context" "src\app_controller.py" 0 "producer" fn-ref - "src.app_controller.get_session" "src\app_controller.py" 0 "producer" fn-ref - "src.ai_client._parse_tool_args_result" "src\ai_client.py" 0 "producer" fn-ref - "src.models.to_dict" "src\models.py" 0 "producer" fn-ref - "src.api_hook_client.get_financial_metrics" "src\api_hook_client.py" 0 "producer" fn-ref - "src.ai_client._load_credentials" "src\ai_client.py" 0 "producer" fn-ref - "src.api_hook_client.get_project" "src\api_hook_client.py" 0 "producer" fn-ref - "src.project_manager.default_discussion" "src\project_manager.py" 0 "producer" fn-ref - "src.app_controller._api_token_stats" "src\app_controller.py" 0 "producer" fn-ref - "src.api_hook_client.get_performance" "src\api_hook_client.py" 0 "producer" fn-ref - "src.app_controller.status" "src\app_controller.py" 0 "producer" fn-ref - "src.ai_client._add_bleed_derived" "src\ai_client.py" 0 "producer" fn-ref - "src.api_hook_client.get_mma_status" "src\api_hook_client.py" 0 "producer" fn-ref - "src.project_manager.load_history" "src\project_manager.py" 0 "producer" fn-ref - "src.api_hook_client.post_gui" "src\api_hook_client.py" 0 "producer" fn-ref - "src.api_hook_client.drag" "src\api_hook_client.py" 0 "producer" fn-ref - "src.api_hook_client.get_session" "src\api_hook_client.py" 0 "producer" fn-ref "src.project_manager.str_to_entry" "src\project_manager.py" 
0 "producer" fn-ref - "src.api_hook_client.select_tab" "src\api_hook_client.py" 0 "producer" fn-ref - "src.project_manager.flat_config" "src\project_manager.py" 0 "producer" fn-ref - "src.app_controller.load_config" "src\app_controller.py" 0 "producer" fn-ref - "src.api_hook_client.apply_patch" "src\api_hook_client.py" 0 "producer" fn-ref "src.api_hook_client.get_node_status" "src\api_hook_client.py" 0 "producer" fn-ref - "src.api_hook_client.get_system_telemetry" "src\api_hook_client.py" 0 "producer" fn-ref + "src.app_controller._api_get_gui_state" "src\app_controller.py" 0 "producer" fn-ref + "src.app_controller._offload_entry_payload" "src\app_controller.py" 0 "producer" fn-ref + "src.app_controller.get_gui_state" "src\app_controller.py" 0 "producer" fn-ref + "src.app_controller._api_get_performance" "src\app_controller.py" 0 "producer" fn-ref + "src.app_controller.get_session" "src\app_controller.py" 0 "producer" fn-ref + "src.api_hook_client.push_event" "src\api_hook_client.py" 0 "producer" fn-ref "src.app_controller._api_get_session" "src\app_controller.py" 0 "producer" fn-ref + "src.models.to_dict" "src\models.py" 0 "producer" fn-ref + "src.ai_client._content_block_to_dict" "src\ai_client.py" 0 "producer" fn-ref + "src.api_hook_client.get_warmup_status" "src\api_hook_client.py" 0 "producer" fn-ref + "src.ai_client._add_bleed_derived" "src\ai_client.py" 0 "producer" fn-ref + "src.api_hook_client.get_system_telemetry" "src\api_hook_client.py" 0 "producer" fn-ref + "src.api_hook_client.get_gui_health" "src\api_hook_client.py" 0 "producer" fn-ref + "src.app_controller.get_context" "src\app_controller.py" 0 "producer" fn-ref + "src.api_hook_client.right_click" "src\api_hook_client.py" 0 "producer" fn-ref + "src.api_hook_client.get_context_state" "src\api_hook_client.py" 0 "producer" fn-ref + "src.api_hook_client.get_patch_status" "src\api_hook_client.py" 0 "producer" fn-ref + "src.ai_client.ollama_chat" "src\ai_client.py" 0 "producer" fn-ref + 
"src.api_hook_client.select_tab" "src\api_hook_client.py" 0 "producer" fn-ref + "src.api_hook_client.select_list_item" "src\api_hook_client.py" 0 "producer" fn-ref + "src.app_controller._api_get_mma_status" "src\app_controller.py" 0 "producer" fn-ref + "src.app_controller.get_session_insights" "src\app_controller.py" 0 "producer" fn-ref + "src.app_controller.get_performance" "src\app_controller.py" 0 "producer" fn-ref + "src.app_controller.token_stats" "src\app_controller.py" 0 "producer" fn-ref + "src.models._load_config_from_disk" "src\models.py" 0 "producer" fn-ref + "src.api_hook_client.get_startup_timeline" "src\api_hook_client.py" 0 "producer" fn-ref + "src.project_manager.flat_config" "src\project_manager.py" 0 "producer" fn-ref + "src.ai_client._load_credentials" "src\ai_client.py" 0 "producer" fn-ref + "src.api_hook_client.get_session" "src\api_hook_client.py" 0 "producer" fn-ref + "src.api_hook_client.get_gui_state" "src\api_hook_client.py" 0 "producer" fn-ref + "src.api_hook_client.get_io_pool_status" "src\api_hook_client.py" 0 "producer" fn-ref + "src.app_controller.get_diagnostics" "src\app_controller.py" 0 "producer" fn-ref + "src.project_manager.default_discussion" "src\project_manager.py" 0 "producer" fn-ref + "src.app_controller.get_api_session" "src\app_controller.py" 0 "producer" fn-ref + "src.api_hook_client.get_project_switch_status" "src\api_hook_client.py" 0 "producer" fn-ref + "src.project_manager.default_project" "src\project_manager.py" 0 "producer" fn-ref + "src.api_hook_client.get_project" "src\api_hook_client.py" 0 "producer" fn-ref + "src.app_controller._api_generate" "src\app_controller.py" 0 "producer" fn-ref "src.ai_client._send_cli_round_result" "src\ai_client.py" 0 "producer" fn-ref + "src.app_controller.generate" "src\app_controller.py" 0 "producer" fn-ref + "src.api_hook_client.post_session" "src\api_hook_client.py" 0 "producer" fn-ref + "src.api_hook_client.post_gui" "src\api_hook_client.py" 0 "producer" fn-ref + 
"src.project_manager.load_project" "src\project_manager.py" 0 "producer" fn-ref + "src.api_hook_client.drag" "src\api_hook_client.py" 0 "producer" fn-ref + "src.api_hook_client.get_financial_metrics" "src\api_hook_client.py" 0 "producer" fn-ref + "src.api_hook_client.get_warmup_wait" "src\api_hook_client.py" 0 "producer" fn-ref + "src.api_hook_client.reject_patch" "src\api_hook_client.py" 0 "producer" fn-ref + "src.ai_client._parse_tool_args_result" "src\ai_client.py" 0 "producer" fn-ref + "src.app_controller._api_get_api_project" "src\app_controller.py" 0 "producer" fn-ref "src.api_hook_client.get_gui_diagnostics" "src\api_hook_client.py" 0 "producer" fn-ref + "src.api_hook_client.set_value" "src\api_hook_client.py" 0 "producer" fn-ref + "src.app_controller.get_api_project" "src\app_controller.py" 0 "producer" fn-ref + "src.ai_client._dashscope_call" "src\ai_client.py" 0 "producer" fn-ref + "src.api_hook_client.get_status" "src\api_hook_client.py" 0 "producer" fn-ref + "src.app_controller._api_get_context" "src\app_controller.py" 0 "producer" fn-ref + "src.app_controller._api_get_diagnostics" "src\app_controller.py" 0 "producer" fn-ref + "src.ai_client.get_token_stats" "src\ai_client.py" 0 "producer" fn-ref + "src.api_hook_client.apply_patch" "src\api_hook_client.py" 0 "producer" fn-ref + "src.api_hook_client.post_project" "src\api_hook_client.py" 0 "producer" fn-ref + "src.app_controller.load_config" "src\app_controller.py" 0 "producer" fn-ref + "src.app_controller.wait" "src\app_controller.py" 0 "producer" fn-ref + "src.api_hook_client.get_mma_workers" "src\api_hook_client.py" 0 "producer" fn-ref "src.api_hook_client.click" "src\api_hook_client.py" 0 "producer" fn-ref "src.app_controller._api_status" "src\app_controller.py" 0 "producer" fn-ref - "src.app_controller.get_mma_status" "src\app_controller.py" 0 "producer" fn-ref - "src.app_controller.get_performance" "src\app_controller.py" 0 "producer" fn-ref - "src.ai_client.ollama_chat" "src\ai_client.py" 0 
"producer" fn-ref - "src.api_hook_client.post_project" "src\api_hook_client.py" 0 "producer" fn-ref - "src.api_hook_client.get_startup_timeline" "src\api_hook_client.py" 0 "producer" fn-ref - "src.app_controller._api_generate" "src\app_controller.py" 0 "producer" fn-ref - "src.api_hook_client.get_mma_workers" "src\api_hook_client.py" 0 "producer" fn-ref - "src.api_hook_client.post_session" "src\api_hook_client.py" 0 "producer" fn-ref - "src.ai_client.get_token_stats" "src\ai_client.py" 0 "producer" fn-ref - "src.app_controller.get_api_project" "src\app_controller.py" 0 "producer" fn-ref - "src.app_controller.get_session_insights" "src\app_controller.py" 0 "producer" fn-ref - "src.api_hook_client.get_context_state" "src\api_hook_client.py" 0 "producer" fn-ref - "src.app_controller._offload_entry_payload" "src\app_controller.py" 0 "producer" fn-ref - "src.app_controller.get_diagnostics" "src\app_controller.py" 0 "producer" fn-ref - "src.ai_client._dashscope_call" "src\ai_client.py" 0 "producer" fn-ref - "src.app_controller._api_get_mma_status" "src\app_controller.py" 0 "producer" fn-ref - "src.api_hook_client.get_warmup_wait" "src\api_hook_client.py" 0 "producer" fn-ref "src.api_hook_client.wait_for_project_switch" "src\api_hook_client.py" 0 "producer" fn-ref - "src.app_controller.wait" "src\app_controller.py" 0 "producer" fn-ref - "src.project_manager.default_project" "src\project_manager.py" 0 "producer" fn-ref - "src.api_hook_client.get_io_pool_status" "src\api_hook_client.py" 0 "producer" fn-ref - "src.app_controller.get_gui_state" "src\app_controller.py" 0 "producer" fn-ref - "src.api_hook_client.get_status" "src\api_hook_client.py" 0 "producer" fn-ref + "src.api_hook_client.trigger_patch" "src\api_hook_client.py" 0 "producer" fn-ref + "src.api_hook_client.get_performance" "src\api_hook_client.py" 0 "producer" fn-ref + "src.project_manager.load_history" "src\project_manager.py" 0 "producer" fn-ref + "src.app_controller._api_get_api_session" 
"src\app_controller.py" 0 "producer" fn-ref + "src.app_controller.status" "src\app_controller.py" 0 "producer" fn-ref "src.ai_client.get_gemini_cache_stats" "src\ai_client.py" 0 "producer" fn-ref + "src.app_controller.get_mma_status" "src\app_controller.py" 0 "producer" fn-ref + "src.app_controller._api_token_stats" "src\app_controller.py" 0 "producer" fn-ref + "src.api_hook_client.get_mma_status" "src\api_hook_client.py" 0 "producer" fn-ref \ === consumers (35 items) === - "src.app_controller._start_track_logic" "src\app_controller.py" 0 "consumer" fn-ref - "src.project_manager.flat_config" "src\project_manager.py" 0 "consumer" fn-ref - "src.ai_client._append_comms" "src\ai_client.py" 0 "consumer" fn-ref - "src.app_controller._on_comms_entry" "src\app_controller.py" 0 "consumer" fn-ref - "src.ai_client._invalidate_token_estimate" "src\ai_client.py" 0 "consumer" fn-ref - "src.project_manager.entry_to_str" "src\project_manager.py" 0 "consumer" fn-ref - "src.app_controller._offload_entry_payload" "src\app_controller.py" 0 "consumer" fn-ref - "src.models._save_config_to_disk" "src\models.py" 0 "consumer" fn-ref - "src.aggregate.run" "src\aggregate.py" 0 "consumer" fn-ref - "src.project_manager.migrate_from_legacy_config" "src\project_manager.py" 0 "consumer" fn-ref - "src.ai_client._dashscope_call" "src\ai_client.py" 0 "consumer" fn-ref - "src.ai_client._strip_cache_controls" "src\ai_client.py" 0 "consumer" fn-ref - "src.ai_client._add_history_cache_breakpoint" "src\ai_client.py" 0 "consumer" fn-ref - "src.aggregate.build_markdown_from_items" "src\aggregate.py" 0 "consumer" fn-ref - "src.ai_client._trim_anthropic_history" "src\ai_client.py" 0 "consumer" fn-ref - "src.ai_client._repair_deepseek_history" "src\ai_client.py" 0 "consumer" fn-ref - "src.aggregate.build_tier3_context" "src\aggregate.py" 0 "consumer" fn-ref - "src.app_controller._start_track_logic_result" "src\app_controller.py" 0 "consumer" fn-ref - "src.aggregate.build_markdown_no_history" 
"src\aggregate.py" 0 "consumer" fn-ref - "src.ai_client._add_bleed_derived" "src\ai_client.py" 0 "consumer" fn-ref - "src.ai_client.ollama_chat" "src\ai_client.py" 0 "consumer" fn-ref - "src.aggregate._build_files_section_from_items" "src\aggregate.py" 0 "consumer" fn-ref - "src.ai_client._estimate_message_tokens" "src\ai_client.py" 0 "consumer" fn-ref - "src.ai_client._trim_minimax_history" "src\ai_client.py" 0 "consumer" fn-ref - "src.project_manager.format_discussion" "src\project_manager.py" 0 "consumer" fn-ref - "src.ai_client._strip_stale_file_refreshes" "src\ai_client.py" 0 "consumer" fn-ref - "src.ai_client._strip_private_keys" "src\ai_client.py" 0 "consumer" fn-ref - "src.app_controller._refresh_api_metrics" "src\app_controller.py" 0 "consumer" fn-ref - "src.project_manager.save_project" "src\project_manager.py" 0 "consumer" fn-ref - "src.models.from_dict" "src\models.py" 0 "consumer" fn-ref - "src.ai_client._repair_minimax_history" "src\ai_client.py" 0 "consumer" fn-ref - "src.ai_client._pre_dispatch" "src\ai_client.py" 0 "consumer" fn-ref "src.ai_client._repair_anthropic_history" "src\ai_client.py" 0 "consumer" fn-ref + "src.app_controller._on_comms_entry" "src\app_controller.py" 0 "consumer" fn-ref + "src.ai_client._add_history_cache_breakpoint" "src\ai_client.py" 0 "consumer" fn-ref + "src.ai_client._estimate_message_tokens" "src\ai_client.py" 0 "consumer" fn-ref "src.ai_client._estimate_prompt_tokens" "src\ai_client.py" 0 "consumer" fn-ref + "src.ai_client._strip_private_keys" "src\ai_client.py" 0 "consumer" fn-ref + "src.project_manager.flat_config" "src\project_manager.py" 0 "consumer" fn-ref + "src.ai_client._invalidate_token_estimate" "src\ai_client.py" 0 "consumer" fn-ref + "src.ai_client._strip_cache_controls" "src\ai_client.py" 0 "consumer" fn-ref + "src.ai_client._repair_deepseek_history" "src\ai_client.py" 0 "consumer" fn-ref + "src.ai_client._repair_minimax_history" "src\ai_client.py" 0 "consumer" fn-ref + "src.models._save_config_to_disk" 
"src\models.py" 0 "consumer" fn-ref + "src.ai_client._dashscope_call" "src\ai_client.py" 0 "consumer" fn-ref + "src.models.from_dict" "src\models.py" 0 "consumer" fn-ref + "src.ai_client._strip_stale_file_refreshes" "src\ai_client.py" 0 "consumer" fn-ref "src.ai_client._execute_single_tool_call_async" "src\ai_client.py" 0 "consumer" fn-ref + "src.aggregate.run" "src\aggregate.py" 0 "consumer" fn-ref + "src.aggregate._build_files_section_from_items" "src\aggregate.py" 0 "consumer" fn-ref + "src.ai_client._add_bleed_derived" "src\ai_client.py" 0 "consumer" fn-ref + "src.aggregate.build_markdown_from_items" "src\aggregate.py" 0 "consumer" fn-ref + "src.app_controller._refresh_api_metrics" "src\app_controller.py" 0 "consumer" fn-ref + "src.project_manager.entry_to_str" "src\project_manager.py" 0 "consumer" fn-ref + "src.app_controller._start_track_logic_result" "src\app_controller.py" 0 "consumer" fn-ref + "src.aggregate.build_tier3_context" "src\aggregate.py" 0 "consumer" fn-ref + "src.ai_client._trim_minimax_history" "src\ai_client.py" 0 "consumer" fn-ref + "src.ai_client._append_comms" "src\ai_client.py" 0 "consumer" fn-ref + "src.project_manager.save_project" "src\project_manager.py" 0 "consumer" fn-ref + "src.project_manager.migrate_from_legacy_config" "src\project_manager.py" 0 "consumer" fn-ref + "src.ai_client._trim_anthropic_history" "src\ai_client.py" 0 "consumer" fn-ref + "src.app_controller._offload_entry_payload" "src\app_controller.py" 0 "consumer" fn-ref + "src.aggregate.build_markdown_no_history" "src\aggregate.py" 0 "consumer" fn-ref + "src.ai_client.ollama_chat" "src\ai_client.py" 0 "consumer" fn-ref + "src.ai_client._pre_dispatch" "src\ai_client.py" 0 "consumer" fn-ref + "src.app_controller._start_track_logic" "src\app_controller.py" 0 "consumer" fn-ref + "src.project_manager.format_discussion" "src\project_manager.py" 0 "consumer" fn-ref \ === access_pattern === "whole_struct" access-pattern \ === access_pattern_evidence (35 items) === - 
"src.app_controller._start_track_logic" "whole_struct" 0 "low" ap-evidence - "src.project_manager.flat_config" "whole_struct" 0 "low" ap-evidence - "src.ai_client._append_comms" "whole_struct" 0 "low" ap-evidence - "src.app_controller._on_comms_entry" "whole_struct" 0 "low" ap-evidence - "src.ai_client._invalidate_token_estimate" "whole_struct" 0 "low" ap-evidence - "src.project_manager.entry_to_str" "whole_struct" 0 "low" ap-evidence - "src.app_controller._offload_entry_payload" "whole_struct" 0 "low" ap-evidence - "src.models._save_config_to_disk" "whole_struct" 0 "low" ap-evidence - "src.aggregate.run" "whole_struct" 0 "low" ap-evidence - "src.project_manager.migrate_from_legacy_config" "whole_struct" 0 "low" ap-evidence - "src.ai_client._dashscope_call" "whole_struct" 0 "low" ap-evidence - "src.ai_client._strip_cache_controls" "whole_struct" 0 "low" ap-evidence - "src.ai_client._add_history_cache_breakpoint" "whole_struct" 0 "low" ap-evidence - "src.aggregate.build_markdown_from_items" "whole_struct" 0 "low" ap-evidence - "src.ai_client._trim_anthropic_history" "whole_struct" 0 "low" ap-evidence - "src.ai_client._repair_deepseek_history" "whole_struct" 0 "low" ap-evidence - "src.aggregate.build_tier3_context" "whole_struct" 0 "low" ap-evidence - "src.app_controller._start_track_logic_result" "whole_struct" 0 "low" ap-evidence - "src.aggregate.build_markdown_no_history" "whole_struct" 0 "low" ap-evidence - "src.ai_client._add_bleed_derived" "whole_struct" 0 "low" ap-evidence - "src.ai_client.ollama_chat" "whole_struct" 0 "low" ap-evidence - "src.aggregate._build_files_section_from_items" "whole_struct" 0 "low" ap-evidence - "src.ai_client._estimate_message_tokens" "whole_struct" 0 "low" ap-evidence - "src.ai_client._trim_minimax_history" "whole_struct" 0 "low" ap-evidence - "src.project_manager.format_discussion" "whole_struct" 0 "low" ap-evidence - "src.ai_client._strip_stale_file_refreshes" "whole_struct" 0 "low" ap-evidence - 
"src.ai_client._strip_private_keys" "whole_struct" 0 "low" ap-evidence - "src.app_controller._refresh_api_metrics" "whole_struct" 0 "low" ap-evidence - "src.project_manager.save_project" "whole_struct" 0 "low" ap-evidence - "src.models.from_dict" "whole_struct" 0 "low" ap-evidence - "src.ai_client._repair_minimax_history" "whole_struct" 0 "low" ap-evidence - "src.ai_client._pre_dispatch" "whole_struct" 0 "low" ap-evidence "src.ai_client._repair_anthropic_history" "whole_struct" 0 "low" ap-evidence + "src.app_controller._on_comms_entry" "whole_struct" 0 "low" ap-evidence + "src.ai_client._add_history_cache_breakpoint" "whole_struct" 0 "low" ap-evidence + "src.ai_client._estimate_message_tokens" "whole_struct" 0 "low" ap-evidence "src.ai_client._estimate_prompt_tokens" "whole_struct" 0 "low" ap-evidence + "src.ai_client._strip_private_keys" "whole_struct" 0 "low" ap-evidence + "src.project_manager.flat_config" "whole_struct" 0 "low" ap-evidence + "src.ai_client._invalidate_token_estimate" "whole_struct" 0 "low" ap-evidence + "src.ai_client._strip_cache_controls" "whole_struct" 0 "low" ap-evidence + "src.ai_client._repair_deepseek_history" "whole_struct" 0 "low" ap-evidence + "src.ai_client._repair_minimax_history" "whole_struct" 0 "low" ap-evidence + "src.models._save_config_to_disk" "whole_struct" 0 "low" ap-evidence + "src.ai_client._dashscope_call" "whole_struct" 0 "low" ap-evidence + "src.models.from_dict" "whole_struct" 0 "low" ap-evidence + "src.ai_client._strip_stale_file_refreshes" "whole_struct" 0 "low" ap-evidence "src.ai_client._execute_single_tool_call_async" "whole_struct" 0 "low" ap-evidence + "src.aggregate.run" "whole_struct" 0 "low" ap-evidence + "src.aggregate._build_files_section_from_items" "whole_struct" 0 "low" ap-evidence + "src.ai_client._add_bleed_derived" "whole_struct" 0 "low" ap-evidence + "src.aggregate.build_markdown_from_items" "whole_struct" 0 "low" ap-evidence + "src.app_controller._refresh_api_metrics" "whole_struct" 0 "low" 
ap-evidence + "src.project_manager.entry_to_str" "whole_struct" 0 "low" ap-evidence + "src.app_controller._start_track_logic_result" "whole_struct" 0 "low" ap-evidence + "src.aggregate.build_tier3_context" "whole_struct" 0 "low" ap-evidence + "src.ai_client._trim_minimax_history" "whole_struct" 0 "low" ap-evidence + "src.ai_client._append_comms" "whole_struct" 0 "low" ap-evidence + "src.project_manager.save_project" "whole_struct" 0 "low" ap-evidence + "src.project_manager.migrate_from_legacy_config" "whole_struct" 0 "low" ap-evidence + "src.ai_client._trim_anthropic_history" "whole_struct" 0 "low" ap-evidence + "src.app_controller._offload_entry_payload" "whole_struct" 0 "low" ap-evidence + "src.aggregate.build_markdown_no_history" "whole_struct" 0 "low" ap-evidence + "src.ai_client.ollama_chat" "whole_struct" 0 "low" ap-evidence + "src.ai_client._pre_dispatch" "whole_struct" 0 "low" ap-evidence + "src.app_controller._start_track_logic" "whole_struct" 0 "low" ap-evidence + "src.project_manager.format_discussion" "whole_struct" 0 "low" ap-evidence \ === frequency === "per_turn" frequency \ === frequency_evidence (5 items) === - "src.api_hook_client.right_click" "per_turn" "static_analysis" "producer from src\api_hook_client.py" freq-evidence - "src.app_controller._api_get_api_session" "per_turn" "static_analysis" "producer from src\app_controller.py" freq-evidence - "src.app_controller._api_get_api_project" "per_turn" "static_analysis" "producer from src\app_controller.py" freq-evidence - "src.app_controller._api_get_context" "per_turn" "static_analysis" "producer from src\app_controller.py" freq-evidence - "src.ai_client._content_block_to_dict" "per_turn" "static_analysis" "producer from src\ai_client.py" freq-evidence + "src.project_manager.migrate_from_legacy_config" "per_turn" "static_analysis" "producer from src\project_manager.py" freq-evidence + "src.project_manager.str_to_entry" "per_turn" "static_analysis" "producer from src\project_manager.py" 
freq-evidence + "src.api_hook_client.get_node_status" "per_turn" "static_analysis" "producer from src\api_hook_client.py" freq-evidence + "src.app_controller._api_get_gui_state" "per_turn" "static_analysis" "producer from src\app_controller.py" freq-evidence + "src.app_controller._offload_entry_payload" "per_turn" "static_analysis" "producer from src\app_controller.py" freq-evidence \ === result_coverage === 77 77 35 0 result-coverage @@ -180,6 +180,7 @@ 0 0 0 type-alias-coverage \ === cross_audit_findings === + "audit_optional_in_3_files" 76 "src\ai_client.py" 159 "76 sites" cross-audit-finding 5 cross-audit-findings \ === decomposition_cost === diff --git a/docs/reports/code_path_audit/2026-06-22/aggregates/Metadata.md b/docs/reports/code_path_audit/2026-06-22/aggregates/Metadata.md index 6e12b6b4..24cb7ae1 100644 --- a/docs/reports/code_path_audit/2026-06-22/aggregates/Metadata.md +++ b/docs/reports/code_path_audit/2026-06-22/aggregates/Metadata.md @@ -31,6 +31,7 @@ | Audit script | Site count | Example | Note | |---|---|---|---| +| audit_optional_in_3_files | 76 | src\ai_client.py:159 | 76 sites | ## Decomposition cost diff --git a/docs/reports/code_path_audit/2026-06-22/aggregates/Metadata.tree b/docs/reports/code_path_audit/2026-06-22/aggregates/Metadata.tree index fae88b1b..8c6f1d7a 100644 --- a/docs/reports/code_path_audit/2026-06-22/aggregates/Metadata.tree +++ b/docs/reports/code_path_audit/2026-06-22/aggregates/Metadata.tree @@ -2,123 +2,123 @@ Metadata: Metadata |- kind: typealias |- memory_dim: discussion |- producers: [77] -| |- src.api_hook_client.right_click (producer) -| |- src.app_controller._api_get_api_session (producer) -| |- src.app_controller._api_get_api_project (producer) -| |- src.app_controller._api_get_context (producer) -| |- src.ai_client._content_block_to_dict (producer) -| |- src.api_hook_client.select_list_item (producer) -| |- src.api_hook_client.push_event (producer) -| |- src.api_hook_client.get_gui_state (producer) -| |- 
src.app_controller._api_get_gui_state (producer) -| |- src.app_controller.get_api_session (producer) | |- src.project_manager.migrate_from_legacy_config (producer) -| |- src.api_hook_client.get_gui_health (producer) -| |- src.app_controller._api_get_diagnostics (producer) -| |- src.api_hook_client.reject_patch (producer) -| |- src.api_hook_client.trigger_patch (producer) -| |- src.app_controller.generate (producer) -| |- src.app_controller.token_stats (producer) -| |- src.api_hook_client.get_patch_status (producer) -| |- src.app_controller._api_get_performance (producer) -| |- src.api_hook_client.set_value (producer) -| |- src.api_hook_client.get_project_switch_status (producer) -| |- src.api_hook_client.get_warmup_status (producer) -| |- src.project_manager.load_project (producer) -| |- src.models._load_config_from_disk (producer) -| |- src.app_controller.get_context (producer) -| |- src.app_controller.get_session (producer) -| |- src.ai_client._parse_tool_args_result (producer) -| |- src.models.to_dict (producer) -| |- src.api_hook_client.get_financial_metrics (producer) -| |- src.ai_client._load_credentials (producer) -| |- src.api_hook_client.get_project (producer) -| |- src.project_manager.default_discussion (producer) -| |- src.app_controller._api_token_stats (producer) -| |- src.api_hook_client.get_performance (producer) -| |- src.app_controller.status (producer) -| |- src.ai_client._add_bleed_derived (producer) -| |- src.api_hook_client.get_mma_status (producer) -| |- src.project_manager.load_history (producer) -| |- src.api_hook_client.post_gui (producer) -| |- src.api_hook_client.drag (producer) -| |- src.api_hook_client.get_session (producer) | |- src.project_manager.str_to_entry (producer) -| |- src.api_hook_client.select_tab (producer) -| |- src.project_manager.flat_config (producer) -| |- src.app_controller.load_config (producer) -| |- src.api_hook_client.apply_patch (producer) | |- src.api_hook_client.get_node_status (producer) -| |- 
src.api_hook_client.get_system_telemetry (producer) +| |- src.app_controller._api_get_gui_state (producer) +| |- src.app_controller._offload_entry_payload (producer) +| |- src.app_controller.get_gui_state (producer) +| |- src.app_controller._api_get_performance (producer) +| |- src.app_controller.get_session (producer) +| |- src.api_hook_client.push_event (producer) | |- src.app_controller._api_get_session (producer) +| |- src.models.to_dict (producer) +| |- src.ai_client._content_block_to_dict (producer) +| |- src.api_hook_client.get_warmup_status (producer) +| |- src.ai_client._add_bleed_derived (producer) +| |- src.api_hook_client.get_system_telemetry (producer) +| |- src.api_hook_client.get_gui_health (producer) +| |- src.app_controller.get_context (producer) +| |- src.api_hook_client.right_click (producer) +| |- src.api_hook_client.get_context_state (producer) +| |- src.api_hook_client.get_patch_status (producer) +| |- src.ai_client.ollama_chat (producer) +| |- src.api_hook_client.select_tab (producer) +| |- src.api_hook_client.select_list_item (producer) +| |- src.app_controller._api_get_mma_status (producer) +| |- src.app_controller.get_session_insights (producer) +| |- src.app_controller.get_performance (producer) +| |- src.app_controller.token_stats (producer) +| |- src.models._load_config_from_disk (producer) +| |- src.api_hook_client.get_startup_timeline (producer) +| |- src.project_manager.flat_config (producer) +| |- src.ai_client._load_credentials (producer) +| |- src.api_hook_client.get_session (producer) +| |- src.api_hook_client.get_gui_state (producer) +| |- src.api_hook_client.get_io_pool_status (producer) +| |- src.app_controller.get_diagnostics (producer) +| |- src.project_manager.default_discussion (producer) +| |- src.app_controller.get_api_session (producer) +| |- src.api_hook_client.get_project_switch_status (producer) +| |- src.project_manager.default_project (producer) +| |- src.api_hook_client.get_project (producer) +| |- 
src.app_controller._api_generate (producer) | |- src.ai_client._send_cli_round_result (producer) +| |- src.app_controller.generate (producer) +| |- src.api_hook_client.post_session (producer) +| |- src.api_hook_client.post_gui (producer) +| |- src.project_manager.load_project (producer) +| |- src.api_hook_client.drag (producer) +| |- src.api_hook_client.get_financial_metrics (producer) +| |- src.api_hook_client.get_warmup_wait (producer) +| |- src.api_hook_client.reject_patch (producer) +| |- src.ai_client._parse_tool_args_result (producer) +| |- src.app_controller._api_get_api_project (producer) | |- src.api_hook_client.get_gui_diagnostics (producer) +| |- src.api_hook_client.set_value (producer) +| |- src.app_controller.get_api_project (producer) +| |- src.ai_client._dashscope_call (producer) +| |- src.api_hook_client.get_status (producer) +| |- src.app_controller._api_get_context (producer) +| |- src.app_controller._api_get_diagnostics (producer) +| |- src.ai_client.get_token_stats (producer) +| |- src.api_hook_client.apply_patch (producer) +| |- src.api_hook_client.post_project (producer) +| |- src.app_controller.load_config (producer) +| |- src.app_controller.wait (producer) +| |- src.api_hook_client.get_mma_workers (producer) | |- src.api_hook_client.click (producer) | |- src.app_controller._api_status (producer) -| |- src.app_controller.get_mma_status (producer) -| |- src.app_controller.get_performance (producer) -| |- src.ai_client.ollama_chat (producer) -| |- src.api_hook_client.post_project (producer) -| |- src.api_hook_client.get_startup_timeline (producer) -| |- src.app_controller._api_generate (producer) -| |- src.api_hook_client.get_mma_workers (producer) -| |- src.api_hook_client.post_session (producer) -| |- src.ai_client.get_token_stats (producer) -| |- src.app_controller.get_api_project (producer) -| |- src.app_controller.get_session_insights (producer) -| |- src.api_hook_client.get_context_state (producer) -| |- 
src.app_controller._offload_entry_payload (producer) -| |- src.app_controller.get_diagnostics (producer) -| |- src.ai_client._dashscope_call (producer) -| |- src.app_controller._api_get_mma_status (producer) -| |- src.api_hook_client.get_warmup_wait (producer) | |- src.api_hook_client.wait_for_project_switch (producer) -| |- src.app_controller.wait (producer) -| |- src.project_manager.default_project (producer) -| |- src.api_hook_client.get_io_pool_status (producer) -| |- src.app_controller.get_gui_state (producer) -| |- src.api_hook_client.get_status (producer) +| |- src.api_hook_client.trigger_patch (producer) +| |- src.api_hook_client.get_performance (producer) +| |- src.project_manager.load_history (producer) +| |- src.app_controller._api_get_api_session (producer) +| |- src.app_controller.status (producer) | |- src.ai_client.get_gemini_cache_stats (producer) +| |- src.app_controller.get_mma_status (producer) +| |- src.app_controller._api_token_stats (producer) +| |- src.api_hook_client.get_mma_status (producer) |- consumers: [35] -| |- src.app_controller._start_track_logic (consumer) -| |- src.project_manager.flat_config (consumer) -| |- src.ai_client._append_comms (consumer) -| |- src.app_controller._on_comms_entry (consumer) -| |- src.ai_client._invalidate_token_estimate (consumer) -| |- src.project_manager.entry_to_str (consumer) -| |- src.app_controller._offload_entry_payload (consumer) -| |- src.models._save_config_to_disk (consumer) -| |- src.aggregate.run (consumer) -| |- src.project_manager.migrate_from_legacy_config (consumer) -| |- src.ai_client._dashscope_call (consumer) -| |- src.ai_client._strip_cache_controls (consumer) -| |- src.ai_client._add_history_cache_breakpoint (consumer) -| |- src.aggregate.build_markdown_from_items (consumer) -| |- src.ai_client._trim_anthropic_history (consumer) -| |- src.ai_client._repair_deepseek_history (consumer) -| |- src.aggregate.build_tier3_context (consumer) -| |- src.app_controller._start_track_logic_result 
(consumer) -| |- src.aggregate.build_markdown_no_history (consumer) -| |- src.ai_client._add_bleed_derived (consumer) -| |- src.ai_client.ollama_chat (consumer) -| |- src.aggregate._build_files_section_from_items (consumer) -| |- src.ai_client._estimate_message_tokens (consumer) -| |- src.ai_client._trim_minimax_history (consumer) -| |- src.project_manager.format_discussion (consumer) -| |- src.ai_client._strip_stale_file_refreshes (consumer) -| |- src.ai_client._strip_private_keys (consumer) -| |- src.app_controller._refresh_api_metrics (consumer) -| |- src.project_manager.save_project (consumer) -| |- src.models.from_dict (consumer) -| |- src.ai_client._repair_minimax_history (consumer) -| |- src.ai_client._pre_dispatch (consumer) | |- src.ai_client._repair_anthropic_history (consumer) +| |- src.app_controller._on_comms_entry (consumer) +| |- src.ai_client._add_history_cache_breakpoint (consumer) +| |- src.ai_client._estimate_message_tokens (consumer) | |- src.ai_client._estimate_prompt_tokens (consumer) +| |- src.ai_client._strip_private_keys (consumer) +| |- src.project_manager.flat_config (consumer) +| |- src.ai_client._invalidate_token_estimate (consumer) +| |- src.ai_client._strip_cache_controls (consumer) +| |- src.ai_client._repair_deepseek_history (consumer) +| |- src.ai_client._repair_minimax_history (consumer) +| |- src.models._save_config_to_disk (consumer) +| |- src.ai_client._dashscope_call (consumer) +| |- src.models.from_dict (consumer) +| |- src.ai_client._strip_stale_file_refreshes (consumer) | |- src.ai_client._execute_single_tool_call_async (consumer) +| |- src.aggregate.run (consumer) +| |- src.aggregate._build_files_section_from_items (consumer) +| |- src.ai_client._add_bleed_derived (consumer) +| |- src.aggregate.build_markdown_from_items (consumer) +| |- src.app_controller._refresh_api_metrics (consumer) +| |- src.project_manager.entry_to_str (consumer) +| |- src.app_controller._start_track_logic_result (consumer) +| |- 
src.aggregate.build_tier3_context (consumer) +| |- src.ai_client._trim_minimax_history (consumer) +| |- src.ai_client._append_comms (consumer) +| |- src.project_manager.save_project (consumer) +| |- src.project_manager.migrate_from_legacy_config (consumer) +| |- src.ai_client._trim_anthropic_history (consumer) +| |- src.app_controller._offload_entry_payload (consumer) +| |- src.aggregate.build_markdown_no_history (consumer) +| |- src.ai_client.ollama_chat (consumer) +| |- src.ai_client._pre_dispatch (consumer) +| |- src.app_controller._start_track_logic (consumer) +| |- src.project_manager.format_discussion (consumer) |- access_pattern: whole_struct |- frequency: per_turn |- result_coverage: 77 producers, 35 consumers |- type_alias_coverage: 0 sites -|- cross_audit_findings: 0 findings +|- cross_audit_findings: 1 findings |- decomposition_cost: hold (470 us) |- optimization_candidates: [0] \ No newline at end of file diff --git a/docs/reports/code_path_audit/2026-06-22/cross_audit_summary.md b/docs/reports/code_path_audit/2026-06-22/cross_audit_summary.md index 02689057..21ee8950 100644 --- a/docs/reports/code_path_audit/2026-06-22/cross_audit_summary.md +++ b/docs/reports/code_path_audit/2026-06-22/cross_audit_summary.md @@ -2,7 +2,7 @@ | Aggregate | weak_types | exception_handling | optional_in_baseline | config_io | import_graph | total | |---|---|---|---|---|---|---| -| Metadata | 0 | 0 | 0 | 0 | 0 | 0 | +| Metadata | 0 | 0 | 1 | 0 | 0 | 1 | | FileItem | 0 | 0 | 0 | 0 | 0 | 0 | | FileItems | 0 | 0 | 0 | 0 | 0 | 0 | | CommsLogEntry | 0 | 0 | 0 | 0 | 0 | 0 | diff --git a/src/code_path_audit.py b/src/code_path_audit.py index 24c9f17f..74e4008f 100644 --- a/src/code_path_audit.py +++ b/src/code_path_audit.py @@ -1043,8 +1043,15 @@ def synthesize_aggregate_profile( audit_inputs: dict[str, dict], overrides: dict, is_candidate: bool, + _full_pcg_producers: dict[str, list[FunctionRef]] | None = None, + _full_pcg_consumers: dict[str, list[FunctionRef]] | None = None, ) 
-> AggregateProfile: - """Synthesize one AggregateProfile.""" + """Synthesize one AggregateProfile. + + _full_pcg_producers and _full_pcg_consumers are the full PCG dicts + across all aggregates (used for cross-audit mapping). If not provided, + fall back to this aggregate's refs only. + """ if is_candidate: return AggregateProfile( name=aggregate, @@ -1082,25 +1089,18 @@ def synthesize_aggregate_profile( consumers, aggregate, type_registry, "src" ) tac = compute_real_type_alias_coverage(aggregate, producers, consumers, type_registry, "src") - cross_findings = CrossAuditFindings((), (), (), (), ()) - for audit_name in ("audit_weak_types", "audit_exception_handling"): + from src.code_path_audit_cross_audit import ( + aggregate_findings, + build_cross_audit_findings_for_aggregate, + ) + full_producers = _full_pcg_producers if _full_pcg_producers is not None else pcg_producers + full_consumers = _full_pcg_consumers if _full_pcg_consumers is not None else pcg_consumers + aggregated: dict[str, dict[str, list]] = {} + for audit_name in ("audit_weak_types", "audit_exception_handling", "audit_optional_in_3_files", "audit_no_models_config_io", "audit_main_thread_imports"): if audit_name in audit_inputs: findings = audit_inputs[audit_name].get("findings", []) - example_file = findings[0].get("file", "") if findings else "" - example_line = findings[0].get("line", 0) if findings else 0 - matched = aggregate_cross_audit_findings( - audit_name=audit_name, - findings=findings, - example_file=example_file, - example_line=example_line, - ) - cross_findings = CrossAuditFindings( - weak_types=cross_findings.weak_types + matched.weak_types, - exception_handling=cross_findings.exception_handling + matched.exception_handling, - optional_in_baseline=cross_findings.optional_in_baseline + matched.optional_in_baseline, - config_io_ownership=cross_findings.config_io_ownership + matched.config_io_ownership, - import_graph=cross_findings.import_graph + matched.import_graph, - ) + 
aggregated[audit_name] = aggregate_findings(audit_name, findings, full_producers, full_consumers) + cross_findings = build_cross_audit_findings_for_aggregate(aggregate, aggregated) producer_count = len({f.fqname for f in producers}) consumer_count = len({f.fqname for f in consumers}) branches_on_errors = set() @@ -1159,11 +1159,13 @@ def run_audit( for aggregate in AGGREGATES_IN_SCOPE: profile = synthesize_aggregate_profile( aggregate=aggregate, - pcg_producers=pcg.producers, - pcg_consumers=pcg.consumers, + pcg_producers={aggregate: list(pcg.producers.get(aggregate, []))}, + pcg_consumers={aggregate: list(pcg.consumers.get(aggregate, []))}, audit_inputs=audit_inputs, overrides=overrides, is_candidate=False, + _full_pcg_producers=pcg.producers, + _full_pcg_consumers=pcg.consumers, ) profiles.append(profile) for candidate in CANDIDATE_AGGREGATES: diff --git a/src/code_path_audit_cross_audit.py b/src/code_path_audit_cross_audit.py new file mode 100644 index 00000000..ba861e0e --- /dev/null +++ b/src/code_path_audit_cross_audit.py @@ -0,0 +1,170 @@ +"""Per-aggregate cross-audit mapping. + +Maps each audit finding (file:line) to one or more aggregates +via the PCG's producers + consumers dictionaries. 
+""" +from __future__ import annotations +from pathlib import Path +from src.code_path_audit import ( + CrossAuditFinding, + CrossAuditFindings, + FunctionRef, + find_enclosing_function, +) + +AUDIT_BUCKET_FIELDS: dict[str, str] = { + "audit_weak_types": "weak_types", + "audit_exception_handling": "exception_handling", + "audit_optional_in_3_files": "optional_in_baseline", + "audit_no_models_config_io": "config_io_ownership", + "audit_main_thread_imports": "import_graph", +} + + +def _all_function_refs( + producers: dict[str, list[FunctionRef]], + consumers: dict[str, list[FunctionRef]], +) -> list[FunctionRef]: + """Flatten all FunctionRefs from the PCG dicts.""" + out: list[FunctionRef] = [] + for refs in producers.values(): + out.extend(refs) + for refs in consumers.values(): + out.extend(refs) + return out + +def _file_to_aggregates( + producers: dict[str, list[FunctionRef]], + consumers: dict[str, list[FunctionRef]], +) -> dict[str, set[str]]: + """Build a {file: {aggregate, ...}} index for file-level fallback mapping.""" + out: dict[str, set[str]] = {} + for aggregate, refs in producers.items(): + for r in refs: + out.setdefault(_normalize_path(r.file), set()).add(aggregate) + for aggregate, refs in consumers.items(): + for r in refs: + out.setdefault(_normalize_path(r.file), set()).add(aggregate) + return out + + +def _aggregate_for_fqname( + fqname: str, + producers: dict[str, list[FunctionRef]], + consumers: dict[str, list[FunctionRef]], +) -> str: + """Find which aggregate this FunctionRef is associated with.""" + for ag, refs in producers.items(): + if any(r.fqname == fqname for r in refs): + return ag + for ag, refs in consumers.items(): + if any(r.fqname == fqname for r in refs): + return ag + return "" + + +def _normalize_path(p: str) -> str: + """Normalize file path separators for comparison.""" + return p.replace("\\", "/") + + +def map_finding_to_aggregates( + file: str, + line: int, + producers: dict[str, list[FunctionRef]], + consumers: dict[str, 
list[FunctionRef]], +) -> set[str]: + """Map a (file, line) finding to a set of aggregate names. + + Tier 1: function lookup via find_enclosing_function (with line=0 fallback + to file-only match). Tier 2: file heuristic via the PCG's file index. + + File paths are normalized to forward-slash form for comparison. + """ + all_refs = _all_function_refs(producers, consumers) + normalized = _normalize_path(file) + fref = find_enclosing_function(file=normalized, line=line, function_refs=all_refs) + if fref is None: + same_file = [r for r in all_refs if _normalize_path(r.file) == normalized] + return {_aggregate_for_fqname(r.fqname, producers, consumers) for r in same_file} + return {_aggregate_for_fqname(fref.fqname, producers, consumers)} + + +def aggregate_findings( + audit_name: str, + findings: list[dict], + producers: dict[str, list[FunctionRef]], + consumers: dict[str, list[FunctionRef]], +) -> dict[str, list[CrossAuditFinding]]: + """Group findings by aggregate via the PCG. + + Mapping tiers: + 1. Function lookup (find_enclosing_function) -> exact match + 2. File-level fallback (file has any producer/consumer of the aggregate) + 3. 
Unbucketed (the file has no producer/consumer of any aggregate) + """ + out: dict[str, list[CrossAuditFinding]] = {} + file_index = _file_to_aggregates(producers, consumers) + for finding in findings: + file = finding.get("file", "") or finding.get("filename", "") + line = int(finding.get("line", 0) or 0) + note = finding.get("category", "") or finding.get("body_summary", "") or finding.get("note", "") or "" + aggregates = map_finding_to_aggregates(file, line, producers, consumers) + if not aggregates: + normalized = _normalize_path(file) + aggregates = file_index.get(normalized, set()) + if not aggregates: + aggregates = {""} + for aggregate in aggregates: + cf = CrossAuditFinding( + audit_script=audit_name, + site_count=1, + example_file=file, + example_line=line, + note=note, + ) + out.setdefault(aggregate, []).append(cf) + return out + + +def build_cross_audit_findings_for_aggregate( + aggregate: str, + aggregated: dict[str, dict[str, list[CrossAuditFinding]]], +) -> CrossAuditFindings: + """Build a CrossAuditFindings struct for one aggregate from aggregated data.""" + weak = () + exc = () + opt = () + cfg = () + imp = () + for audit_name, by_agg in aggregated.items(): + findings = by_agg.get(aggregate, []) + if not findings: + continue + bucket = AUDIT_BUCKET_FIELDS.get(audit_name, "") + total = len(findings) + first = findings[0] + combined = CrossAuditFinding( + audit_script=audit_name, + site_count=total, + example_file=first.example_file, + example_line=first.example_line, + note=f"{total} sites", + ) + if bucket == "weak_types": + weak = (combined,) + elif bucket == "exception_handling": + exc = (combined,) + elif bucket == "optional_in_baseline": + opt = (combined,) + elif bucket == "config_io_ownership": + cfg = (combined,) + elif bucket == "import_graph": + imp = (combined,) + return CrossAuditFindings( + weak_types=weak, + exception_handling=exc, + optional_in_baseline=opt, + config_io_ownership=cfg, + import_graph=imp, + ) \ No newline at end of file