Revert client for now (bugged)
ai_client.py (129 changed lines)
@@ -14,7 +14,7 @@ _anthropic_client = None
 _anthropic_history: list[dict] = []
 
 # Injected by gui.py - called when AI wants to run a command.
-# Signature: (script: str, base_dir: str) -> str | None
+# Signature: (script: str) -> str | None
 # Returns the output string if approved, None if rejected.
 confirm_and_run_callback = None
 
@@ -22,20 +22,8 @@ confirm_and_run_callback = None
 # Signature: (entry: dict) -> None
 comms_log_callback = None
 
-# Injected by gui.py - called whenever a tool call completes.
-# Signature: (script: str, result: str, script_path: str | None) -> None
-tool_log_callback = None
-
 MAX_TOOL_ROUNDS = 5
 
-# Anthropic system prompt - cached as the first turn so it counts toward
-# the prompt-cache prefix on every subsequent request.
-_ANTHROPIC_SYSTEM = (
-    "You are a helpful coding assistant with access to a PowerShell tool. "
-    "When asked to create or edit files, prefer targeted edits over full rewrites. "
-    "Always explain what you are doing before invoking the tool."
-)
-
 # ------------------------------------------------------------------ comms log
 
 _comms_log: list[dict] = []
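For orientation, these module-level callbacks are plain attributes that gui.py assigns at startup. A minimal sketch of that wiring; run_with_confirmation and append_comms_row are hypothetical gui-side names not in this diff. Note the reverted comment in the first hunk says the signature is (script: str), but _run_script below still calls the callback with (script, base_dir), so the sketch follows the call site:

import ai_client

def run_with_confirmation(script: str, base_dir: str) -> str | None:
    """Show a confirmation dialog; return command output if approved, None if rejected."""
    raise NotImplementedError  # GUI-specific; placeholder only

def append_comms_row(entry: dict) -> None:
    """Render one comms-log entry in the GUI."""
    raise NotImplementedError  # GUI-specific; placeholder only

ai_client.confirm_and_run_callback = run_with_confirmation
ai_client.comms_log_callback = append_comms_row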
@@ -154,6 +142,8 @@ def _classify_anthropic_error(exc: Exception) -> ProviderError:
 def _classify_gemini_error(exc: Exception) -> ProviderError:
     """Map a google-genai SDK exception to a ProviderError."""
     body = str(exc).lower()
+    # google-genai surfaces HTTP errors as google.api_core exceptions or
+    # google.genai exceptions; inspect the message text as a reliable fallback.
     try:
         from google.api_core import exceptions as gac
         if isinstance(exc, gac.ResourceExhausted):
@@ -166,6 +156,7 @@ def _classify_gemini_error(exc: Exception) -> ProviderError:
             return ProviderError("network", "gemini", exc)
     except ImportError:
         pass
+    # Fallback: parse status code / message string
     if "429" in body or "quota" in body or "resource exhausted" in body:
         return ProviderError("quota", "gemini", exc)
     if "rate" in body and "limit" in body:
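ProviderError is used but not defined in these hunks. A minimal sketch of the assumed shape, inferred only from the (kind, provider, exception) call sites above:

# Sketch only: the real ProviderError lives elsewhere in ai_client.py;
# field names are assumptions inferred from the call sites.
class ProviderError(Exception):
    def __init__(self, kind: str, provider: str, original: Exception):
        super().__init__(f"{provider} {kind} error: {original}")
        self.kind = kind          # e.g. "quota", "network"
        self.provider = provider  # "gemini" or "anthropic"
        self.original = original  # underlying SDK exception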
@@ -288,20 +279,12 @@ def _run_script(script: str, base_dir: str) -> str:
     """
     Delegate to the GUI confirmation callback.
     Returns result string (stdout/stderr) or a rejection message.
-    Also fires tool_log_callback if registered.
     """
     if confirm_and_run_callback is None:
         return "ERROR: no confirmation handler registered"
-    # confirm_and_run_callback returns (result, script_path) or None
-    outcome = confirm_and_run_callback(script, base_dir)
-    if outcome is None:
-        result = "USER REJECTED: command was not executed"
-        if tool_log_callback is not None:
-            tool_log_callback(script, result, None)
-        return result
-    result, script_path = outcome
-    if tool_log_callback is not None:
-        tool_log_callback(script, result, script_path)
+    result = confirm_and_run_callback(script, base_dir)
+    if result is None:
+        return "USER REJECTED: command was not executed"
     return result
 
 # ------------------------------------------------------------------ gemini
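The revert also changes the callback contract: the removed code expected a (result, script_path) tuple or None, while the restored code treats the return value as a plain output string or None. A conforming handler for the restored contract might look like the following; the subprocess invocation and the ask_user_to_confirm prompt are illustrative, not from the diff:

import subprocess

def run_with_confirmation(script: str, base_dir: str) -> str | None:
    # ask_user_to_confirm is a hypothetical GUI prompt; True means approved.
    if not ask_user_to_confirm(script):
        return None
    proc = subprocess.run(
        ["powershell", "-NoProfile", "-Command", script],
        cwd=base_dir,
        capture_output=True,
        text=True,
    )
    # _run_script expects stdout/stderr combined into a single string.
    return proc.stdout + proc.stderr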
@@ -338,6 +321,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
         response = _gemini_chat.send_message(full_message)
 
         for round_idx in range(MAX_TOOL_ROUNDS):
+            # Log the raw response candidates as text summary
             text_parts_raw = [
                 part.text
                 for candidate in response.candidates
@@ -399,35 +383,6 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
         raise _classify_gemini_error(exc) from exc
 
 # ------------------------------------------------------------------ anthropic
-#
-# Caching strategy (Anthropic prompt caching):
-#
-# The Anthropic API caches a prefix of the input tokens. To maximise hits:
-#
-# 1. A persistent system prompt is sent on every request with
-#    cache_control={"type":"ephemeral"} so it is cached after the first call
-#    and reused on subsequent calls within the 5-minute TTL window.
-#
-# 2. The context block (aggregated markdown) is placed as the FIRST user
-#    message in the history and also marked with cache_control. Because the
-#    system prompt and the context are stable across tool-use rounds within a
-#    single send() call, the cache hit rate is very high after round 0.
-#
-# 3. Tool definitions are passed with cache_control on the last tool so the
-#    entire tools array is also cached.
-#
-# Token accounting: the response payload contains cache_creation_input_tokens
-# and cache_read_input_tokens in addition to the regular input_tokens field.
-# These are included in the comms log under "usage".
-
-def _anthropic_tools_with_cache() -> list[dict]:
-    """Return the tools list with cache_control on the last entry."""
-    import copy
-    tools = copy.deepcopy(_ANTHROPIC_TOOLS)
-    # Mark the last tool so the entire prefix (system + tools) gets cached
-    tools[-1]["cache_control"] = {"type": "ephemeral"}
-    return tools
-
 
 def _ensure_anthropic_client():
     global _anthropic_client
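The comment block and helper removed here describe the caching layout being reverted. For reference, a self-contained sketch of the cached prefix they describe; the tool definition is illustrative, and only the cache_control placement comes from the removed code:

import copy

# Illustrative tool; the real _ANTHROPIC_TOOLS list is defined elsewhere.
TOOLS = [{
    "name": "run_powershell",
    "description": "Run a PowerShell script and return its output.",
    "input_schema": {
        "type": "object",
        "properties": {"script": {"type": "string"}},
        "required": ["script"],
    },
}]

def tools_with_cache() -> list[dict]:
    # As in the removed helper: marking the LAST tool makes the whole
    # prefix up to that point (system prompt + tools array) cacheable.
    tools = copy.deepcopy(TOOLS)
    tools[-1]["cache_control"] = {"type": "ephemeral"}
    return tools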
@@ -436,7 +391,6 @@ def _ensure_anthropic_client():
     creds = _load_credentials()
     _anthropic_client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])
 
-
 def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
     global _anthropic_history
     import anthropic
@@ -444,54 +398,19 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
     try:
         _ensure_anthropic_client()
 
-        # ----------------------------------------------------------------
-        # Build the user turn.
-        #
-        # Structure the content as two blocks so the large context portion
-        # can be cached independently of the user question:
-        #
-        #   [0] context block  <- cache_control applied here
-        #   [1] user question  <- not cached (changes every turn)
-        #
-        # The Anthropic cache anchors at the LAST cache_control marker in
-        # the prefix, so everything up to and including the context block
-        # will be served from cache on subsequent rounds.
-        # ----------------------------------------------------------------
-        user_content = [
-            {
-                "type": "text",
-                "text": f"<context>\n{md_content}\n</context>",
-                "cache_control": {"type": "ephemeral"},
-            },
-            {
-                "type": "text",
-                "text": user_message,
-            },
-        ]
-
-        _anthropic_history.append({"role": "user", "content": user_content})
+        full_message = f"<context>\n{md_content}\n</context>\n\n{user_message}"
+        _anthropic_history.append({"role": "user", "content": full_message})
 
         _append_comms("OUT", "request", {
-            "message": f"<context>\n{md_content}\n</context>\n\n{user_message}",
+            "message": full_message,
         })
 
         for round_idx in range(MAX_TOOL_ROUNDS):
             response = _anthropic_client.messages.create(
                 model=_model,
                 max_tokens=8096,
-                system=[
-                    {
-                        "type": "text",
-                        "text": _ANTHROPIC_SYSTEM,
-                        "cache_control": {"type": "ephemeral"},
-                    }
-                ],
-                tools=_anthropic_tools_with_cache(),
-                messages=_anthropic_history,
-                # Ask the API to return cache token counts
-                # betas=["prompt-caching-2024-07-31"],
-                # TODO(Claude): betas is not a valid field:
-                # ERROR: Messages.create() got an unexpected keyword argument 'betas'
+                tools=_ANTHROPIC_TOOLS,
+                messages=_anthropic_history
             )
 
             _anthropic_history.append({
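On the removed TODO: Messages.create() in the anthropic Python SDK takes no betas keyword; to my knowledge that parameter is accepted only on the beta namespace (client.beta.messages.create), and prompt caching has since reached general availability, so cache_control blocks work on the plain endpoint with cache token counts returned by default. A sketch under that assumption; the model name is illustrative:

import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment

response = client.messages.create(
    model="claude-3-5-sonnet-latest",  # illustrative
    max_tokens=1024,
    system=[{
        "type": "text",
        "text": "You are a helpful coding assistant.",
        "cache_control": {"type": "ephemeral"},  # no beta flag needed once GA
    }],
    messages=[{"role": "user", "content": "Hello"}],
)
# Cache accounting comes back on usage when caching applies.
print(getattr(response.usage, "cache_read_input_tokens", None))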
@@ -499,34 +418,22 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
                 "content": response.content
             })
 
+            # Summarise the response content for the log
             text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
             tool_use_blocks = [
                 {"id": b.id, "name": b.name, "input": b.input}
                 for b in response.content
                 if b.type == "tool_use"
             ]
-
-            # Extended usage includes cache fields when the beta header is set
-            usage_dict: dict = {}
-            if response.usage:
-                usage_dict = {
-                    "input_tokens": response.usage.input_tokens,
-                    "output_tokens": response.usage.output_tokens,
-                }
-                # cache fields are present when the beta is active
-                cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
-                cache_read = getattr(response.usage, "cache_read_input_tokens", None)
-                if cache_creation is not None:
-                    usage_dict["cache_creation_input_tokens"] = cache_creation
-                if cache_read is not None:
-                    usage_dict["cache_read_input_tokens"] = cache_read
-
             _append_comms("IN", "response", {
                 "round": round_idx,
                 "stop_reason": response.stop_reason,
                 "text": "\n".join(text_blocks),
                 "tool_calls": tool_use_blocks,
-                "usage": usage_dict,
+                "usage": {
+                    "input_tokens": response.usage.input_tokens,
+                    "output_tokens": response.usage.output_tokens,
+                } if response.usage else {},
             })
 
             if response.stop_reason != "tool_use":
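Note that the simplified usage logging restored here drops the cache statistics if caching is ever re-enabled; the removed getattr pattern handled both cases. A small helper distilled from those removed lines:

def usage_to_dict(usage) -> dict:
    """Defensive usage read: cache fields are present only when prompt
    caching is active, so fall back to the always-present pair."""
    if not usage:
        return {}
    out = {
        "input_tokens": usage.input_tokens,
        "output_tokens": usage.output_tokens,
    }
    for field in ("cache_creation_input_tokens", "cache_read_input_tokens"):
        value = getattr(usage, field, None)
        if value is not None:
            out[field] = value
    return out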