diff --git a/src/ai_client.py b/src/ai_client.py
index a9dd1ad8..34fa78b9 100644
--- a/src/ai_client.py
+++ b/src/ai_client.py
@@ -2901,19 +2901,20 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
    return _send_llama_native(md_content, user_message, base_dir, file_items, discussion_history, stream, pre_tool_callback, qa_callback, stream_callback, patch_callback)
   client = _ensure_llama_client()
   tools: list[Metadata] | None = _get_deepseek_tools() or None
-  with _llama_history_lock:
+  history = provider_state.get_history("llama")
+  with history.lock:
    user_content = user_message
    if file_items:
     for fi in file_items:
      if fi.get("is_image") and fi.get("base64_data"):
       user_content = f"[IMAGE: {fi.get('path', 'attachment')}]\n{user_content}"
-   if discussion_history and not _llama_history:
-    _llama_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
+   if discussion_history and not history:
+    history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
    else:
-    _llama_history.append({"role": "user", "content": user_content})
+    history.append({"role": "user", "content": user_content})
   def _build_llama_request(_round_idx: int) -> OpenAICompatibleRequest:
-   with _llama_history_lock:
-    history_msgs: list[ChatMessage] = [ChatMessage(role=m["role"], content=m["content"]) for m in _llama_history]
+   with history.lock:
+    history_msgs: list[ChatMessage] = [ChatMessage(role=m["role"], content=m["content"]) for m in history]
     messages: list[ChatMessage] = [ChatMessage(role="system", content=f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>")]
     messages.extend(history_msgs)
    return OpenAICompatibleRequest(
@@ -2926,7 +2927,7 @@ def _send_llama(md_content: str, user_message: str, base_dir: str,
    client, _build_llama_request, capabilities=caps,
    pre_tool_callback=pre_tool_callback, qa_callback=qa_callback, stream_callback=stream_callback,
    patch_callback=patch_callback, base_dir=base_dir, vendor_name="llama",
-   history_lock=_llama_history_lock, history=_llama_history,
+   history_lock=history.lock, history=history,
   ))
  except Exception as exc:
   return Result(data="", errors=[_classify_openai_compatible_error(exc, source="ai_client.llama")])
@@ -2995,13 +2996,14 @@ def _send_llama_native(md_content: str, user_message: str, base_dir: str,
  """
  try:
   base_url = _llama_base_url.replace("/v1", "")
-  with _llama_history_lock:
-   if discussion_history and not _llama_history:
-    _llama_history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
+  history = provider_state.get_history("llama")
+  with history.lock:
+   if discussion_history and not history:
+    history.append({"role": "user", "content": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"})
    else:
-    _llama_history.append({"role": "user", "content": user_message})
+    history.append({"role": "user", "content": user_message})
    messages: list[Metadata] = [{"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}]
-   messages.extend(_llama_history)
+   messages.extend(history)
    images: list[str] = []
    if file_items:
     for fi in file_items:
@@ -3010,11 +3012,11 @@ def _send_llama_native(md_content: str, user_message: str, base_dir: str,
   response = ollama_chat(_model, messages, images=images, base_url=base_url)
   text = response.get("message", {}).get("content", "")
   thinking = response.get("message", {}).get("thinking", "")
-  with _llama_history_lock:
+  with history.lock:
    msg: Metadata = {"role": "assistant", "content": text or None}
    if thinking:
     msg["thinking"] = thinking
-   _llama_history.append(msg)
+   history.append(msg)
   return Result(data=(f"<thinking>\n{thinking}\n</thinking>\n" if thinking else "") + text)
  except Exception as exc:
   return Result(data="", errors=[ErrorInfo(kind=ErrorKind.INTERNAL, message=str(exc), source="ai_client.llama_native", original=exc)])