diff --git a/src/ai_server.py b/src/ai_server.py
deleted file mode 100644
index df5a7e3..0000000
--- a/src/ai_server.py
+++ /dev/null
@@ -1,259 +0,0 @@
-#!/usr/bin/env python
-import json
-import sys
-import os
-import threading
-import hashlib
-import time
-import datetime
-from typing import Any, Optional
-
-_google_genai = None
-_anthropic = None
-_deepseek_client = None
-_minimax_client = None
-
-_providers = {
-    "gemini": ["gemini-2.5-flash-lite", "gemini-3-flash-preview", "gemini-3.1-pro-preview"],
-    "anthropic": ["claude-sonnet-4-20250514", "claude-3-5-sonnet-20241022"],
-    "deepseek": ["deepseek-chat", "deepseek-reasoner"],
-    "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
-    "gemini_cli": ["gemini-3-flash-preview", "gemini-3.1-pro-preview", "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.0-flash", "gemini-2.5-flash-lite"],
-}
-
-_session_state = {
-    "provider": "gemini",
-    "model": "gemini-2.5-flash-lite",
-    "temperature": 0.0,
-    "top_p": 1.0,
-    "max_tokens": 8192,
-    "custom_system_prompt": "",
-    "base_system_prompt_override": "",
-    "use_default_base_prompt": True,
-    "project_context_marker": "",
-    "agent_tools": {},
-    "gemini_cache": None,
-    "gemini_cache_md_hash": None,
-    "gemini_cache_created_at": None,
-    "gemini_cached_file_paths": [],
-}
-
-_history = {
-    "gemini": [],
-    "anthropic": [],
-    "deepseek": [],
-    "minimax": [],
-}
-
-def _ensure_google_genai():
-    global _google_genai
-    if _google_genai is None:
-        from google import genai
-        _google_genai = genai
-    return _google_genai
-
-def _ensure_anthropic():
-    global _anthropic
-    if _anthropic is None:
-        import anthropic
-        _anthropic = anthropic
-    return _anthropic
-
-def _load_credentials():
-    import tomllib
-    cred_path = os.environ.get("SLOP_CREDENTIALS", str(os.path.join(os.path.dirname(__file__), "..", "credentials.toml")))
-    with open(cred_path, "rb") as f:
-        return tomllib.load(f)
-
-def handle_command(cmd: dict) -> dict:
-    method = cmd.get("method", "")
-    params = cmd.get("params", {})
-    cmd_id = cmd.get("id")
-
-    if method == "list_models":
-        provider = params.get("provider", "gemini")
-        if provider in _providers:
-            return {"id": cmd_id, "result": {"models": _providers[provider]}}
-        if provider == "gemini":
-            try:
-                client = _ensure_google_genai().Client(api_key=_load_credentials()["gemini"]["api_key"])
-                models = []
-                for m in client.models.list():
-                    name = m.name
-                    if name and name.startswith("models/"):
-                        name = name[len("models/"):]
-                    if name and "gemini" in name.lower():
-                        models.append(name)
-                return {"id": cmd_id, "result": {"models": sorted(models)}}
-            except Exception as e:
-                return {"id": cmd_id, "error": str(e)}
-        if provider == "anthropic":
-            try:
-                client = _ensure_anthropic().Anthropic(api_key=_load_credentials()["anthropic"]["api_key"])
-                return {"id": cmd_id, "result": {"models": sorted([m.id for m in client.models.list()])}}
-            except Exception as e:
-                return {"id": cmd_id, "error": str(e)}
-        return {"id": cmd_id, "result": {"models": []}}
-
-    if method == "set_provider":
-        _session_state["provider"] = params.get("provider", "gemini")
-        _session_state["model"] = params.get("model", "gemini-2.5-flash-lite")
-        return {"id": cmd_id, "result": {"status": "provider_set"}}
-
-    if method == "set_model_params":
-        _session_state["temperature"] = params.get("temperature", 0.0)
-        _session_state["top_p"] = params.get("top_p", 1.0)
-        _session_state["max_tokens"] = params.get("max_tokens", 8192)
-        return {"id": cmd_id, "result": {"status": "params_set"}}
-
-    if method == "cleanup":
-        global _google_genai
-        if _session_state["gemini_cache"]:
-            try:
-                _ensure_google_genai().Client(api_key=_load_credentials()["gemini"]["api_key"]).caches.delete(name=_session_state["gemini_cache"].name)
-            except Exception:
-                pass
-        _session_state["gemini_cache"] = None
-        _session_state["gemini_cached_file_paths"] = []
-        return {"id": cmd_id, "result": {"status": "cleaned"}}
-
-    if method == "reset_session":
-        _history["gemini"] = []
-        _history["anthropic"] = []
-        _history["deepseek"] = []
-        _history["minimax"] = []
-        _session_state["gemini_cache"] = None
-        _session_state["gemini_cache_md_hash"] = None
-        _session_state["gemini_cache_created_at"] = None
-        _session_state["gemini_cached_file_paths"] = []
-        return {"id": cmd_id, "result": {"status": "reset"}}
-
-    if method == "get_gemini_cache_stats":
-        try:
-            client = _ensure_google_genai().Client(api_key=_load_credentials()["gemini"]["api_key"])
-            caches = list(client.caches.list())
-            total_size = sum(getattr(c, 'size_bytes', 0) for c in caches)
-            return {"id": cmd_id, "result": {"cache_count": len(caches), "total_size_bytes": total_size, "cached_files": _session_state["gemini_cached_file_paths"]}}
-        except Exception as e:
-            return {"id": cmd_id, "result": {"cache_count": 0, "total_size_bytes": 0, "cached_files": []}}
-
-    if method == "send":
-        return _handle_send(cmd_id, params)
-
-    if method == "get_token_stats":
-        md_content = params.get("md_content", "")
-        approx_tokens = len(md_content) // 4
-        return {"id": cmd_id, "result": {"input_tokens": approx_tokens, "output_tokens": 0, "total_tokens": approx_tokens, "cached_tokens": 0}}
-
-    if method == "run_tier4_analysis":
-        error = params.get("error", "")
-        return {"id": cmd_id, "result": {"analysis": f"Analysis: {error[:100]}..."}}
-
-    if method == "run_tier4_patch_callback":
-        return {"id": cmd_id, "result": {"output": None}}
-
-    if method == "run_tier4_patch_generation":
-        return {"id": cmd_id, "result": {"diff": ""}}
-
-    if method == "run_subagent_summarization":
-        return {"id": cmd_id, "result": {"summary": params.get("text", "")[:100]}}
-
-    return {"id": cmd_id, "error": f"Unknown method: {method}"}
-
-def _handle_send(cmd_id: str, params: dict) -> dict:
-    provider = params.get("provider", _session_state.get("provider", "gemini"))
-    md_content = params.get("md_content", "")
-    user_message = params.get("user_message", "")
-    base_dir = params.get("base_dir", "")
-    enable_tools = params.get("enable_tools", True)
-
-    try:
-        if provider == "gemini":
-            response = _send_gemini(md_content, user_message, base_dir, enable_tools)
-        elif provider == "anthropic":
-            response = _send_anthropic(md_content, user_message)
-        elif provider == "deepseek":
-            response = _send_deepseek(md_content, user_message)
-        elif provider == "minimax":
-            response = _send_minimax(md_content, user_message)
-        elif provider == "gemini_cli":
-            response = _send_gemini_cli(md_content, user_message, base_dir)
-        else:
-            response = f"ERROR: Unknown provider {provider}"
-
-        return {"id": cmd_id, "result": {"response": response, "provider": provider}}
-    except Exception as e:
-        return {"id": cmd_id, "error": str(e)}
-
-def _send_gemini(md_content: str, user_message: str, base_dir: str, enable_tools: bool) -> str:
-    client = _ensure_google_genai().Client(api_key=_load_credentials()["gemini"]["api_key"])
-    model = _session_state.get("model", "gemini-2.5-flash-lite")
-
-    system_instruction = f"{_session_state.get('custom_system_prompt', '')}\n\n<context>\n{md_content}\n</context>"
-
-    config = {
-        "temperature": _session_state.get("temperature", 0.0),
-        "top_p": _session_state.get("top_p", 1.0),
-        "max_output_tokens": _session_state.get("max_tokens", 8192),
-    }
-
-    response = client.models.generate_content(model=model, contents=user_message, config=config)
-    return response.text
-
-def _send_anthropic(md_content: str, user_message: str) -> str:
-    client = _ensure_anthropic().Anthropic(api_key=_load_credentials()["anthropic"]["api_key"])
-
-    response = client.messages.create(
-        model=_session_state.get("model", "claude-sonnet-4-20250514"),
-        max_tokens=_session_state.get("max_tokens", 8192),
-        system=f"{_session_state.get('custom_system_prompt', '')}\n\n<context>\n{md_content}\n</context>",
-        messages=[{"role": "user", "content": user_message}]
-    )
-    return response.content[0].text
-
-def _send_deepseek(md_content: str, user_message: str) -> str:
-    from openai import OpenAI
-    global _deepseek_client
-    if _deepseek_client is None:
-        _deepseek_client = OpenAI(api_key=_load_credentials()["deepseek"]["api_key"], base_url="https://api.deepseek.com")
-
-    response = _deepseek_client.chat.completions.create(
-        model=_session_state.get("model", "deepseek-chat"),
-        messages=[{"role": "system", "content": f"{_session_state.get('custom_system_prompt', '')}\n\n<context>\n{md_content}\n</context>"}, {"role": "user", "content": user_message}]
-    )
-    return response.choices[0].message.content
-
-def _send_minimax(md_content: str, user_message: str) -> str:
-    from openai import OpenAI
-    global _minimax_client
-    if _minimax_client is None:
-        creds = _load_credentials()
-        _minimax_client = OpenAI(api_key=creds["minimax"]["api_key"], base_url="https://api.minimax.io/v1")
-
-    response = _minimax_client.chat.completions.create(
-        model=_session_state.get("model", "MiniMax-M2.5"),
-        messages=[{"role": "system", "content": f"{_session_state.get('custom_system_prompt', '')}\n\n<context>\n{md_content}\n</context>"}, {"role": "user", "content": user_message}]
-    )
-    return response.choices[0].message.content
-
-def _send_gemini_cli(md_content: str, user_message: str, base_dir: str) -> str:
-    return f"[gemini_cli] {user_message[:50]}..."
-
-def main():
-    print(json.dumps({"type": "ready"}), flush=True)
-
-    for line in sys.stdin:
-        line = line.strip()
-        if not line:
-            continue
-        try:
-            cmd = json.loads(line)
-            response = handle_command(cmd)
-            print(json.dumps(response), flush=True)
-        except json.JSONDecodeError as e:
-            print(json.dumps({"error": f"Invalid JSON: {e}"}), flush=True)
-        except Exception as e:
-            print(json.dumps({"error": str(e)}), flush=True)
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/tests/test_ai_server.py b/tests/test_ai_server.py
deleted file mode 100644
index 493f83e..0000000
--- a/tests/test_ai_server.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import pytest
-import subprocess
-import json
-import time
-import sys
-import os
-
-
-def test_server_starts_and_exits_cleanly():
-    proc = subprocess.Popen(
-        [sys.executable, "-m", "src.ai_server"],
-        stdin=subprocess.PIPE,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-    )
-    proc.stdin.close()
-    proc.stdout.close()
-    proc.stderr.close()
-    proc.wait(timeout=5)
-    assert proc.returncode in (0, 120)
-
-
-def test_server_outputs_ready_marker():
-    proc = subprocess.Popen(
-        [sys.executable, "-m", "src.ai_server"],
-        stdin=subprocess.PIPE,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-    )
-    line = proc.stdout.readline()
-    proc.stdin.close()
-    proc.stdout.close()
-    proc.wait(timeout=5)
-    data = json.loads(line)
-    assert data.get("type") == "ready"
-
-
-def test_server_handles_unknown_method():
-    proc = subprocess.Popen(
-        [sys.executable, "-m", "src.ai_server"],
-        stdin=subprocess.PIPE,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-    )
-    proc.stdout.readline()
-    cmd = json.dumps({"id": "1", "method": "unknown_method", "params": {}})
-    proc.stdin.write(cmd + "\n")
-    proc.stdin.flush()
-    resp = proc.stdout.readline()
-    proc.stdin.close()
-    proc.stdout.close()
-    proc.wait(timeout=5)
-    data = json.loads(resp)
-    assert "error" in data
-    assert "Unknown method" in data["error"]
-
-
-def test_server_handles_list_models():
-    proc = subprocess.Popen(
-        [sys.executable, "-m", "src.ai_server"],
-        stdin=subprocess.PIPE,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-    )
-    proc.stdout.readline()
-    cmd = json.dumps({"id": "1", "method": "list_models", "params": {"provider": "gemini"}})
-    proc.stdin.write(cmd + "\n")
-    proc.stdin.flush()
-    resp = proc.stdout.readline()
-    proc.stdin.close()
-    proc.stdout.close()
-    proc.wait(timeout=5)
-    data = json.loads(resp)
-    assert "result" in data
-    assert "models" in data["result"]
-
-
-def test_server_loads_google_genai_quickly():
-    proc = subprocess.Popen(
-        [sys.executable, "-m", "src.ai_server"],
-        stdin=subprocess.PIPE,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-    )
-    start = time.time()
-    ready_line = proc.stdout.readline()
-    elapsed = time.time() - start
-    proc.stdin.close()
-    proc.stdout.close()
-    proc.wait(timeout=10)
-    assert elapsed < 5, f"Server took {elapsed}s to start"
-    assert proc.returncode == 0
\ No newline at end of file