# forth_bootslop/estimate_context.py

import os

# Directory tree that estimate_tokens() walks for reference files
# (hard-coded absolute path).
REFERENCES_DIR = "C:/projects/forth/bootslop/references"

def estimate_tokens(
    references_dir=None,
    *,
    chars_per_token=4,
    tokens_per_image=258,
) -> int:
    """Estimate the context-token cost of the files under a directory tree.

    Walks the tree recursively, counting the decoded characters of every
    text file (.txt, .md, .csv) and the number of image files
    (.jpg, .jpeg, .png). Files with any other extension are ignored.
    A summary is printed to stdout.

    Args:
        references_dir: Root directory to scan. When omitted or None, the
            module-level REFERENCES_DIR is used.
        chars_per_token: Characters assumed per text token (rough
            approximation for English text).
        tokens_per_image: Flat token cost assumed per image (approximate
            Gemini 1.5 image cost, typically 258 tokens per image).

    Returns:
        The estimated total number of tokens (text plus images).
    """
    if references_dir is None:
        references_dir = REFERENCES_DIR

    text_extensions = {".txt", ".md", ".csv"}
    image_extensions = {".jpg", ".jpeg", ".png"}

    # os.walk() ignores errors by default, so a missing directory would
    # otherwise be indistinguishable from an empty one in the summary.
    if not os.path.isdir(references_dir):
        print(f"Warning: references directory not found: {references_dir}")

    total_text_chars = 0
    total_images = 0

    for root, _, files in os.walk(references_dir):
        for file in files:
            ext = os.path.splitext(file)[1].lower()

            if ext in image_extensions:
                total_images += 1
                continue
            if ext not in text_extensions:
                continue

            filepath = os.path.join(root, file)
            try:
                # errors="ignore" drops undecodable bytes so that a stray
                # non-UTF-8 file cannot abort the whole scan.
                with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                    total_text_chars += len(f.read())
            except OSError as e:
                # Best effort: report the unreadable file and keep going.
                print(f"Error reading {filepath}: {e}")

    estimated_text_tokens = total_text_chars // chars_per_token
    estimated_image_tokens = total_images * tokens_per_image
    total_tokens = estimated_text_tokens + estimated_image_tokens

    print(f"Total Text Files Scanned: {total_text_chars} characters (~{estimated_text_tokens} tokens)")
    print(f"Total Images Scanned: {total_images} images (~{estimated_image_tokens} tokens)")
    print("---")
    print(f"Estimated Total Context Cost: ~{total_tokens} tokens")

    return total_tokens

# Run the estimate only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    estimate_tokens()