Files
forth_bootslop/estimate_context.py
2026-02-19 16:37:12 -05:00

40 lines
1.4 KiB
Python

import os
# Root of the directory tree that estimate_tokens() walks when counting
# text characters and image files.
REFERENCES_DIR = "C:/projects/forth/bootslop/references"
def estimate_tokens(references_dir=None):
    """Estimate the token cost of the reference files under a directory tree.

    Walks the tree recursively, sums the character count of every text file
    (.txt, .md, .csv) and counts every image file (.jpg, .jpeg, .png), then
    prints a short report of the estimated token cost.

    Args:
        references_dir: Root directory to scan. When None (the default), the
            module-level REFERENCES_DIR is used.

    Returns:
        The estimated total number of tokens (text tokens + image tokens).
    """
    if references_dir is None:
        references_dir = REFERENCES_DIR

    text_extensions = {".txt", ".md", ".csv"}
    image_extensions = {".jpg", ".jpeg", ".png"}
    # Approximate 1 token = 4 characters for English text
    chars_per_token = 4
    # Approximate Gemini 1.5 image token cost (typically 258 tokens per image)
    tokens_per_image = 258

    # os.walk() silently yields nothing for a missing directory, which would
    # otherwise look like a legitimate "0 tokens" result.
    if not os.path.isdir(references_dir):
        print(f"Warning: {references_dir} is not a directory; nothing to scan")

    total_text_chars = 0
    total_images = 0
    for root, _, files in os.walk(references_dir):
        for file in files:
            # Extension match is case-insensitive (".PNG" counts as an image).
            ext = os.path.splitext(file)[1].lower()
            filepath = os.path.join(root, file)
            if ext in text_extensions:
                try:
                    # errors="ignore" drops undecodable bytes, so only
                    # filesystem-level failures can be raised here.
                    with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                        total_text_chars += len(f.read())
                except OSError as e:
                    # Best effort: report the unreadable file and keep going.
                    print(f"Error reading {filepath}: {e}")
            elif ext in image_extensions:
                total_images += 1

    estimated_text_tokens = total_text_chars // chars_per_token
    estimated_image_tokens = total_images * tokens_per_image
    total_tokens = estimated_text_tokens + estimated_image_tokens

    print(f"Total Text Files Scanned: {total_text_chars} characters (~{estimated_text_tokens} tokens)")
    print(f"Total Images Scanned: {total_images} images (~{estimated_image_tokens} tokens)")
    print("---")
    print(f"Estimated Total Context Cost: ~{total_tokens} tokens")
    return total_tokens
# Script entry point: run the estimate only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    estimate_tokens()