"""Estimate the LLM context cost (in tokens) of a directory of reference files."""

import os

REFERENCES_DIR = "C:/projects/forth/bootslop/references"

# File types that contribute to the estimate; anything else is skipped.
_TEXT_EXTENSIONS = frozenset({".txt", ".md", ".csv"})
_IMAGE_EXTENSIONS = frozenset({".jpg", ".jpeg", ".png"})

# Approximate 1 token = 4 characters for English text.
_CHARS_PER_TOKEN = 4
# Approximate Gemini 1.5 image token cost (typically 258 tokens per image).
_TOKENS_PER_IMAGE = 258


def estimate_tokens(references_dir: str = REFERENCES_DIR) -> int:
    """Print and return a rough token estimate for the files under a directory.

    The directory is walked recursively. Files are classified by their
    lower-cased extension: text files (.txt, .md, .csv) are read as UTF-8
    with undecodable bytes dropped and contribute one token per four
    characters; image files (.jpg, .jpeg, .png) are only counted, not
    opened, and contribute a flat per-image cost. Other files are ignored.

    Args:
        references_dir: Root directory to scan. Defaults to REFERENCES_DIR.

    Returns:
        The estimated total number of tokens (text plus images). A missing
        directory yields 0 after a warning is printed.
    """
    # os.walk() silently yields nothing for a missing directory, which would
    # otherwise look like a legitimate "0 tokens" result.
    if not os.path.isdir(references_dir):
        print(f"Warning: {references_dir} is not a directory; nothing to scan")

    total_text_files = 0
    total_text_chars = 0
    total_images = 0

    for root, _, files in os.walk(references_dir):
        for file in files:
            ext = os.path.splitext(file)[1].lower()
            filepath = os.path.join(root, file)

            if ext in _TEXT_EXTENSIONS:
                # errors="ignore" rules out decode failures, so only I/O
                # problems (permissions, vanished files, ...) can occur here.
                try:
                    with open(
                        filepath, "r", encoding="utf-8", errors="ignore"
                    ) as f:
                        total_text_chars += len(f.read())
                except OSError as e:
                    # Best effort: report the unreadable file and keep going.
                    print(f"Error reading {filepath}: {e}")
                else:
                    total_text_files += 1
            elif ext in _IMAGE_EXTENSIONS:
                total_images += 1

    estimated_text_tokens = total_text_chars // _CHARS_PER_TOKEN
    estimated_image_tokens = total_images * _TOKENS_PER_IMAGE
    total_tokens = estimated_text_tokens + estimated_image_tokens

    print(
        f"Total Text Files Scanned: {total_text_files} files, "
        f"{total_text_chars} characters (~{estimated_text_tokens} tokens)"
    )
    print(
        f"Total Images Scanned: {total_images} images "
        f"(~{estimated_image_tokens} tokens)"
    )
    print("---")
    print(f"Estimated Total Context Cost: ~{total_tokens} tokens")

    return total_tokens


if __name__ == "__main__":
    estimate_tokens()