# Commit: curation & gather
# New file: estimate_context.py (normal file, 39 lines added)
import os
|
||||
|
||||
# Root directory that estimate_tokens() walks recursively for text and image
# reference files. Hard-coded absolute path (note the drive-letter prefix).
REFERENCES_DIR = "C:/projects/forth/bootslop/references"
|
||||
|
||||
def estimate_tokens(references_dir=None, *, chars_per_token: int = 4,
                    tokens_per_image: int = 258) -> int:
    """Estimate and print the context-token cost of a reference directory.

    Walks the directory tree recursively. Files ending in .txt, .md or .csv
    are read as UTF-8 text (undecodable bytes are ignored) and contribute
    their character count; files ending in .jpg, .jpeg or .png are only
    counted. Extensions are matched case-insensitively and every other file
    is skipped.

    Args:
        references_dir: Directory to scan. When None (the default), the
            module-level REFERENCES_DIR is used.
        chars_per_token: Characters assumed to make up one token of text.
        tokens_per_image: Flat token cost charged for each image.

    Returns:
        The estimated total token count (text tokens plus image tokens).
        A summary is also printed to stdout.
    """
    if references_dir is None:
        # Resolved at call time rather than in the signature so the function
        # can be defined and called without the module default being bound.
        references_dir = REFERENCES_DIR

    text_extensions = {".txt", ".md", ".csv"}
    image_extensions = {".jpg", ".jpeg", ".png"}

    total_text_chars = 0
    total_images = 0

    def report_walk_error(error):
        # os.walk() ignores directory-listing errors unless given a callback;
        # without this, a missing or unreadable directory would silently be
        # reported as "0 tokens".
        print(f"Error reading {error.filename}: {error}")

    for root, _, files in os.walk(references_dir, onerror=report_walk_error):
        for filename in files:
            ext = os.path.splitext(filename)[1].lower()

            if ext in text_extensions:
                filepath = os.path.join(root, filename)
                try:
                    with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                        total_text_chars += len(f.read())
                except OSError as e:
                    # Best effort: report the unreadable file and keep going.
                    print(f"Error reading {filepath}: {e}")
            elif ext in image_extensions:
                total_images += 1

    # Approximate 1 token = 4 characters for English text (by default).
    # The division is applied to the grand total, not per file.
    estimated_text_tokens = total_text_chars // chars_per_token
    # Approximate Gemini 1.5 image token cost (typically 258 tokens per image)
    estimated_image_tokens = total_images * tokens_per_image

    total_tokens = estimated_text_tokens + estimated_image_tokens

    print(f"Total Text Files Scanned: {total_text_chars} characters (~{estimated_text_tokens} tokens)")
    print(f"Total Images Scanned: {total_images} images (~{estimated_image_tokens} tokens)")
    print("---")
    print(f"Estimated Total Context Cost: ~{total_tokens} tokens")

    return total_tokens
|
||||
|
||||
# Run the estimate only when this file is executed as a script, not on import.
if __name__ == "__main__":
    estimate_tokens()
|
||||
Reference in New Issue
Block a user