Files
forth_bootslop/ocr_interaction.py
2026-02-19 16:09:48 -05:00

40 lines
1.4 KiB
Python

import asyncio
import os
from winsdk.windows.storage import StorageFile
from winsdk.windows.graphics.imaging import BitmapDecoder
from winsdk.windows.media.ocr import OcrEngine
from winsdk.windows.globalization import Language
async def ocr_single_image(img_path):
    """Run Windows OCR on one image and save the text next to it as Markdown.

    The recognized lines are written to ``<img_path>.ocr.md`` (UTF-8), one
    line of text per output line, preceded by a ``# OCR Thread: <file name>``
    heading. Progress and failures are reported with ``print``.

    Args:
        img_path: Path of the image file to recognize (``str`` or path-like).

    Returns:
        None. The function returns early, after printing a message, when
        ``img_path`` is not an existing regular file or when the "en-US"
        OCR engine cannot be created.
    """
    # isfile rather than exists: a directory passes an existence check and
    # would only fail later, inside StorageFile.get_file_from_path_async.
    if not os.path.isfile(img_path):
        print(f"File not found: {img_path}")
        return

    # Create the engine before touching the image so that an unavailable
    # OCR language does not cost a file open and a bitmap decode.
    engine = OcrEngine.try_create_from_language(Language("en-US"))
    if not engine:
        print("Failed to create OCR engine")
        return

    # The path is made absolute before it is handed to StorageFile.
    file = await StorageFile.get_file_from_path_async(os.path.abspath(img_path))
    stream = await file.open_read_async()
    decoder = await BitmapDecoder.create_async(stream)
    bitmap = await decoder.get_software_bitmap_async()
    result = await engine.recognize_async(bitmap)

    # The heading keeps its own trailing "\n", so after the join below a
    # blank line separates it from the first recognized line.
    output = [f"# OCR Thread: {os.path.basename(img_path)}\n"]
    if result and result.lines:
        output.extend(line.text for line in result.lines)

    # os.fspath lets path-like inputs work with the string concatenation.
    out_path = os.fspath(img_path) + ".ocr.md"
    with open(out_path, "w", encoding="utf-8") as f:
        f.write("\n".join(output))
    print(f"OCR result saved to {out_path}")
if __name__ == "__main__":
    # Imported here because only the command-line entry point needs it.
    import sys

    # Image processed when no path is given on the command line.
    default_img = r"C:\projects\forth\bootslop\references\X.com - Onat & Lottes Interaction 1.png"
    # An optional first argument overrides the default, so other images can
    # be processed without editing this file.
    img = sys.argv[1] if len(sys.argv) > 1 else default_img
    asyncio.run(ocr_single_image(img))