"""Run Windows' built-in OCR on a single image and save the text as Markdown."""

import asyncio
import os
import sys
from typing import Optional

from winsdk.windows.globalization import Language
from winsdk.windows.graphics.imaging import BitmapDecoder
from winsdk.windows.media.ocr import OcrEngine
from winsdk.windows.storage import StorageFile


async def ocr_single_image(img_path: str, *, language_tag: str = "en-US") -> Optional[str]:
    """Recognize the text in ``img_path`` and write it next to the image.

    The result is written to ``img_path + ".ocr.md"`` as UTF-8: a header line
    naming the image, followed by one line per recognized OCR line.

    Args:
        img_path: Path to the image file to recognize.
        language_tag: BCP-47 tag of the recognizer language to use. Keyword-only;
            defaults to ``"en-US"``.

    Returns:
        The path of the written ``.ocr.md`` file, or ``None`` when the image
        does not exist or no OCR engine could be created for ``language_tag``
        (a message is printed in both cases).
    """
    if not os.path.exists(img_path):
        print(f"File not found: {img_path}")
        return None

    # Create the engine before touching the file so that an unavailable
    # language pack fails fast, with no stream left to clean up.
    engine = OcrEngine.try_create_from_language(Language(language_tag))
    if not engine:
        print("Failed to create OCR engine")
        return None

    # StorageFile requires an absolute path.
    file = await StorageFile.get_file_from_path_async(os.path.abspath(img_path))
    stream = await file.open_read_async()
    try:
        decoder = await BitmapDecoder.create_async(stream)
        bitmap = await decoder.get_software_bitmap_async()
        try:
            result = await engine.recognize_async(bitmap)
        finally:
            # Release the decoded pixel buffer as soon as recognition is done.
            bitmap.close()
    finally:
        # Release the underlying file handle even if decoding/recognition fails.
        stream.close()

    output = [f"# OCR Thread: {os.path.basename(img_path)}\n"]
    if result and result.lines:
        output.extend(line.text for line in result.lines)

    out_path = img_path + ".ocr.md"
    # Join with a literal newline so the output does not depend on the
    # platform's line-separator constant.
    content = "\n".join(output)
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(content)

    print(f"OCR result saved to {out_path}")
    return out_path


if __name__ == "__main__":
    # An optional first CLI argument overrides the default image path.
    default_img = r"C:\projects\forth\bootslop\references\X.com - Onat & Lottes Interaction 1.png"
    img = sys.argv[1] if len(sys.argv) > 1 else default_img
    asyncio.run(ocr_single_image(img))