40 lines
1.4 KiB
Python
40 lines
1.4 KiB
Python
import asyncio
|
|
import os
|
|
from winsdk.windows.storage import StorageFile
|
|
from winsdk.windows.graphics.imaging import BitmapDecoder
|
|
from winsdk.windows.media.ocr import OcrEngine
|
|
from winsdk.windows.globalization import Language
|
|
|
|
async def ocr_single_image(img_path: str) -> None:
    """Run Windows OCR on one image and save the recognized text as Markdown.

    The image is opened through ``StorageFile``, decoded with
    ``BitmapDecoder`` and recognized by an ``en-US`` ``OcrEngine``.  The
    recognized lines are written, one per line, to ``<img_path>.ocr.md``
    (UTF-8) below a ``# OCR Thread: <file name>`` heading.  Status and
    failure messages are reported with ``print``.

    Args:
        img_path: Path of the image to recognize.  May be relative; it is
            converted to an absolute path before being passed to WinRT.

    Returns:
        None.  The function also returns early, without writing anything,
        when ``img_path`` does not exist or the OCR engine cannot be created.
    """
    if not os.path.exists(img_path):
        print(f"File not found: {img_path}")
        return

    file = await StorageFile.get_file_from_path_async(os.path.abspath(img_path))
    stream = await file.open_read_async()
    try:
        decoder = await BitmapDecoder.create_async(stream)
        bitmap = await decoder.get_software_bitmap_async()

        engine = OcrEngine.try_create_from_language(Language("en-US"))
        if not engine:
            print("Failed to create OCR engine")
            return

        # Recognize while the stream is still open, so the decoded bitmap is
        # never used after its backing stream has been released.
        result = await engine.recognize_async(bitmap)
    finally:
        # Release the underlying file handle on every exit path (success,
        # early return, or exception) instead of waiting for finalization.
        stream.close()

    # The heading already ends in "\n", so joining with "\n" below leaves one
    # blank line between the heading and the first recognized line.
    output = [f"# OCR Thread: {os.path.basename(img_path)}\n"]
    if result and result.lines:
        output.extend(line.text for line in result.lines)

    out_path = img_path + ".ocr.md"
    content = "\n".join(output)
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(content)
    print(f"OCR result saved to {out_path}")
|
|
|
|
if __name__ == "__main__":
    # Local import: ``sys`` is only needed when the file is run as a script.
    import sys

    # Sample image used when no path is supplied on the command line, so
    # running the script without arguments behaves exactly as before.
    default_img = r"C:\projects\forth\bootslop\references\X.com - Onat & Lottes Interaction 1.png"

    # Usage: python <this script> [IMAGE_PATH]
    img = sys.argv[1] if len(sys.argv) > 1 else default_img
    asyncio.run(ocr_single_image(img))
|