# forth_bootslop/fetch_blog.py

import csv
import os
import requests
from markdownify import markdownify as md

# Mirror blog posts listed in a CSV file as local Markdown files.
#
# Each CSV row is expected to hold a post title in column 0 and its URL in
# column 1. For every row the page is downloaded, converted from HTML to
# Markdown, and written to OUT_DIR/<sanitized title>.md. Posts whose output
# file already exists are skipped, so the script can be re-run to pick up
# only what is missing.
CSV_PATH = "C:/projects/forth/bootslop/references/TimothyLottesBlog.csv"
OUT_DIR = "C:/projects/forth/bootslop/references/TimothyLottesBlog"

# Seconds to wait on connect/read before giving up on a request. Without a
# timeout, requests.get() can block indefinitely on a stalled server and
# hang the whole run.
REQUEST_TIMEOUT = 30

os.makedirs(OUT_DIR, exist_ok=True)

# newline="" is what the csv module asks for, so that quoted fields with
# embedded line breaks are parsed correctly.
with open(CSV_PATH, "r", encoding="utf-8", newline="") as f:
    for row in csv.reader(f):
        if len(row) < 2:
            continue
        title = row[0].strip()
        url = row[1].strip()

        # Keep only filename-safe characters from the title.
        stem = "".join(c for c in title if c.isalnum() or c in (' ', '-', '_')).strip()
        if not stem or not url:
            # An empty stem would make every such row collide on ".md", and
            # an empty URL can never be fetched.
            print(f"Skipping row with unusable title or URL: {row!r}")
            continue
        filename = stem.replace(' ', '_') + ".md"
        out_path = os.path.join(OUT_DIR, filename)

        if os.path.exists(out_path):
            print(f"Already exists: {filename}")
            continue

        print(f"Fetching: {title}")
        # Write to a temporary sibling first and rename on success, so an
        # interrupted or failed write never leaves a truncated file that a
        # later run would skip as "Already exists".
        tmp_path = out_path + ".part"
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()

            # Convert HTML to markdown
            markdown_text = md(response.text, heading_style="ATX")

            with open(tmp_path, "w", encoding="utf-8") as out_f:
                out_f.write(f"# {title}\n\n**Source:** {url}\n\n{markdown_text}")
            os.replace(tmp_path, out_path)

            print(f"Saved: {filename}")
        except Exception as e:
            # Deliberately broad: this is the per-row best-effort boundary.
            # Any failure (network, HTTP status, conversion, disk) is
            # reported and the loop moves on to the next post.
            print(f"Failed to fetch {title}: {e}")
            try:
                os.remove(tmp_path)
            except OSError:
                # Nothing to clean up (the temp file was never created) or
                # it cannot be removed; either way the final path is untouched.
                pass