#!/usr/bin/env python3
#
# SPDX-License-Identifier: ISC
#
# Copyright © 2019 Free Software Foundation of India.
#
"""Generate the site's Atom feed.

For every section in SECTIONS, the Markdown files under ``md/<section>/``
are turned into feed entries using ``templates/atom/entry.atom``; the
entries are then placed into ``templates/atom/feed.atom`` and the result
is written to TARGET.
"""

import datetime
import html
import os
import os.path
import re
import sys

import mistune

URL = "https://fsf.org.in"
SECTIONS = ["news", "article", "case-study"]
TARGET = "/".join(["_build", "feed.atom"])

# Placeholder strings that are substituted inside the feed / entry templates.
#
# NOTE(review): every placeholder value below is the empty string, so
# ``str.replace(placeholder, value, 1)`` inserts the value at the very start
# of the template instead of replacing a marker. Presumably the marker text
# was lost at some point -- confirm against templates/atom/*.atom.
F_PH = {
    "updated": "",
    "entries": "",
}
E_PH = {
    "id": "",
    "title": "",
    "link": "",
    "updated": "",
    "content": "",
}


def err(s):
    """Print ``Error: <s>`` to stderr and terminate with exit status 1."""
    print("Error: {}".format(s), file=sys.stderr)
    sys.exit(1)


def fok(f):
    """Tell whether directory entry *f* should be processed.

    Returns False when the base name of ``f.path`` starts with ``.`` or
    ``#`` or ends with ``~``; True otherwise.
    """
    p = os.path.basename(f.path)
    return re.search(r"(^[\.\#])|(~$)", p) is None


def files(sec):
    """Return the accepted directory entries of ``md/<sec>``.

    Entries rejected by fok() are reported on stdout and skipped.
    """
    fs = []
    # The context manager closes the scandir iterator deterministically;
    # the DirEntry objects collected here stay usable afterwards.
    with os.scandir("md" + "/" + sec) as entries:
        for f in entries:
            if fok(f):
                fs.append(f)
            else:
                print("Ignoring {}".format(f.path))
    return fs


def read(f):
    """Return the full text of the file at path *f*, decoded as UTF-8."""
    # Explicit encoding so the result does not depend on the locale.
    with open(f, encoding="utf-8") as fh:
        return fh.read()


def write(p, c):
    """Write text *c* to path *p* as UTF-8, creating parent directories."""
    d = os.path.dirname(p)
    # dirname is "" for a bare file name; os.makedirs("") would raise.
    if d:
        os.makedirs(d, exist_ok=True)
    with open(p, "w", encoding="utf-8") as fh:
        fh.write(c)


def slug(p):
    """Split the ``*.md`` file name found in path *p*.

    Returns a 3-tuple ``(full, base, suffix)`` where *full* is the name
    without ``.md``, *base* is the leading run of letters, digits and
    hyphens, and *suffix* is the optional two-lowercase-letter part after
    a dot (None when absent; presumably a language code -- verify).
    Exits through err() when nothing matches.
    """
    m = re.search(r"(([0-9a-zA-Z\-]+)(\.([a-z]{2}))?)\.md", p)
    if not m:
        err("Unable to get slug")
    return m.group(1), m.group(2), m.group(4)


def template(type):
    """Return the contents of ``templates/atom/<type>.atom``."""
    return read("templates/atom/{}.atom".format(type))


def title(c):
    """Return the text of the first ``# `` heading line in *c*.

    Exits through err() when no such line exists.
    """
    m = re.search(r"^\# (.+)$", c, re.M)
    if not m:
        err("Title not found")
    return m.group(1)


def elink(sec, s, l):
    """Return the entry URL ``URL/<sec>/<s>``, plus ``/<l>`` when *l* is set."""
    parts = [URL, sec, s]
    if l is not None:
        parts.append(l)
    return "/".join(parts)


def time(c):
    """Return the ``pubdate: YYYYMMDD`` value of *c* as a feed timestamp.

    The result has the form ``YYYY-MM-DDT00:00:00Z``. Exits through err()
    when the field is missing or does not name a valid calendar date.
    """
    m = re.search(r"pubdate: ([0-9]{8})", c)
    if not m:
        err("Publication date not found")
    raw = m.group(1)
    try:
        d = datetime.datetime.strptime(raw, "%Y%m%d")
    except ValueError:
        # Eight digits that are not a real date (e.g. month 13).
        err("Invalid publication date {}".format(raw))
    return d.strftime("%Y-%m-%d") + "T00:00:00Z"


def markdown(c):
    """Render Markdown text *c* with mistune and return the result.

    Any exception raised by mistune ends the program through err().
    """
    try:
        return mistune.markdown(
            c, False, parse_block_html=True, parse_inline_html=True
        )
    except Exception as e:
        err("Markdown parsing failed for {}".format(e))


def massage(c):
    """Escape *c* for embedding and flatten it onto one line.

    HTML-escapes the text, turns newlines into spaces and collapses runs
    of spaces into a single space.
    """
    c = html.escape(c)
    c = c.replace("\n", " ")
    return re.sub(r" +", " ", c)


def content(c):
    """Return the feed-ready body of document *c*.

    Takes everything after the first ``# `` heading line, renders it with
    markdown() and passes the result through massage(). Exits through
    err() when there is no heading line.
    """
    m = re.search(r"^\# (.+)$", c, re.M)
    if not m:
        err("Unable to slurp content")
    return massage(markdown(c[m.end() :]))


def now():
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    # The trailing "Z" designates UTC, so the clock must be read in UTC
    # rather than in local time.
    n = datetime.datetime.now(datetime.timezone.utc)
    return n.strftime("%Y-%m-%dT%H:%M:%SZ")


def entry(sec, f):
    """Build the feed entry for directory entry *f* of section *sec*.

    Returns ``(entry_id, entry_text)`` where *entry_id* is the publication
    timestamp, a colon and the full slug, and *entry_text* is the entry
    template with the E_PH placeholders substituted.
    """
    c = read(f.path)
    u, s, l = slug(f.path)
    t = time(c)
    eid = t + ":" + u
    e = template("entry")
    e = e.replace(E_PH["id"], eid, 1)
    # NOTE(review): the title is inserted without escaping, unlike the
    # content -- confirm that is what the entry template expects.
    e = e.replace(E_PH["title"], title(c), 1)
    e = e.replace(E_PH["link"], elink(sec, s, l), 1)
    e = e.replace(E_PH["updated"], t, 1)
    e = e.replace(E_PH["content"], content(c), 1)
    return eid, e


def esort(esd):
    """Return the values of *esd* ordered by key, highest key first."""
    return [esd[k] for k in sorted(esd, reverse=True)]


def feed(es):
    """Return the feed template with the F_PH placeholders substituted.

    *es* is the already concatenated text of all entries.
    """
    f = template("feed")
    f = f.replace(F_PH["updated"], now(), 1)
    f = f.replace(F_PH["entries"], es, 1)
    return f


def process(sec, esd):
    """Add the entries of section *sec* to *esd* and return *esd*.

    *esd* maps entry id to entry text and is modified in place; an entry
    whose id is already present replaces the earlier one.
    """
    for f in files(sec):
        eid, e = entry(sec, f)
        esd[eid] = e
    return esd


def stringify(esd):
    """Return all entries of *esd* concatenated in esort() order."""
    return "".join(esort(esd))


def commit(es):
    """Render the feed around entries text *es* and write it to TARGET."""
    write(TARGET, feed(es))


def run():
    """Collect the entries of every section and write the feed."""
    esd = {}
    for sec in SECTIONS:
        esd = process(sec, esd)
    commit(stringify(esd))


if __name__ == "__main__":
    run()