path: root/bin/html



#!/usr/bin/env python3
#
#   SPDX-License-Identifier: ISC
#
#   Copyright © 2019 Free Software Foundation of India.
#

import datetime
import os
import os.path
import re
import stat as st
import sys

import mistune

# Site sections to build. Each name is used as a sub-directory of md/,
# as a template name under templates/html/, and as a sub-directory of
# _build/.
SECTIONS = [
    'news',
    'article',
]

# Placeholder markers that are substituted in the HTML templates.
PH = {
    'title': '<!-- ITEM-TITLE -->',
    'author': '<!-- AUTHOR -->',
    'date': '<!-- DATE -->',
    'content': '<!-- MAIN-CONTENT -->',
    'lang': '<!-- LANG-LIST -->',
}


def err(s):
    """Print ``s`` prefixed with 'Error: ' on stdout, then exit with status 1."""
    message = 'Error: {}'.format(s)
    print(message)
    # Equivalent to sys.exit(1): both raise SystemExit with code 1.
    raise SystemExit(1)


def fok(f):
    """Tell whether a directory entry should be processed.

    f: any object with a ``path`` attribute (e.g. an os.DirEntry).

    Returns False when the file name starts with '.' or '#', or ends with
    '~'; True otherwise.
    """
    name = os.path.basename(f.path)
    return re.search(r'(^[\.\#])|(~$)', name) is None


def files(sec):
    """Return the usable directory entries found under ``md/<sec>``.

    sec: section name, i.e. the sub-directory of ``md`` to scan.

    Entries rejected by fok() are reported on stdout and skipped.
    Returns a list of the entries yielded by os.scandir(), in the order
    the operating system returns them.
    """
    fs = []

    # The context manager closes the scandir iterator even when the loop
    # is interrupted by an error, instead of leaving it to the garbage
    # collector.
    with os.scandir('md' + '/' + sec) as entries:
        for f in entries:
            if fok(f):
                fs.append(f)
            else:
                print('Ignoring {}'.format(f.path))

    return fs


def read(f):
    """Return the whole text content of the file at path ``f``."""
    with open(f) as handle:
        return handle.read()


def write(p, c):
    """Write text ``c`` to path ``p`` and set the file mode to rwxr-xr-x.

    p: destination path; missing parent directories are created.
    c: text to write; any existing file at ``p`` is overwritten.
    """
    d = os.path.dirname(p)

    # dirname is '' for a bare file name, and os.makedirs('') raises, so
    # only create directories when there is a directory part. exist_ok
    # avoids failing if the directory appears between a check and the
    # creation.
    if d:
        os.makedirs(d, exist_ok=True)

    with open(p, 'w') as f:
        f.write(c)

    # NOTE(review): the execute bits on generated pages look deliberate
    # (possibly a web-server convention) -- confirm before changing.
    os.chmod(p, st.S_IRUSR | st.S_IWUSR | st.S_IXUSR
             | st.S_IRGRP | st.S_IXGRP
             | st.S_IROTH | st.S_IXOTH)


def slug(p):
    """Split a source path into its slug and optional language code.

    p: path of a Markdown source such as ``md/news/some-item.hi.md``.

    Returns a ``(slug, lang)`` tuple; ``lang`` is None when the file name
    has no two-letter language suffix. Calls err() when nothing matches.

    NOTE(review): the pattern is not anchored, so it matches the first
    run of letters/hyphens that is followed by ``.md`` anywhere in the
    path (e.g. 'foo2bar.md' yields the slug 'bar').
    """
    found = re.search(r'([a-zA-Z\-]+)(\.([a-z]{2}))?\.md', p)

    if found is None:
        err('Unable to get slug')

    # Group 2 is the dotted suffix including the dot; only the bare
    # language code (group 3) is returned.
    name, _, lang = found.groups()
    return name, lang


def title(c):
    """Return the text of the first ``# `` heading line found in ``c``.

    Calls err() when the text contains no such line.
    """
    heading = re.search(r'^\# (.+)$', c, flags=re.MULTILINE)

    if heading is None:
        err('Title not found')

    return heading.group(1)


def author(c):
    """Return the byline for the text ``c``.

    The name is taken from an ``<!-- author: NAME -->`` comment. Returns
    'By NAME', or the empty string when no such comment is present.
    """
    found = re.search(r'<!-- author: ([\w\. ]+) -->', c)
    return 'By ' + found.group(1) if found else ''


def date(c):
    """Return the eight digits that follow ``pubdate: `` in the text ``c``.

    Calls err() when no publication date is present.
    """
    stamp = re.search(r'pubdate: ([0-9]{8})', c)

    if stamp is None:
        err('Publication date not found')

    return stamp.group(1)


def content(c):
    """Return everything in ``c`` that follows the first ``# `` heading line.

    The heading line itself and anything before it are dropped. Calls
    err() when the text has no heading.
    """
    heading = re.search(r'^\# (.+)$', c, flags=re.MULTILINE)

    if heading is None:
        err('Unable to slurp content')

    body_start = heading.end()
    return c[body_start:]


def template(type):
    """Return the text of the HTML template named ``type``.

    Templates are read from ``templates/html/<type>.html``.
    """
    location = 'templates/html/{}.html'.format(type)
    return read(location)


def datefmt(d):
    """Convert a ``YYYYMMDD`` string into the form 'Month DD, YYYY'."""
    parsed = datetime.datetime.strptime(d, '%Y%m%d')
    return parsed.strftime('%B %d, %Y')


def markdown(c):
    """Render the Markdown text ``c`` to HTML with mistune.

    Any exception raised by the renderer is reported through err().
    """
    try:
        # NOTE(review): the positional False is presumably mistune's
        # ``escape`` flag -- confirm against the installed version.
        rendered = mistune.markdown(
            c,
            False,
            parse_block_html=True,
            parse_inline_html=True,
        )
    except Exception as exc:
        err('Markdown parsing failed: {}'.format(exc))

    return rendered


def lhref(sec, s, l):
    """Return an HTML link to the ``l`` translation of item ``s`` in ``sec``.

    English lives at ``/<sec>/<s>``; every other language at
    ``/<sec>/<s>/<l>``. The link text is the language code.
    """
    if l != 'en':
        return '<a href="/{}/{}/{}">{}</a>'.format(sec, s, l, l)

    return '<a href="/{}/{}">{}</a>'.format(sec, s, l)


def langhtml(sec, s, lng, lngs):
    """Build the language-switcher HTML for one page.

    sec: section name.
    s: slug of the item.
    lng: language of the page being rendered; None means English.
    lngs: all language codes available for the item.

    The current language is shown as plain text and every other one as
    a link; the entries are joined with ' | ' and substituted into the
    'lang' template.
    """
    entries = [
        l if (l == lng or (lng is None and l == 'en')) else lhref(sec, s, l)
        for l in lngs
    ]
    switcher = ' | '.join(entries)

    return template('lang').replace(PH['lang'], switcher, 1)


def html(sec, f, lm):
    """Render one Markdown source file into its final HTML page.

    sec: section name; also selects the page template via template(sec).
    f: directory entry (any object with a ``path`` attribute) of the
       Markdown source.
    lm: mapping of slug -> list of available language codes, as built by
        langmap().

    Returns a ``(slug, html, lang)`` tuple; ``lang`` is None when the
    file name carries no language suffix.
    """
    c = read(f.path)
    s, l = slug(f.path)

    # Extract the metadata from the raw source before ``c`` is reduced to
    # the text that follows the heading.
    t = title(c)
    a = author(c)
    d = date(c)
    c = content(c)

    h = template(sec)
    # Up to two occurrences of the title placeholder are filled in
    # (presumably the <title> element and the page heading -- confirm
    # against the templates).
    h = h.replace(PH['title'], t, 2)
    h = h.replace(PH['date'], datefmt(d), 1)
    h = h.replace(PH['content'], markdown(c), 1)

    # The former guard here, `if author:`, tested the author() function
    # object rather than the extracted value and was therefore always
    # true. The replacement is kept unconditional so the output is
    # unchanged: author() returns '' when there is no byline, which
    # simply removes the placeholder.
    h = h.replace(PH['author'], a, 1)

    # The language switcher is only shown when translations exist.
    if len(lm[s]) > 1:
        h = h.replace(PH['lang'], langhtml(sec, s, l, lm[s]), 1)

    return s, h, l


def langmap(sec):
    """Map every slug in section ``sec`` to its available language codes.

    sec: section name passed on to files().

    Returns a dict of slug -> sorted list of language codes. A source
    file without a language suffix counts as 'en'. Each code is listed
    once per slug, even if both ``<slug>.md`` and ``<slug>.en.md`` exist.
    """
    lm = {}

    for f in files(sec):
        s, l = slug(f.path)
        langs = lm.setdefault(s, [])

        code = l or 'en'
        if code not in langs:
            langs.append(code)

    # Sort once per slug instead of after every file.
    for langs in lm.values():
        langs.sort()

    return lm


def process(sec):
    """Render every source file of section ``sec`` into ``_build``.

    English pages (no language suffix) are written to
    ``_build/<sec>/<slug>/index.html``; translations go to
    ``_build/<sec>/<slug>/<lang>/index.html``.
    """
    lm = langmap(sec)

    for entry in files(sec):
        name, page, lang = html(sec, entry, lm)

        parts = ['_build', sec, name]
        if lang:
            parts.append(lang)
        parts.append('index.html')

        write('/'.join(parts), page)


def run():
    """Build every section listed in SECTIONS."""
    for section in SECTIONS:
        process(section)


# Only build when executed as a script, not when imported.
if __name__ == '__main__':
    run()