lpschedule-generator

libreplanet schedule generator - ricketyspace.net/lpschedule-generator
git clone git://git.ricketyspace.net/lpschedule-generator.git
Log | Files | Refs

lps_gen.py (20271B)


      1 # -*- coding: utf-8 -*-
      2 #
      3 #   SPDX-License-Identifier: CC0-1.0
      4 #
      5 #   This file is part of lpschedule-generator.
      6 #
      7 
      8 
      9 import json
     10 import re
     11 import sys
     12 
     13 import pkg_resources as pkgr
     14 import pytz
     15 
     16 from argparse import ArgumentParser
     17 from collections import OrderedDict
     18 from datetime import datetime
     19 from os import path
     20 
     21 from bs4 import BeautifulSoup
     22 from icalendar import Calendar, Event, vCalAddress, vText, vDatetime
     23 from jinja2 import Environment, FileSystemLoader
     24 from jinja2.exceptions import TemplateNotFound
     25 from mistune import Renderer, Markdown
     26 from pytz import timezone
     27 from unidecode import unidecode
     28 
     29 from lpschedule_generator._version import __version__
     30 
     31 
# Module-level dictionary that holds the LP schedule.  The
# `LPSRenderer` methods fill it in while the schedule Markdown is
# parsed; `LPSMarkdown.parse` rebinds it to a fresh OrderedDict before
# each parse and returns it.
lps_dict = OrderedDict()

# Module-level dictionary that holds the LP speakers.
# `LPSpeakersRenderer.__init__` rebinds it to a fresh OrderedDict with
# the 'keynote-speakers' and 'speakers' lists; `LPSpeakersMarkdown.parse`
# returns it.
lpspeakers_dict = OrderedDict()
     37 
     38 
     39 def read_file(filename):
     40     """Read file and return it as a string.
     41 
     42     :param str filename: Absolute pathname of the file.
     43 
     44     """
     45     content = ''
     46 
     47     try:
     48         with open(filename, 'r') as f:
     49             for line in f:
     50                 content = content + line
     51     except IOError:
     52         print('Error: unable to open {}'.format(filename))
     53 
     54     return content
     55 
     56 
     57 def write_file(filename, filecontent):
     58     """Write `filecontent` to `filename`.
     59 
     60     :param str filename:
     61         Absolute pathname of the file.
     62     :param str filecontent:
     63         Data to write to `filename`.
     64 
     65     """
     66     file_ = None
     67     try:
     68       file_   = open(filename, 'w')
     69       file_.write(filecontent)
     70       file_.close()
     71     except IOError:
     72         print('Error creating and writing content to {}'.format(filename))
     73         exit(1)
     74 
     75 
     76 def json_write(filename, obj):
     77     """Serialize `obj` to JSON formatted `str` to `filename`.
     78 
     79     `filename` is written relative to the current working directory.
     80 
     81     """
     82     write_file(filename, json.dumps(obj, ensure_ascii=False, indent=4))
     83 
     84 
     85 def json_read(filename):
     86     """Deserialize JSON from `filename` into Python object.
     87 
     88     """
     89     if not path.isfile(filename):
     90         return False
     91 
     92     return json.loads(read_file(filename),
     93                       object_pairs_hook=OrderedDict)
     94 
     95 
     96 def template_read(name):
     97     """Return template as `str`.
     98     """
     99     p = 'lpschedule_generator'
    100     r = 'data/{}.jinja2'.format(name)
    101 
    102     t = None
    103     try:
    104         t = pkgr.resource_string(p, r).decode('utf-8')
    105     except Exception as e:
    106         print(e, file=sys.stderr)
    107 
    108     return t
    109 
    110 
    111 class LPiCal(object):
    112     """Used for producing iCal for LP schedule.
    113     """
    114 
    115     def __init__(self, lps_dict, lp_year):
    116         self.lps_dict = lps_dict
    117         self.lp_year = str(lp_year)
    118 
    119         # Matches strings like '09:45 - 10:30: Lorem ipsum dolor sit.'
    120         self.timeslot_re = re.compile(r'(\d+:\d+).+?(\d+:\d+)'
    121                                       r'\s*[:-]?\s*(.+\b)?')
    122         # Matches strings like 'Saturday, March 19'
    123         self.month_day_re = re.compile(r'\w+,\s*([a-zA-Z]+)\s*(\d+)')
    124 
    125         self.cal = Calendar()
    126         self.cal.add('prodid', '-//lpschedule generator//mxm.dk//')
    127         self.cal.add('version', '2.0')
    128         self.cal.add('x-wr-calname', 'LibrePlanet {}'.format(self.lp_year))
    129 
    130         # RFC 2445 requires DTSTAMP to be in UTC. DTSTAMP is used in
    131         # VEVENT (Event object, see `add_event` method).
    132         self.dtstamp = vDatetime(datetime.now(pytz.utc))
    133 
    134         # used to generate uid for ical.
    135         self.ucounter = 0
    136 
    137 
    138     def gen_uid(self):
    139         """Returns an unique id.
    140 
    141         Used for Event object.
    142         """
    143         self.ucounter = self.ucounter + 1
    144         return '{}@LP{}@libreplanet.org'.format(str(self.ucounter),
    145                                                 self.lp_year)
    146 
    147 
    148     def get_timeslot(self, s):
    149         """Get start and end time for a timeslot.
    150         """
    151 
    152         timeslot = self.timeslot_re.search(s)
    153 
    154         if not timeslot:
    155             return None, None, None
    156 
    157         t_start = timeslot.group(1)
    158         t_end = timeslot.group(2)
    159         name = timeslot.group(3) or ''
    160 
    161         return t_start, t_end, name
    162 
    163 
    164     def get_month_day(self, s):
    165         """Get month and day.
    166         """
    167 
    168         month_day = self.month_day_re.search(s)
    169 
    170         if (not month_day) or (len(month_day.groups()) < 2):
    171             return None, None
    172 
    173         month = month_day.group(1)
    174         day = month_day.group(2)
    175 
    176         return month, day
    177 
    178 
    179     def mk_datetime(self, month, day, time):
    180         """Returns datetime object (EST).
    181         """
    182         # Day %d
    183         # Month %B
    184         # Year %Y
    185         # Hour %H (24-hr)
    186         # Minute %M (zero padded)
    187         # Second %S (zero padded)
    188         datetime_fmt = '%d %B %Y %H:%M:%S'
    189         eastern = timezone('US/Eastern')
    190 
    191         hour = time.split(':')[0]
    192         minute = time.split(':')[1]
    193         datetime_str = '{} {} {} {}:{}:{}'.format(day, month,
    194                                                   self.lp_year,
    195                                                   hour.zfill(2),
    196                                                   minute.zfill(2),
    197                                                   '00')
    198 
    199         dt_object = datetime.strptime(datetime_str, datetime_fmt)
    200 
    201         return vDatetime(eastern.localize(dt_object))
    202 
    203 
    204     def mk_attendee(self, speaker):
    205         """Make Attendee to be added to an Event object.
    206 
    207         See `add_event` method.
    208         """
    209         # Get rid of HTML (<a> element, etc) in `speaker`
    210         speaker = BeautifulSoup(speaker, 'html.parser').get_text()
    211 
    212         attendee = vCalAddress('invalid:nomail')
    213         attendee.params['cn'] = vText(speaker)
    214         attendee.params['ROLE'] = vText('REQ-PARTICIPANT')
    215         attendee.params['CUTYPE'] = vText('INDIVIDUAL')
    216 
    217         return attendee
    218 
    219 
    220     def add_event(self, month, day, t_start, t_end, t_name, session,
    221                       session_info):
    222         """Adds event to calendar.
    223         """
    224         event = Event()
    225         event['uid'] = self.gen_uid()
    226         event['dtstamp'] = self.dtstamp
    227         event['class'] = vText('PUBLIC')
    228         event['status'] = vText('CONFIRMED')
    229         event['method'] = vText('PUBLISH')
    230 
    231         if session == 'st-from-ts':
    232             event['summary'] = t_name
    233         else:
    234             event['summary'] = session
    235 
    236         event['location'] = vText(session_info['room'])
    237 
    238         # Get rid of HTML in 'desc'
    239         desc = BeautifulSoup(' '.join(
    240             session_info['desc']).replace(
    241                 '\n', ' '), 'html.parser').get_text()
    242         event['description'] = desc
    243 
    244         # Add speakers
    245         for speaker in session_info['speakers']:
    246             event.add('attendee', self.mk_attendee(speaker), encode=0)
    247 
    248         dt_start = self.mk_datetime(month, day, t_start)
    249         dt_end = self.mk_datetime(month, day, t_end)
    250 
    251         event['dtstart'] = dt_start
    252         event['dtend'] = dt_end
    253 
    254         # Add to calendar
    255         self.cal.add_component(event)
    256 
    257         return event
    258 
    259 
    260     def gen_ical(self):
    261         """Parse LP schedule dict and generate iCal Calendar object.
    262         """
    263 
    264         for day_str, timeslots in self.lps_dict.items():
    265             month, day = self.get_month_day(day_str)
    266             if not month:
    267                 # month, day not specified; cannot generate ical for
    268                 # this day
    269                 continue
    270             for timeslot_str, sessions in timeslots.items():
    271                 t_start, t_end, t_name = self.get_timeslot(timeslot_str)
    272                 if not t_start:
    273                     # timeslot not specified; cannot generate ical for
    274                     # this timeslot
    275                     continue
    276                 for session, session_info in sessions.items():
    277                     self.add_event(month, day, t_start, t_end, t_name,
    278                                    session, session_info)
    279 
    280         return self.cal.to_ical().decode('utf-8')
    281 
    282 
    283     def to_ical(self):
    284         """Writes iCal to disk.
    285         """
    286         filename = 'lp{}-schedule.ics'.format(self.lp_year)
    287         write_file(filename, self.gen_ical())
    288 
    289         return filename
    290 
    291 
    292 class LPSRenderer(Renderer):
    293     """Helps convert Markdown version of LP schedule to a dictionary.
    294     """
    295 
    296     def __init__(self, **kwargs):
    297         super(LPSRenderer, self).__init__(**kwargs)
    298         self.last_day = None
    299         self.last_time_slot = None
    300         self.last_session = None
    301 
    302         # Denotes the no. of the paragraph under a session; this
    303         # information will be helpful in identifying the "speaker",
    304         # "room" and session "description".
    305         self.no_paragraph = None
    306 
    307         # Contains a list of speakers' names which are marked up for
    308         # auto-linking[1], but don't have an id to link to.
    309         #
    310         # [1]: Markup for auto-linking speakers is [John Hacker]().
    311         self.speakers_noids = []
    312 
    313         # If it is 'False', then the 'speaker.ids' file was not found;
    314         # otherwise it is an OrderedDict containing the mapping of
    315         # speakers and their corresponding id.
    316         self.speakers_ids = json_read('speakers.ids')
    317 
    318 
    319     def get_uid(self, speaker):
    320         """Generate unique id for `speaker`.
    321 
    322         Returns unique id for `speaker` if it exists; `False` otherwise.
    323         """
    324         if not self.speakers_ids:
    325             # There is no speakers_ids OrderedDict available.
    326             return False
    327 
    328         speaker = str(speaker)
    329         if speaker in self.speakers_ids.keys():
    330             return self.speakers_ids[speaker]
    331         else:
    332             # speaker not found in speakers_ids OrderedDict.
    333             return False
    334 
    335 
    336     def _check_session_title_exists(self):
    337         """Checks if :py:attr:`.last_session` is set.
    338 
    339         If :py:attr:`.last_session` is not set and first paragraph is
    340         encountered, then it is assumed that the current timeslot is in
    341         the following format::
    342 
    343             ### 9:00 - 10:45: Opening Keynote - Beyond unfree...
    344 
    345             [Cory Doctorow][doctorow]
    346 
    347             Room 32-123
    348 
    349             Software has eaten the world...
    350 
    351         This method is meant to be called from the
    352         :py:method:`.paragraph` method.
    353         """
    354         if not self.last_session and self.no_paragraph == 0:
    355             # Current timeslot has only one session and there
    356             # no session title.
    357             #
    358             # st-from-ts -> session title from time slot.
    359             lps_dict[self.last_day][self.last_time_slot][
    360                 'st-from-ts'] = OrderedDict()
    361             self.last_session = 'st-from-ts'
    362 
    363 
    364     def _process_video(self, text):
    365         """Process the video text.
    366 
    367         If it's a link, just extract the link and return it.
    368 
    369         This method is meant to be called from the
    370         :py:method:`.paragraph` method.
    371         """
    372         soup = BeautifulSoup(text, 'html.parser')
    373         links = soup.find_all('a')
    374 
    375         if len(links) == 0:
    376             # no links found, so
    377             return text
    378 
    379         # link(s) found, return the first link's href.
    380         return links[0]['href']
    381 
    382     def link(self, link, title, text):
    383         # Here, we catch speaker names that have to be autolinked and
    384         # autolink them if there is an id available for the speaker.
    385         if not link:
    386             # We found a speaker that has to be autolinked.
    387 
    388             # Here, `text` is the speaker' name.
    389             id_ = self.get_uid(text)
    390             if id_:
    391                 link = 'speakers.html#{}'.format(id_)
    392             else:
    393                 # Oh no, there is no id for this speaker.
    394                 self.speakers_noids.append(text)
    395                 # Don't linkify this speaker; they don't have an id.
    396                 return text
    397 
    398         return super(LPSRenderer, self).link(link, title, text)
    399 
    400 
    401     def header(self, text, level, raw=None):
    402         global lps_dict
    403 
    404         if level == 2:
    405             # Add new day.
    406             lps_dict[text] = OrderedDict()
    407             self.last_day = text
    408         elif level == 3:
    409             # Add new timeslot
    410             lps_dict[self.last_day][text] = OrderedDict()
    411             self.last_time_slot = text
    412             # New timeslot, reset paragraphs processed and
    413             # last session.
    414             self.no_paragraph = 0
    415             self.last_session = None
    416         elif level == 4:
    417             # Add new session
    418             lps_dict[self.last_day][self.last_time_slot][
    419                 text] = OrderedDict()
    420             self.last_session = text
    421             # We found a new session; set no of paragraphs processed
    422             # to 0.
    423             self.no_paragraph = 0
    424 
    425         return super(LPSRenderer, self).header(text, level, raw)
    426 
    427 
    428     def paragraph(self, text):
    429         global lps_dict
    430 
    431         self._check_session_title_exists()
    432         p = super(LPSRenderer, self).paragraph(text)
    433 
    434         if self.no_paragraph == 0:
    435             # Speaker
    436             speakers = text.split(', ')
    437 
    438             lps_dict[self.last_day][self.last_time_slot][
    439                 self.last_session]['speakers'] = speakers
    440             self.no_paragraph = self.no_paragraph + 1
    441         elif self.no_paragraph == 1:
    442             # Room
    443             lps_dict[self.last_day][self.last_time_slot][
    444                 self.last_session]['room'] = text
    445             self.no_paragraph = self.no_paragraph + 1
    446         elif self.no_paragraph == 2:
    447             lps_dict[self.last_day][self.last_time_slot][
    448                 self.last_session]['video'] = self._process_video(text)
    449             # Initialize description
    450             lps_dict[self.last_day][self.last_time_slot][
    451                 self.last_session]['desc'] = []
    452             self.no_paragraph = self.no_paragraph + 1
    453         elif self.no_paragraph > 1:
    454             lps_dict[self.last_day][self.last_time_slot][
    455                 self.last_session]['desc'].append(text)
    456 
    457         return p
    458 
    459 
    460 class LPSpeakersRenderer(Renderer):
    461     """Helps convert Markdown version of LP speakers to a dictionary.
    462     """
    463 
    464     def __init__(self, **kwargs):
    465         super(LPSpeakersRenderer, self).__init__(**kwargs)
    466         global lpspeakers_dict
    467 
    468         lpspeakers_dict = OrderedDict()
    469         lpspeakers_dict['keynote-speakers'] = []
    470         lpspeakers_dict['speakers'] = []
    471 
    472         # Type of present speaker being processed; can either be
    473         # 'keynote-speakers' or 'speakers'.
    474         self.speaker_type = None
    475 
    476         # Maintain a dict of speakers and their IDs.
    477         self.speakers_ids = OrderedDict()
    478 
    479 
    480     def mk_uid(self, speaker_block):
    481         """Returns a unique id.
    482         """
    483         # 'John HÖcker, Onion Project' -> 'John HÖcker'
    484         speaker = str(speaker_block.split(', ')[0])
    485 
    486         # 'John HÖcker' -> 'John Hacker'
    487         ascii_speaker = unidecode(speaker)
    488 
    489         # 'John Hacker' -> 'hacker'
    490         id_ = ascii_speaker.split()[-1].lower()
    491 
    492         if id_ not in self.speakers_ids.values():
    493             self.speakers_ids[speaker]= id_
    494             return id_
    495         else:
    496             # 'John Hacker' -> 'john_hacker'
    497             id_ = '_'.join([s.lower() for s in ascii_speaker.split()])
    498             self.speakers_ids[speaker] = id_
    499             return id_
    500 
    501 
    502     def header(self, text, level, raw=None):
    503         global lpspeakers_dict
    504 
    505         if level == 1:
    506             self.speaker_type = 'keynote-speakers'
    507             lpspeakers_dict[self.speaker_type].append(OrderedDict())
    508 
    509             lpspeakers_dict[self.speaker_type][-1]['speaker'] = text
    510             lpspeakers_dict[self.speaker_type][-1][
    511                 'id'] = self.mk_uid(text)
    512             lpspeakers_dict[self.speaker_type][-1][
    513                 'bio']  = []
    514         elif level == 2:
    515             self.speaker_type = 'speakers'
    516             lpspeakers_dict[self.speaker_type].append(OrderedDict())
    517 
    518             lpspeakers_dict[self.speaker_type][
    519                 -1]['speaker'] = text.split(', ')[0]
    520             lpspeakers_dict[self.speaker_type][
    521                 -1]['id'] = self.mk_uid(text)
    522             lpspeakers_dict[self.speaker_type][
    523                 -1]['bio']  = []
    524 
    525         return super(LPSpeakersRenderer, self).header(text, level, raw)
    526 
    527 
    528     def image(self, src, title, text):
    529         global lpspeakers_dict
    530 
    531         lpspeakers_dict[self.speaker_type][-1]['img_url'] = src
    532         lpspeakers_dict[self.speaker_type][-1]['img_alt'] = text
    533 
    534         return super(LPSpeakersRenderer, self).image(src, title, text)
    535 
    536 
    537     def paragraph(self, text):
    538         global lpspeakers_dict
    539 
    540         p = super(LPSpeakersRenderer, self).paragraph(text)
    541 
    542         if text.startswith('<img'):
    543             # ignore
    544             return p
    545 
    546         lpspeakers_dict[self.speaker_type][-1]['bio'].append(text)
    547         return p
    548 
    549 
    550 class LPSMarkdown(Markdown):
    551     """Converts MD LP schedule to a dictionary.
    552 
    553     Returns the Markdown version of LP schedule as a dictionary.
    554     """
    555     def __init__(self, inline=None, block=None, **kwargs):
    556         """
    557         Initialize with LPSRenderer as the renderer.
    558         """
    559         self.sessions_renderer = LPSRenderer()
    560         super(LPSMarkdown, self).__init__(
    561             renderer=self.sessions_renderer,
    562             inline=None, block=None,
    563             **kwargs)
    564 
    565 
    566     def parse(self, text):
    567         global lps_dict
    568 
    569         lps_dict = OrderedDict()
    570         html = super(LPSMarkdown, self).parse(text)
    571 
    572         # Write list of speakers with no ids to `speakers.noids`.
    573         json_write('speakers.noids',
    574                     self.sessions_renderer.speakers_noids)
    575 
    576         return lps_dict
    577 
    578 
    579 class LPSpeakersMarkdown(Markdown):
    580     """Converts MD LP speakers to a dictionary.
    581 
    582     Returns the Markdown version of LP speakers as a dictionary.
    583     """
    584 
    585     def __init__(self, inline=None, block=None, **kwargs):
    586         """
    587         Initialize with LPSpeakersRenderer as the renderer.
    588         """
    589         self.speakers_renderer = LPSpeakersRenderer()
    590         super(LPSpeakersMarkdown, self).__init__(
    591             renderer=self.speakers_renderer,
    592             inline=None, block=None,
    593             **kwargs)
    594 
    595 
    596     def parse(self, text):
    597         global lpspeakers_dict
    598 
    599         html = super(LPSpeakersMarkdown, self).parse(text)
    600 
    601         # Write mapping of speakers and their ids to `speakers.ids`.
    602         json_write('speakers.ids', self.speakers_renderer.speakers_ids)
    603 
    604         return lpspeakers_dict
    605 
    606 
    607 def RenderHTML(lp_dict, template_name):
    608     """Renders LP schedule/speakers in HTML from a python dictionary.
    609 
    610     Returns the HTML as a string.
    611     """
    612     template_content = template_read(template_name)
    613     if not template_content:
    614         exit('Unable to read {} template'.format(template_name))
    615 
    616     template = Environment(
    617         trim_blocks=True,
    618         lstrip_blocks=True
    619     ).from_string(template_content)
    620 
    621     lp_html = template.render(lp_dict=lp_dict)
    622 
    623     return str(BeautifulSoup(lp_html, 'html.parser')).strip()
    624 
    625 
    626 def main():
    627     parser = ArgumentParser()
    628 
    629     group = parser.add_mutually_exclusive_group()
    630     group.add_argument("-sc", "--schedule", action="store_true",
    631                        help="Generate LP schedule")
    632     group.add_argument("-sp", "--speakers", action="store_true",
    633                        help="Generate LP speakers")
    634 
    635     parser.add_argument("--ical", type=int,
    636                         help="Specify LP year as argument; "
    637                             + "generates iCal")
    638     parser.add_argument("--version", action="version",
    639                         version='lpschedule-generator version {}'
    640                         .format(__version__),
    641                         help="Show version number and exit.")
    642     parser.add_argument("lp_md",
    643                         help="Path to the LP markdown.")
    644     args = parser.parse_args()
    645 
    646     lp_md_content = read_file(path.abspath(args.lp_md))
    647 
    648     if lp_md_content:
    649         template_name = ''
    650 
    651         if args.schedule:
    652             markdown = LPSMarkdown()
    653             template_name = 'schedule'
    654         elif args.speakers:
    655             markdown = LPSpeakersMarkdown()
    656             template_name = 'speakers'
    657         else:
    658             parser.error('No action requested, add -s or -sp switch')
    659 
    660         lp_dict = markdown(lp_md_content)
    661         lp_html = RenderHTML(lp_dict, template_name)
    662 
    663         if args.ical and args.schedule:
    664             LPiCal(lp_dict, args.ical).to_ical()
    665 
    666     else:
    667         exit('Unable to read LP markdown')
    668 
    669     if lp_html:
    670         # stdout lps html
    671         print(lp_html)
    672     else:
    673         print('Error generating LP HTML.')
    674 
    675 
    676 if __name__ == "__main__":
    677     main()