introducing Pelican Scheduler
author M. Taylor Saotome-Westlake <[email protected]>
Thu, 23 Nov 2017 03:43:22 +0000 (19:43 -0800)
committer M. Taylor Saotome-Westlake <[email protected]>
Thu, 23 Nov 2017 04:01:01 +0000 (20:01 -0800)
I haven't tested this end-to-end yet, but I've spot-checked most of the
individual pieces and it's plausible that this will work.

Some might say, "You know, you could schedule posts in advance without writing
any code at all if you just used WordPress"

but

then you'd be using WordPress

provisioning/pelican_scheduler.py [new file with mode: 0755]

diff --git a/provisioning/pelican_scheduler.py b/provisioning/pelican_scheduler.py
new file mode 100755 (executable)
index 0000000..8882e56
--- /dev/null
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+
+"""A script to schedule Pelican posts in advance, appropriate for a Git
+post-receive hook. Requires the `at` job-scheduling utility."""
+
+import datetime
+import os
+import re
+import subprocess
+
+# Checkout that main() pulls into and that the site is generated from.
+WORKING_REPO = "/home/mtsw/working"
+INPUT_DIR = os.path.join(WORKING_REPO, "content")
+OUTPUT_DIR = "/var/www/html"
+PUBLISH_CONF = os.path.join(WORKING_REPO, "publishconf.py")
+
+# Shell command queued with `at`: change into the working repo, source
+# bin/activate (presumably a virtualenv -- confirm), then run pelican on
+# INPUT_DIR with OUTPUT_DIR as -o and PUBLISH_CONF as -s.
+SITEGEN_COMMAND = (
+    "bash -c 'cd {} && source bin/activate && pelican {} -o {} -s {}'"
+).format(WORKING_REPO, INPUT_DIR, OUTPUT_DIR, PUBLISH_CONF)
+
+# A whole line of the form "Date: YYYY-MM-DD HH:MM"; the timestamp is group 1.
+DATELINE_REGEX = re.compile(
+    r"^Date: *(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) *$",
+    flags=re.MULTILINE,
+)
+# An `atq` line: digits, one whitespace character, a three-character word
+# (presumably the weekday), then the captured "Mon DD HH:MM:SS YYYY" time.
+JOBLINE_REGEX = re.compile(
+    r"\d+\s\w{3} (\w{3} +\d{1,2} \d{2}:\d{2}:\d{2} \d{4})"
+)
+
+def get_future_publication_times():
+    """Collect the publication times of posts dated in the future.
+
+    Walks INPUT_DIR for files ending in ".md", skipping every directory whose
+    path ends with "drafts" together with everything beneath it, and looks
+    for the first "Date: YYYY-MM-DD HH:MM" line (DATELINE_REGEX) in each
+    file.
+
+    Returns a set of naive datetimes that are strictly later than the
+    current local time. Files without a matching date line are ignored.
+    """
+    now = datetime.datetime.now()
+    times = set()
+    for path, dirnames, filenames in os.walk(INPUT_DIR):
+        if path.endswith("drafts"):
+            # Emptying the list in place stops os.walk from descending, so
+            # posts nested below a drafts directory are not scheduled either.
+            del dirnames[:]
+            continue
+        for filename in filenames:
+            if not filename.endswith(".md"):
+                continue
+            post_path = os.path.join(path, filename)
+            # Decode explicitly instead of relying on the locale default,
+            # which may be ASCII when run from a hook. Only the ASCII date
+            # line matters, so undecodable bytes are replaced, not fatal.
+            with open(post_path, encoding="utf-8",
+                      errors="replace") as post_file:
+                match = DATELINE_REGEX.search(post_file.read())
+            if not match:
+                continue
+            when = datetime.datetime.strptime(match.group(1),
+                                              "%Y-%m-%d %H:%M")
+            if when > now:
+                times.add(when)
+    return times
+
+
+def get_extant_at_job_times():
+    """Return the set of run times of the jobs currently listed by `atq`.
+
+    Each output line that JOBLINE_REGEX matches contributes one naive
+    datetime parsed from its captured timestamp; other lines are ignored.
+    """
+    listing = subprocess.run(["atq"], stdout=subprocess.PIPE).stdout
+    candidates = map(JOBLINE_REGEX.match, listing.decode('utf8').split('\n'))
+    return {
+        datetime.datetime.strptime(found.group(1), "%b %d %H:%M:%S %Y")
+        for found in candidates
+        if found
+    }
+
+
+def schedule(command, when):
+    """Queue `command` to be run by the `at` utility at datetime `when`.
+
+    The command text is fed to `at` on standard input, and the run time is
+    passed as "HH:MM YYYY-MM-DD" (any seconds in `when` are dropped).
+
+    If `at` exits with a nonzero status, its output is relayed on stderr so
+    that a failed scheduling is visible instead of being silently discarded.
+    No exception is raised for that case and nothing is returned.
+    """
+    import sys  # local import: only needed for the failure report
+
+    timestamp = when.strftime("%H:%M %Y-%m-%d")
+    result = subprocess.run(
+        ['at', timestamp],
+        input=command.encode(),
+        stdout=subprocess.PIPE,
+        # Merge the streams so the report below shows everything `at` said.
+        stderr=subprocess.STDOUT,
+    )
+    if result.returncode != 0:
+        sys.stderr.write(
+            "pelican_scheduler: `at {}` exited with status {}:\n{}\n".format(
+                timestamp,
+                result.returncode,
+                result.stdout.decode(errors="replace").rstrip(),
+            )
+        )
+
+
+def main():
+    """Sync the working repo and schedule a site rebuild per future post.
+
+    Runs `git pull` in WORKING_REPO, collects the future publication times
+    found in the content, subtracts the times that already have an `at` job
+    queued, and schedules SITEGEN_COMMAND at each remaining time.
+    """
+    # When this runs as a hook, Git exports GIT_DIR and related variables
+    # pointing at the repository that received the push. Left in place they
+    # override `cwd`, so the pull would not operate on WORKING_REPO. Outside
+    # a hook these variables are normally unset and this is a no-op.
+    repo_local_vars = (
+        "GIT_DIR",
+        "GIT_WORK_TREE",
+        "GIT_INDEX_FILE",
+        "GIT_OBJECT_DIRECTORY",
+        "GIT_ALTERNATE_OBJECT_DIRECTORIES",
+        "GIT_COMMON_DIR",
+        "GIT_PREFIX",
+    )
+    pull_env = {
+        name: value
+        for name, value in os.environ.items()
+        if name not in repo_local_vars
+    }
+
+    # sync our "working" repo with the bare one
+    subprocess.run(["git", "pull"], cwd=WORKING_REPO, env=pull_env)
+
+    # look for scheduled future posts
+    future_publication_times = get_future_publication_times()
+
+    # look at atq
+    extant_at_job_times = get_extant_at_job_times()
+
+    # if there are future posts that don't have an atq entry, schedule a
+    # site-regen at that time (earliest first, for a predictable order)
+    to_schedule = future_publication_times - extant_at_job_times
+    for when in sorted(to_schedule):
+        schedule(SITEGEN_COMMAND, when)
+
+
+# Run the scheduler only when this file is executed as a script, not when it
+# is imported as a module.
+if __name__ == "__main__":
+    main()