#! /usr/bin/env python import sys import re import urlparse import os.path import urllib2 from StringIO import StringIO import requests def fetch_images(url, dest): print("Parsing image URLs from %s." % (url)) urlparts = urlparse.urlparse(url) r = requests.get(url) for m in re.finditer(r"(/attachments/[^\s]+\.png)\"", r.text): filename = os.path.basename(m.group(1)) image_url = "%s://%s%s" % ( urlparts.scheme, urlparts.netloc, m.group(1)) if not os.path.exists(dest): os.makedirs(dest) if os.path.exists("%s/%s" % (dest, filename)): print("Image %s already exists." % (filename)) continue print("Fetching image %s." % (image_url)) open(os.path.join(dest, filename), "w").write( urllib2.urlopen(image_url).read()) def main(): url = sys.argv[1] output = sys.argv[2] fetch_images(url, output) print("Fetching %s." % (url)) r = requests.get("%s.json" % url) text = r.json()["wiki_page"]["text"] text = text.replace("\r", "") inpre = False with open("%s.rst" % output, "w") as fileobj: for line in StringIO(text): if line.startswith("
"):
                inpre = True
                line = line.replace("", "\n::\n\n  ")
                if line.find("") > -1:
                    print("Removing  from end of line.")
                    line = line.replace("", "")
                    inpre = False
            if line.startswith(""):
                inpre = False
                line = ""
            if inpre and line:
                line = "  %s" % line
            # Images.
            line = re.sub(
                r"!([^\s]+)!", r"\n.. image:: %s/\1" % output, line)
            # h1.
            if line.startswith("h1."):
                line = re.sub("^h1\.\s+", "", line)
                line += "=" * (len(line) - 1) + "\n"
            # h2.
            if line.startswith("h2."):
                line = re.sub("^h2\.\s+", "", line)
                line += "-" * (len(line) - 1) + "\n"
            # h3.
            if line.startswith("h3."):
                line = re.sub("^h3\.\s+", "", line)
                line += "~" * (len(line) - 1) + "\n"
            # *bold* -> **bold**
            line = re.sub(r"(^|\s)\*([\w:]+)\*", r"\1**\2**", line)
            # _italic_ -> *italic*
            line = re.sub(r"\s_(\w+)_\s", r" *\1* ", line)
            fileobj.write(line.encode("utf-8"))
if __name__ == "__main__":
    sys.exit(main())