diff --git a/epilogue/archive_generator.py b/epilogue/archive_generator.py
index 4e1fff5..aae45ad 100644
--- a/epilogue/archive_generator.py
+++ b/epilogue/archive_generator.py
@@ -9,7 +9,7 @@ import chevron
import bbcode
import html
-from .wiki import Template, Renderer, reformat_page_title, translate_page_title, NAMESPACES as WIKI_NAMESPACES
+from .wiki import Template, Renderer, Linker, reformat_page_title, NAMESPACES as WIKI_NAMESPACES
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ArchiveGenerator")
@@ -61,14 +61,15 @@ class ArchiveGenerator():
categories = {}
templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']])
- wikitext_renderer = Renderer(templates, DEXES)
+ linker = Linker(directory_names=DEXES)
+ wikitext_renderer = Renderer(templates, linker)
for page in wiki.get_pages():
try:
if page.namespace != WIKI_NAMESPACES['MAIN']:
continue
- page_out = translate_page_title(page.title, DEXES)
- base = ""
+ page_out = linker.translate_page_title(page.title)
+ base = "./"
if "/" in page_out:
base = "../" * page_out.count("/")
try:
@@ -78,7 +79,7 @@ class ArchiveGenerator():
if page.redirect:
logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
renderer.render_template_to_file("redirect", page_out, {
- "target": f"{base}{translate_page_title(page.redirect, DEXES)}"
+ "target": f"{base}{linker.translate_page_title(page.redirect)}"
})
else:
logger.info("Archiving page %s to %s", page.title, page_out)
diff --git a/epilogue/wiki.py b/epilogue/wiki.py
index 7de8a0e..687f9b6 100644
--- a/epilogue/wiki.py
+++ b/epilogue/wiki.py
@@ -32,6 +32,7 @@ INTERWIKI_NAMESPACES = {
FILE_NAMESPACES = ["File:", "Image:"]
CATEGORY_NAMESPACE = "Category:"
+CATEGORY_LINK_NAMESPACE = f":{CATEGORY_NAMESPACE}"
class Wiki():
def __init__ (self, xml_path):
@@ -85,9 +86,9 @@ class Contributor():
self.username = child.text
class Renderer():
- def __init__ (self, templates={}, directory_names=[]):
+ def __init__ (self, templates={}, linker=None):
self.templates = templates
- self.directory_names = directory_names
+ self.linker = linker if linker else Linker()
def render (self, wikitext, base="", *args, **kwargs):
categories = []
@@ -101,7 +102,7 @@ class Renderer():
wikitext.remove(link)
categories.append(link.title[len(CATEGORY_NAMESPACE):])
- rendered = [render(wikitext, base, self.directory_names)]
+ rendered = [render(wikitext, base, self.linker)]
if categories:
rendered.append('
Categories
')
for category in categories:
@@ -130,28 +131,32 @@ class Renderer():
except ValueError: pass
return wikitext
-def render (wikitext, base="", directory_names=[]):
+def render (wikitext, base="", linker=None):
rendered = []
+
+ if not linker:
+ linker = Linker()
+
for node in wikitext.ifilter(False):
# node types:
# https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text
node_type = type(node)
if node_type is Wikilink:
- image_name = translate_image_title(node.title)
+ image_name = linker.translate_image_title(node.title)
if image_name:
rendered.append('
'.format(
base,
image_name,
- render(mwparserfromhell.parse(node.text), base, directory_names)
+ render(mwparserfromhell.parse(node.text), base, linker)
))
else:
- url = translate_interwiki_title(node.title)
+ url = linker.translate_interwiki_title(node.title)
if not url:
- url = f"{base}{translate_page_title(node.title, directory_names)}"
+ url = f"{base}{linker.translate_page_title(node.title)}"
rendered.append('{}'.format(
url,
- render(node.text if node.text else node.title, base, directory_names)
+ render(node.text if node.text else node.title, base, linker)
))
elif node_type is ExternalLink:
rendered.append('{}'.format(
@@ -161,13 +166,13 @@ def render (wikitext, base="", directory_names=[]):
elif node_type is Tag:
rendered.append("<{}>{}{}>".format(
render(node.tag),
- render(node.contents, base, directory_names),
+ render(node.contents, base, linker),
render(node.tag)
))
elif node_type is Heading:
rendered.append("{}".format(
node.level,
- render(node.title, base, directory_names),
+ render(node.title, base, linker),
node.level
))
elif node_type is Text:
@@ -175,31 +180,40 @@ def render (wikitext, base="", directory_names=[]):
return "".join(rendered).strip().replace("\n\n", "
")
-def translate_interwiki_title (page_title):
- for namespace, url in INTERWIKI_NAMESPACES.items():
- if page_title.startswith(namespace):
- return url.format(page_title[len(namespace):])
+class Linker():
+    """Translate wiki link titles into link targets.
+
+    Bundles the namespace tables and directory names consulted by the
+    translate_* methods so they are configured once per instance rather
+    than passed on every call.
+    """
+
+    def __init__ (self, file_namespaces=FILE_NAMESPACES, interwiki_namespaces=INTERWIKI_NAMESPACES, directory_names=None):
+        """Store the lookup tables used by the translate_* methods.
+
+        file_namespaces: iterable of title prefixes that mark a file/image link.
+        interwiki_namespaces: mapping of title prefix to a URL format string
+            that receives the rest of the title through ``str.format``.
+        directory_names: iterable of title prefixes that become the leading
+            path segment of a translated page; ``None`` means there are none.
+        """
+        self.file_namespaces = file_namespaces
+        self.interwiki_namespaces = interwiki_namespaces
+        # Build the fallback list per instance: a literal ``[]`` default is
+        # created once and would be shared by every Linker relying on it.
+        self.directory_names = [] if directory_names is None else directory_names
-def translate_page_title (page_title, directory_names=[]):
- directory_name = ""
- for name in directory_names:
- if page_title.startswith(f"{name}/"):
- directory_name = name
- page_title = page_title[len(directory_name) + 1:]
- break
+
+    def translate_interwiki_title (self, page_title):
+        """Return the interwiki URL for page_title, or None if no prefix matches.
+
+        The first entry of ``self.interwiki_namespaces`` whose key prefixes
+        the title wins; its URL pattern is formatted with the title minus
+        that prefix.
+        """
+        for namespace, url in self.interwiki_namespaces.items():
+            if page_title.startswith(namespace):
+                return url.format(page_title[len(namespace):])
+        # Explicit "no match" result; callers test the return value for truthiness.
+        return None
- return f"{reformat_page_title(directory_name)}{'/' if directory_name else ''}{reformat_page_title(page_title)}.html"
+
+    def translate_page_title (self, page_title):
+        """Return the ``.html`` path for page_title.
+
+        A title starting with ``<name>/`` for one of ``self.directory_names``
+        yields ``<Name>/<Rest>.html``; any other title yields ``<Title>.html``.
+        Both parts are passed through ``reformat_page_title``.
+        """
+        # A category link is written with a leading colon; drop that colon
+        # so the remaining title starts with the plain category namespace.
+        if page_title.startswith(CATEGORY_LINK_NAMESPACE):
+            page_title = page_title[1:]
+
+        directory_name = ""
+        for name in self.directory_names:
+            if page_title.startswith(f"{name}/"):
+                directory_name = name
+                # Strip the directory prefix and its trailing slash.
+                page_title = page_title[len(directory_name) + 1:]
+                break
-def translate_image_title (page_title):
- for namespace in FILE_NAMESPACES:
- if page_title.startswith(namespace):
- return reformat_page_title(page_title[len(namespace):])
+        return f"{reformat_page_title(directory_name)}{'/' if directory_name else ''}{reformat_page_title(page_title)}.html"
+
+    def translate_image_title (self, page_title):
+        """Return the reformatted file name for a file/image title, or None.
+
+        The title is treated as a file link when it starts with one of
+        ``self.file_namespaces``; that prefix is removed before reformatting.
+        """
+        for namespace in self.file_namespaces:
+            if page_title.startswith(namespace):
+                return reformat_page_title(page_title[len(namespace):])
+        # Explicit "not a file link" result; callers test for truthiness.
+        return None
def reformat_page_title (page_title):
+ """Return page_title with its first character upper-cased and, in the remainder, spaces replaced by ``_`` and ``/`` by ``%2F``; a falsy title yields the empty string."""
if not page_title:
return ""
- return "{}{}".format(page_title[0].upper(), page_title[1:].replace(" ", "_").replace("/", "%2F"))
+ return f"{page_title[0].upper()}{page_title[1:].replace(' ', '_').replace('/', '%2F')}"
class Template():
def __init__ (self, wikicode):