Compare commits

..

No commits in common. "2aa1a7cf47ad83703f06fd30d3fc8d38db951b2f" and "df25b09eb7223a2d3291d7e314877a8f9c2dc86b" have entirely different histories.

2 changed files with 28 additions and 73 deletions

View File

@ -2,26 +2,17 @@ import os
import logging import logging
import shutil import shutil
from itertools import chain
from traceback import print_exc from traceback import print_exc
import chevron import chevron
import bbcode import bbcode
import html import html
from .wiki import Template, Renderer, Linker, reformat_page_title, NAMESPACES as WIKI_NAMESPACES from .wiki import Template, Renderer, reformat_page_title, NAMESPACES as WIKI_NAMESPACES
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ArchiveGenerator") logger = logging.getLogger("ArchiveGenerator")
DEX_LANGUAGES = ["", "DE", "ES", "FR", "IT", "JP", "KO"]
DEX_TYPES = [
"GlitchDex", "AttackDex", "DexDex", "AreaDex", "TrainerDex", "FieldMoveDex", "ItemDex", "FamilyDex", "DecDex", "DayDex",
"MDIGlitchDex", "MetascriptDex", "TMHMDex", "StatDex", "PosterDex", "TypeDex", "UnownDex", "DollDex", "DefaultNameDex",
"BattleTypeDe", "BadgeDescriptionDex", "FacingDex"
]
DEXES = list(chain.from_iterable([[f"{dex_type}{language}" for dex_type in DEX_TYPES] for language in DEX_LANGUAGES]))
def prepare_thread (thread): def prepare_thread (thread):
thread.subject = html.unescape(thread.subject) thread.subject = html.unescape(thread.subject)
return thread return thread
@ -61,15 +52,14 @@ class ArchiveGenerator():
categories = {} categories = {}
templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']]) templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']])
linker = Linker(directory_names=DEXES) wikitext_renderer = Renderer(templates)
wikitext_renderer = Renderer(templates, linker)
for page in wiki.get_pages(): for page in wiki.get_pages():
try: try:
if page.namespace != WIKI_NAMESPACES['MAIN']: if page.namespace != WIKI_NAMESPACES['MAIN']:
continue continue
page_out = linker.translate_page_title(page.title) page_out = "{}.html".format(page.title).replace(" ", "_")
base = "./" base = ""
if "/" in page_out: if "/" in page_out:
base = "../" * page_out.count("/") base = "../" * page_out.count("/")
try: try:
@ -79,11 +69,11 @@ class ArchiveGenerator():
if page.redirect: if page.redirect:
logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out) logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
renderer.render_template_to_file("redirect", page_out, { renderer.render_template_to_file("redirect", page_out, {
"target": f"{base}{linker.translate_page_title(page.redirect)}" "target": f"{base}{reformat_page_title(page.redirect)}.html"
}) })
else: else:
logger.info("Archiving page %s to %s", page.title, page_out) logger.info("Archiving page %s to %s", page.title, page_out)
(rendered, page_categories) = wikitext_renderer.render(page.get_latest().text, base, page=page) (rendered, page_categories) = wikitext_renderer.render(page.get_latest().text, page=page)
for category in page_categories: for category in page_categories:
if not category in categories: if not category in categories:

View File

@ -32,7 +32,6 @@ INTERWIKI_NAMESPACES = {
FILE_NAMESPACES = ["File:", "Image:"] FILE_NAMESPACES = ["File:", "Image:"]
CATEGORY_NAMESPACE = "Category:" CATEGORY_NAMESPACE = "Category:"
CATEGORY_LINK_NAMESPACE = f":{CATEGORY_NAMESPACE}"
class Wiki(): class Wiki():
def __init__ (self, xml_path): def __init__ (self, xml_path):
@ -86,11 +85,10 @@ class Contributor():
self.username = child.text self.username = child.text
class Renderer(): class Renderer():
def __init__ (self, templates={}, linker=None): def __init__ (self, templates={}):
self.templates = templates self.templates = templates
self.linker = linker if linker else Linker()
def render (self, wikitext, base="", *args, **kwargs): def render (self, wikitext, *args, **kwargs):
categories = [] categories = []
wikitext = self.transclude_templates(wikitext, *args, **kwargs) wikitext = self.transclude_templates(wikitext, *args, **kwargs)
@ -102,15 +100,11 @@ class Renderer():
wikitext.remove(link) wikitext.remove(link)
categories.append(link.title[len(CATEGORY_NAMESPACE):]) categories.append(link.title[len(CATEGORY_NAMESPACE):])
rendered = [render(wikitext, base, self.linker)] rendered = [render(wikitext)]
if categories: if categories:
rendered.append('<h2>Categories</h2><ul class="categories">') rendered.append('<h2>Categories</h2><ul class="categories">')
for category in categories: for category in categories:
rendered.append('<li><a href="{}Category:{}.html">{}</a></li>'.format( rendered.append('<li><a href="Category:{}.html">{}</a></li>'.format(reformat_page_title(category), category))
base,
reformat_page_title(category),
category
))
rendered.append("</ul>") rendered.append("</ul>")
return ("".join(rendered), categories) return ("".join(rendered), categories)
@ -131,32 +125,23 @@ class Renderer():
except ValueError: pass except ValueError: pass
return wikitext return wikitext
def render (wikitext, base="", linker=None): def render (wikitext):
rendered = [] rendered = []
if not linker:
linker = Linker()
for node in wikitext.ifilter(False): for node in wikitext.ifilter(False):
# node types: # node types:
# https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text # https://mwparserfromhell.readthedocs.io/en/latest/api/mwparserfromhell.nodes.html#module-mwparserfromhell.nodes.text
node_type = type(node) node_type = type(node)
if node_type is Wikilink: if node_type is Wikilink:
image_name = linker.translate_image_title(node.title) image_name = translate_image_title(node.title)
if image_name: if image_name:
rendered.append('<img src="{}{}" />'.format( rendered.append('<img src="{}" />'.format(
base,
image_name, image_name,
render(mwparserfromhell.parse(node.text), base, linker) render(mwparserfromhell.parse(node.text))
)) ))
else: else:
url = linker.translate_interwiki_title(node.title)
if not url:
url = f"{base}{linker.translate_page_title(node.title)}"
rendered.append('<a href="{}">{}</a>'.format( rendered.append('<a href="{}">{}</a>'.format(
url, translate_page_title(node.title),
render(node.text if node.text else node.title, base, linker) render(node.text if node.text else node.title)
)) ))
elif node_type is ExternalLink: elif node_type is ExternalLink:
rendered.append('<a href="{}">{}</a>'.format( rendered.append('<a href="{}">{}</a>'.format(
@ -166,13 +151,13 @@ def render (wikitext, base="", linker=None):
elif node_type is Tag: elif node_type is Tag:
rendered.append("<{}>{}</{}>".format( rendered.append("<{}>{}</{}>".format(
render(node.tag), render(node.tag),
render(node.contents, base, linker), render(node.contents),
render(node.tag) render(node.tag)
)) ))
elif node_type is Heading: elif node_type is Heading:
rendered.append("<h{}>{}</h{}>".format( rendered.append("<h{}>{}</h{}>".format(
node.level, node.level,
render(node.title, base, linker), render(node.title),
node.level node.level
)) ))
elif node_type is Text: elif node_type is Text:
@ -180,40 +165,20 @@ def render (wikitext, base="", linker=None):
return "".join(rendered).strip().replace("\n\n", "<br /><br />") return "".join(rendered).strip().replace("\n\n", "<br /><br />")
class Linker(): def translate_page_title (page_title):
def __init__ (self, file_namespaces=FILE_NAMESPACES, interwiki_namespaces=INTERWIKI_NAMESPACES, directory_names=[]): for namespace, url in INTERWIKI_NAMESPACES.items():
self.file_namespaces = file_namespaces if page_title.startswith(namespace):
self.interwiki_namespaces = interwiki_namespaces return url.format(page_title[len(namespace):])
self.directory_names = directory_names
def translate_interwiki_title (self, page_title): return "{}.html".format(reformat_page_title(page_title))
for namespace, url in self.interwiki_namespaces.items():
if page_title.startswith(namespace):
return url.format(page_title[len(namespace):])
def translate_page_title (self, page_title): def translate_image_title (page_title):
if page_title.startswith(CATEGORY_LINK_NAMESPACE): for namespace in FILE_NAMESPACES:
page_title = page_title[1:] if page_title.startswith(namespace):
return reformat_page_title(page_title[len(namespace):])
directory_name = ""
for name in self.directory_names:
if page_title.startswith(f"{name}/"):
directory_name = name
page_title = page_title[len(directory_name) + 1:]
break
return f"{reformat_page_title(directory_name)}{'/' if directory_name else ''}{reformat_page_title(page_title)}.html"
def translate_image_title (self, page_title):
for namespace in self.file_namespaces:
if page_title.startswith(namespace):
return reformat_page_title(page_title[len(namespace):])
def reformat_page_title (page_title): def reformat_page_title (page_title):
if not page_title: return "{}{}".format(page_title[0].upper(), page_title[1:].replace(" ", "_"))
return ""
return f"{page_title[0].upper()}{page_title[1:].replace(' ', '_').replace('/', '%2F')}"
class Template(): class Template():
def __init__ (self, wikicode): def __init__ (self, wikicode):