Add all dex types and make the wiki renderer aware of them, so that dex subpages are rendered as real subdirectories (e.g. GlitchDex/RB:000) while other titles containing a slash, like G/S, are rendered with the slash escaped (G%2FS).

Unfortunately, this means the redirector will also need special logic to be aware of dexes, but it could probably be as simple as "if the directory name contains 'Dex', treat it as a directory name".
This commit is contained in:
Adrian Kuschelyagi Malacoda 2020-08-23 06:02:28 -05:00
parent ea7e1294b1
commit 77b160a35b
2 changed files with 48 additions and 18 deletions

View File

@ -2,17 +2,26 @@ import os
import logging
import shutil
from itertools import chain
from traceback import print_exc
import chevron
import bbcode
import html
from .wiki import Template, Renderer, reformat_page_title, NAMESPACES as WIKI_NAMESPACES
from .wiki import Template, Renderer, reformat_page_title, translate_page_title, NAMESPACES as WIKI_NAMESPACES
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ArchiveGenerator")
# Suffixes appended to a dex type name; the empty string yields the bare dex name.
DEX_LANGUAGES = ["", "DE", "ES", "FR", "IT", "JP", "KO"]

# Base names of the dex sections whose subpages are kept as real subdirectories.
DEX_TYPES = [
    "GlitchDex", "AttackDex", "DexDex", "AreaDex", "TrainerDex", "FieldMoveDex", "ItemDex", "FamilyDex", "DecDex", "DayDex",
    "MDIGlitchDex", "MetascriptDex", "TMHMDex", "StatDex", "PosterDex", "TypeDex", "UnownDex", "DollDex", "DefaultNameDex",
    "BattleTypeDex", "BadgeDescriptionDex", "FacingDex"
]

# Every dex type combined with every language suffix, grouped by language:
# all unsuffixed names first, then all "...DE" names, and so on.
DEXES = list(chain.from_iterable(
    (f"{dex_type}{language}" for dex_type in DEX_TYPES)
    for language in DEX_LANGUAGES
))
# Decode HTML character references in a thread's subject (via html.unescape).
# The thread object is modified in place and the same object is returned, so the
# call can be used inline wherever the thread itself is expected.
def prepare_thread (thread):
thread.subject = html.unescape(thread.subject)
return thread
@ -52,13 +61,13 @@ class ArchiveGenerator():
categories = {}
templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']])
wikitext_renderer = Renderer(templates)
wikitext_renderer = Renderer(templates, DEXES)
for page in wiki.get_pages():
try:
if page.namespace != WIKI_NAMESPACES['MAIN']:
continue
page_out = f"{reformat_page_title(page.title)}.html"
page_out = translate_page_title(page.title, DEXES)
base = ""
if "/" in page_out:
base = "../" * page_out.count("/")
@ -69,11 +78,11 @@ class ArchiveGenerator():
if page.redirect:
logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
renderer.render_template_to_file("redirect", page_out, {
"target": f"{base}{reformat_page_title(page.redirect)}.html"
"target": f"{base}{translate_page_title(page.redirect, DEXES)}"
})
else:
logger.info("Archiving page %s to %s", page.title, page_out)
(rendered, page_categories) = wikitext_renderer.render(page.get_latest().text, page=page)
(rendered, page_categories) = wikitext_renderer.render(page.get_latest().text, base, page=page)
for category in page_categories:
if not category in categories:

View File

@ -85,10 +85,11 @@ class Contributor():
self.username = child.text
class Renderer():
def __init__ (self, templates={}):
def __init__ (self, templates={}, directory_names=[]):
self.templates = templates
self.directory_names = directory_names
def render (self, wikitext, *args, **kwargs):
def render (self, wikitext, base="", *args, **kwargs):
categories = []
wikitext = self.transclude_templates(wikitext, *args, **kwargs)
@ -100,11 +101,15 @@ class Renderer():
wikitext.remove(link)
categories.append(link.title[len(CATEGORY_NAMESPACE):])
rendered = [render(wikitext)]
rendered = [render(wikitext, base, self.directory_names)]
if categories:
rendered.append('<h2>Categories</h2><ul class="categories">')
for category in categories:
rendered.append('<li><a href="Category:{}.html">{}</a></li>'.format(reformat_page_title(category), category))
rendered.append('<li><a href="{}Category:{}.html">{}</a></li>'.format(
base,
reformat_page_title(category),
category
))
rendered.append("</ul>")
return ("".join(rendered), categories)
@ -125,7 +130,7 @@ class Renderer():
except ValueError: pass
return wikitext
def render (wikitext):
def render (wikitext, base="", directory_names=[]):
rendered = []
for node in wikitext.ifilter(False):
# node types:
@ -134,14 +139,19 @@ def render (wikitext):
if node_type is Wikilink:
image_name = translate_image_title(node.title)
if image_name:
rendered.append('<img src="{}" />'.format(
rendered.append('<img src="{}{}" />'.format(
base,
image_name,
render(mwparserfromhell.parse(node.text))
render(mwparserfromhell.parse(node.text), base, directory_names)
))
else:
url = translate_interwiki_title(node.title)
if not url:
url = f"{base}{translate_page_title(node.title, directory_names)}"
rendered.append('<a href="{}">{}</a>'.format(
translate_page_title(node.title),
render(node.text if node.text else node.title)
url,
render(node.text if node.text else node.title, base, directory_names)
))
elif node_type is ExternalLink:
rendered.append('<a href="{}">{}</a>'.format(
@ -151,13 +161,13 @@ def render (wikitext):
elif node_type is Tag:
rendered.append("<{}>{}</{}>".format(
render(node.tag),
render(node.contents),
render(node.contents, base, directory_names),
render(node.tag)
))
elif node_type is Heading:
rendered.append("<h{}>{}</h{}>".format(
node.level,
render(node.title),
render(node.title, base, directory_names),
node.level
))
elif node_type is Text:
@ -165,12 +175,20 @@ def render (wikitext):
return "".join(rendered).strip().replace("\n\n", "<br /><br />")
def translate_page_title (page_title):
# Resolve an interwiki-prefixed title to a URL.
# Walks the (prefix, URL template) pairs in INTERWIKI_NAMESPACES and, for the
# first prefix the title starts with, returns the template formatted with the
# remainder of the title (the part after the prefix).
# NOTE(review): the caller in render() tests `if not url` and falls back to a
# local page link, so a falsy result is expected when no prefix matches.
def translate_interwiki_title (page_title):
for namespace, url in INTERWIKI_NAMESPACES.items():
if page_title.startswith(namespace):
return url.format(page_title[len(namespace):])
return "{}.html".format(reformat_page_title(page_title))
def translate_page_title (page_title, directory_names=()):
    """Translate a wiki page title into the relative path of its HTML file.

    If the title starts with one of ``directory_names`` followed by a slash,
    that name is kept as a real directory and only the remainder is escaped.
    With ``directory_names=["GlitchDex"]``, ``GlitchDex/RB:000`` becomes
    ``GlitchDex/RB:000.html``. Any other title is passed whole to
    ``reformat_page_title``, which escapes slashes.

    ``directory_names`` is any iterable of directory name strings. It defaults
    to an immutable empty tuple so no mutable object is shared between calls.
    Only the first matching name is used.
    """
    directory_name = ""
    for name in directory_names:
        prefix = f"{name}/"
        if page_title.startswith(prefix):
            directory_name = name
            # Drop the directory and its slash; the rest is the subpage title.
            page_title = page_title[len(prefix):]
            break

    if not directory_name:
        return f"{reformat_page_title(page_title)}.html"

    return f"{reformat_page_title(directory_name)}/{reformat_page_title(page_title)}.html"
def translate_image_title (page_title):
for namespace in FILE_NAMESPACES:
@ -178,6 +196,9 @@ def translate_image_title (page_title):
return reformat_page_title(page_title[len(namespace):])
def reformat_page_title (page_title):
    """Turn a page title into a single, file-system-safe path segment.

    Spaces become underscores, slashes become ``%2F`` and the first character
    is upper-cased. An empty (or otherwise falsy) title yields ``""``.

    Escaping is applied to the whole title before capitalising. A title that
    starts with ``/`` or a space is therefore escaped too, instead of yielding
    an absolute path or a leading space in the file name.
    """
    if not page_title:
        return ""

    # Work on a plain str: parser objects may define their own ``replace`` with
    # different semantics from ``str.replace``.
    escaped = str(page_title).replace(" ", "_").replace("/", "%2F")
    return "{}{}".format(escaped[0].upper(), escaped[1:])
class Template():