Add all dex types and make the wiki renderer aware of them, so that dex subpages will be rendered as GlitchDex/RB:000 while phrases like G/S will be rendered as G%2FS.

This unfortunately means the redirector will need to have special logic to be aware of dexes too, but it could probably be as simple as "if the directory name contains 'Dex' then treat it as a directory name".
This commit is contained in:
Adrian Kuschelyagi Malacoda 2020-08-23 06:02:28 -05:00
parent ea7e1294b1
commit 77b160a35b
2 changed files with 48 additions and 18 deletions

View File

@ -2,17 +2,26 @@ import os
import logging import logging
import shutil import shutil
from itertools import chain
from traceback import print_exc from traceback import print_exc
import chevron import chevron
import bbcode import bbcode
import html import html
from .wiki import Template, Renderer, reformat_page_title, NAMESPACES as WIKI_NAMESPACES from .wiki import Template, Renderer, reformat_page_title, translate_page_title, NAMESPACES as WIKI_NAMESPACES
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ArchiveGenerator") logger = logging.getLogger("ArchiveGenerator")
# Suffixes appended to a dex type name to form a per-language dex name.
# The empty string yields the unsuffixed name (e.g. "GlitchDex").
DEX_LANGUAGES = ["", "DE", "ES", "FR", "IT", "JP", "KO"]

# Base names of every dex whose subpages should be laid out as a directory
# ("GlitchDex/RB:000") instead of having the slash escaped as "%2F".
DEX_TYPES = [
    "GlitchDex", "AttackDex", "DexDex", "AreaDex", "TrainerDex", "FieldMoveDex", "ItemDex", "FamilyDex", "DecDex", "DayDex",
    "MDIGlitchDex", "MetascriptDex", "TMHMDex", "StatDex", "PosterDex", "TypeDex", "UnownDex", "DollDex", "DefaultNameDex",
    # NOTE(review): this entry was "BattleTypeDe"; it is assumed to be a
    # truncated "BattleTypeDex" like every other entry -- confirm against the wiki.
    "BattleTypeDex", "BadgeDescriptionDex", "FacingDex",
]

# Every "<type><language>" combination, grouped by language: all unsuffixed
# names first, then all "DE" names, and so on. Handed to the wiki renderer and
# to translate_page_title as the list of directory names.
DEXES = [f"{dex_type}{language}" for language in DEX_LANGUAGES for dex_type in DEX_TYPES]
def prepare_thread (thread): def prepare_thread (thread):
thread.subject = html.unescape(thread.subject) thread.subject = html.unescape(thread.subject)
return thread return thread
@ -52,13 +61,13 @@ class ArchiveGenerator():
categories = {} categories = {}
templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']]) templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']])
wikitext_renderer = Renderer(templates) wikitext_renderer = Renderer(templates, DEXES)
for page in wiki.get_pages(): for page in wiki.get_pages():
try: try:
if page.namespace != WIKI_NAMESPACES['MAIN']: if page.namespace != WIKI_NAMESPACES['MAIN']:
continue continue
page_out = f"{reformat_page_title(page.title)}.html" page_out = translate_page_title(page.title, DEXES)
base = "" base = ""
if "/" in page_out: if "/" in page_out:
base = "../" * page_out.count("/") base = "../" * page_out.count("/")
@ -69,11 +78,11 @@ class ArchiveGenerator():
if page.redirect: if page.redirect:
logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out) logger.info("Archiving redirect page (%s -> %s) to %s", page.title, page.redirect, page_out)
renderer.render_template_to_file("redirect", page_out, { renderer.render_template_to_file("redirect", page_out, {
"target": f"{base}{reformat_page_title(page.redirect)}.html" "target": f"{base}{translate_page_title(page.redirect, DEXES)}"
}) })
else: else:
logger.info("Archiving page %s to %s", page.title, page_out) logger.info("Archiving page %s to %s", page.title, page_out)
(rendered, page_categories) = wikitext_renderer.render(page.get_latest().text, page=page) (rendered, page_categories) = wikitext_renderer.render(page.get_latest().text, base, page=page)
for category in page_categories: for category in page_categories:
if not category in categories: if not category in categories:

View File

@ -85,10 +85,11 @@ class Contributor():
self.username = child.text self.username = child.text
class Renderer(): class Renderer():
def __init__ (self, templates={}): def __init__ (self, templates={}, directory_names=[]):
self.templates = templates self.templates = templates
self.directory_names = directory_names
def render (self, wikitext, *args, **kwargs): def render (self, wikitext, base="", *args, **kwargs):
categories = [] categories = []
wikitext = self.transclude_templates(wikitext, *args, **kwargs) wikitext = self.transclude_templates(wikitext, *args, **kwargs)
@ -100,11 +101,15 @@ class Renderer():
wikitext.remove(link) wikitext.remove(link)
categories.append(link.title[len(CATEGORY_NAMESPACE):]) categories.append(link.title[len(CATEGORY_NAMESPACE):])
rendered = [render(wikitext)] rendered = [render(wikitext, base, self.directory_names)]
if categories: if categories:
rendered.append('<h2>Categories</h2><ul class="categories">') rendered.append('<h2>Categories</h2><ul class="categories">')
for category in categories: for category in categories:
rendered.append('<li><a href="Category:{}.html">{}</a></li>'.format(reformat_page_title(category), category)) rendered.append('<li><a href="{}Category:{}.html">{}</a></li>'.format(
base,
reformat_page_title(category),
category
))
rendered.append("</ul>") rendered.append("</ul>")
return ("".join(rendered), categories) return ("".join(rendered), categories)
@ -125,7 +130,7 @@ class Renderer():
except ValueError: pass except ValueError: pass
return wikitext return wikitext
def render (wikitext): def render (wikitext, base="", directory_names=[]):
rendered = [] rendered = []
for node in wikitext.ifilter(False): for node in wikitext.ifilter(False):
# node types: # node types:
@ -134,14 +139,19 @@ def render (wikitext):
if node_type is Wikilink: if node_type is Wikilink:
image_name = translate_image_title(node.title) image_name = translate_image_title(node.title)
if image_name: if image_name:
rendered.append('<img src="{}" />'.format( rendered.append('<img src="{}{}" />'.format(
base,
image_name, image_name,
render(mwparserfromhell.parse(node.text)) render(mwparserfromhell.parse(node.text), base, directory_names)
)) ))
else: else:
url = translate_interwiki_title(node.title)
if not url:
url = f"{base}{translate_page_title(node.title, directory_names)}"
rendered.append('<a href="{}">{}</a>'.format( rendered.append('<a href="{}">{}</a>'.format(
translate_page_title(node.title), url,
render(node.text if node.text else node.title) render(node.text if node.text else node.title, base, directory_names)
)) ))
elif node_type is ExternalLink: elif node_type is ExternalLink:
rendered.append('<a href="{}">{}</a>'.format( rendered.append('<a href="{}">{}</a>'.format(
@ -151,13 +161,13 @@ def render (wikitext):
elif node_type is Tag: elif node_type is Tag:
rendered.append("<{}>{}</{}>".format( rendered.append("<{}>{}</{}>".format(
render(node.tag), render(node.tag),
render(node.contents), render(node.contents, base, directory_names),
render(node.tag) render(node.tag)
)) ))
elif node_type is Heading: elif node_type is Heading:
rendered.append("<h{}>{}</h{}>".format( rendered.append("<h{}>{}</h{}>".format(
node.level, node.level,
render(node.title), render(node.title, base, directory_names),
node.level node.level
)) ))
elif node_type is Text: elif node_type is Text:
@ -165,12 +175,20 @@ def render (wikitext):
return "".join(rendered).strip().replace("\n\n", "<br /><br />") return "".join(rendered).strip().replace("\n\n", "<br /><br />")
def translate_page_title (page_title): def translate_interwiki_title (page_title):
for namespace, url in INTERWIKI_NAMESPACES.items(): for namespace, url in INTERWIKI_NAMESPACES.items():
if page_title.startswith(namespace): if page_title.startswith(namespace):
return url.format(page_title[len(namespace):]) return url.format(page_title[len(namespace):])
return "{}.html".format(reformat_page_title(page_title)) def translate_page_title (page_title, directory_names=[]):
directory_name = ""
for name in directory_names:
if page_title.startswith(f"{name}/"):
directory_name = name
page_title = page_title[len(directory_name) + 1:]
break
return f"{reformat_page_title(directory_name)}{'/' if directory_name else ''}{reformat_page_title(page_title)}.html"
def translate_image_title (page_title): def translate_image_title (page_title):
for namespace in FILE_NAMESPACES: for namespace in FILE_NAMESPACES:
@ -178,6 +196,9 @@ def translate_image_title (page_title):
return reformat_page_title(page_title[len(namespace):]) return reformat_page_title(page_title[len(namespace):])
def reformat_page_title (page_title): def reformat_page_title (page_title):
if not page_title:
return ""
return "{}{}".format(page_title[0].upper(), page_title[1:].replace(" ", "_").replace("/", "%2F")) return "{}{}".format(page_title[0].upper(), page_title[1:].replace(" ", "_").replace("/", "%2F"))
class Template(): class Template():