Compare commits
6 Commits
2e73ecd59f
...
ef3f3dd60c
Author | SHA1 | Date | |
---|---|---|---|
ef3f3dd60c | |||
0e3f1274cc | |||
1b7e3ce08b | |||
646b840be4 | |||
a382e6d4fd | |||
ade44491d4 |
@ -1,6 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
|
import math
|
||||||
|
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
from traceback import print_exc
|
from traceback import print_exc
|
||||||
@ -9,7 +10,8 @@ import chevron
|
|||||||
import bbcode
|
import bbcode
|
||||||
import html
|
import html
|
||||||
|
|
||||||
from .wiki import Template, Renderer, Linker, reformat_page_title, NAMESPACES as WIKI_NAMESPACES
|
from .forum import DEFAULT_POSTS_PER_PAGE
|
||||||
|
from .wiki import Template, Renderer, Linker, NAMESPACES as WIKI_NAMESPACES
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger = logging.getLogger("ArchiveGenerator")
|
logger = logging.getLogger("ArchiveGenerator")
|
||||||
@ -22,6 +24,30 @@ DEX_TYPES = [
|
|||||||
]
|
]
|
||||||
DEXES = list(chain.from_iterable([[f"{dex_type}{language}" for dex_type in DEX_TYPES] for language in DEX_LANGUAGES]))
|
DEXES = list(chain.from_iterable([[f"{dex_type}{language}" for dex_type in DEX_TYPES] for language in DEX_LANGUAGES]))
|
||||||
|
|
||||||
|
class ArchiveLinker(Linker):
    """Linker that turns wiki page titles into file paths for the archive.

    A title that starts with one of ``directory_names`` followed by ``/`` keeps
    that name as a leading directory. Every substring listed in
    ``replacements`` is then rewritten in the remainder of the title, and
    ``.html`` is appended.
    """

    def __init__ (self, directory_names=None):
        """Create the linker.

        directory_names: title prefixes that stay as a directory component in
            the generated path. When omitted, no prefix is treated as a
            directory.
        """
        super().__init__()
        # A literal list default would be one object shared by every instance
        # built without the argument, so the empty default is created here.
        # A list supplied by the caller is stored as-is (not copied).
        self.directory_names = [] if directory_names is None else directory_names
        # Substrings of the page title that are rewritten in the file name.
        self.replacements = {
            "/": "+",
            # NOTE(review): a ":" -> "" replacement is disabled here; confirm
            # whether it is still wanted.
            #":": ""
        }

    def translate_page_title (self, page_title):
        """Return the archive-relative ``.html`` path for ``page_title``.

        The title is first normalised by the base class. Only the first
        matching entry of ``directory_names`` is split off as a directory.
        """
        page_title = super().translate_page_title(page_title)

        directory_name = ""
        for name in self.directory_names:
            prefix = f"{name}/"
            if page_title.startswith(prefix):
                directory_name = name
                # Drop the directory name and its trailing slash from the title.
                page_title = page_title[len(prefix):]
                break

        # Applied after the directory split so the separating slash survives.
        for key, value in self.replacements.items():
            page_title = page_title.replace(key, value)

        directory_prefix = f"{directory_name}/" if directory_name else ""
        return f"{directory_prefix}{page_title}.html"
|
||||||
|
|
||||||
def prepare_thread (thread):
    """Decode HTML entities in ``thread.subject`` in place and return the thread."""
    decoded_subject = html.unescape(thread.subject)
    thread.subject = decoded_subject
    return thread
|
||||||
@ -61,7 +87,7 @@ class ArchiveGenerator():
|
|||||||
|
|
||||||
categories = {}
|
categories = {}
|
||||||
templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']])
|
templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']])
|
||||||
linker = Linker(directory_names=DEXES)
|
linker = ArchiveLinker(directory_names=DEXES)
|
||||||
wikitext_renderer = Renderer(templates, linker)
|
wikitext_renderer = Renderer(templates, linker)
|
||||||
for page in wiki.get_pages():
|
for page in wiki.get_pages():
|
||||||
try:
|
try:
|
||||||
@ -107,7 +133,7 @@ class ArchiveGenerator():
|
|||||||
raise e
|
raise e
|
||||||
|
|
||||||
for category, pages in categories.items():
|
for category, pages in categories.items():
|
||||||
category_out = f"Category:{reformat_page_title(category)}.html"
|
category_out = f"Category:{linker.translate_page_title(category)}"
|
||||||
logger.info("Archiving category %s to %s", category, category_out)
|
logger.info("Archiving category %s to %s", category, category_out)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -166,6 +192,8 @@ class ArchiveGenerator():
|
|||||||
"target": "page-0.html"
|
"target": "page-0.html"
|
||||||
})
|
})
|
||||||
|
|
||||||
|
total_pages = math.ceil((thread.num_replies + 1) / DEFAULT_POSTS_PER_PAGE)
|
||||||
|
page_links = [{"label": page + 1, "link": f"page-{page}.html"} for page in range(total_pages)]
|
||||||
page = 0
|
page = 0
|
||||||
while True:
|
while True:
|
||||||
posts = [prepare_post(post) for post in forum.get_posts_in_thread(thread, page)]
|
posts = [prepare_post(post) for post in forum.get_posts_in_thread(thread, page)]
|
||||||
@ -180,6 +208,7 @@ class ArchiveGenerator():
|
|||||||
"thread": thread,
|
"thread": thread,
|
||||||
"page": page,
|
"page": page,
|
||||||
"next": page + 1,
|
"next": page + 1,
|
||||||
|
"page_links": page_links,
|
||||||
"prev": page - 1,
|
"prev": page - 1,
|
||||||
"posts": posts
|
"posts": posts
|
||||||
})
|
})
|
||||||
|
@ -23,6 +23,9 @@ GET_POSTS = """
|
|||||||
LIMIT ? OFFSET ?
|
LIMIT ? OFFSET ?
|
||||||
""".format(PREFIX)
|
""".format(PREFIX)
|
||||||
|
|
||||||
|
DEFAULT_POSTS_PER_PAGE = 15
|
||||||
|
DEFAULT_THREADS_PER_PAGE = 2000
|
||||||
|
|
||||||
def fix_encoding (string):
    """Re-interpret ``string`` as UTF-8 bytes that were decoded as Latin-1.

    Characters that cannot be encoded as Latin-1 are dropped, and so are byte
    sequences that are not valid in the default (UTF-8) decoding.
    """
    raw_bytes = string.encode("latin1", errors="ignore")
    return raw_bytes.decode(errors="ignore")
|
||||||
|
|
||||||
@ -50,7 +53,7 @@ class Forum():
|
|||||||
cursor.execute(GET_BOARDS)
|
cursor.execute(GET_BOARDS)
|
||||||
return [Board(board) for board in cursor.fetchall()]
|
return [Board(board) for board in cursor.fetchall()]
|
||||||
|
|
||||||
def get_threads_in_board (self, board, page=0, per_page=2000):
|
def get_threads_in_board (self, board, page=0, per_page=DEFAULT_THREADS_PER_PAGE):
|
||||||
try:
|
try:
|
||||||
board = board.id
|
board = board.id
|
||||||
except ValueError: pass
|
except ValueError: pass
|
||||||
@ -58,7 +61,7 @@ class Forum():
|
|||||||
cursor.execute(GET_THREADS, (board, per_page, page * per_page))
|
cursor.execute(GET_THREADS, (board, per_page, page * per_page))
|
||||||
return [Thread(thread) for thread in cursor.fetchall()]
|
return [Thread(thread) for thread in cursor.fetchall()]
|
||||||
|
|
||||||
def get_posts_in_thread (self, thread, page=0, per_page=15):
|
def get_posts_in_thread (self, thread, page=0, per_page=DEFAULT_POSTS_PER_PAGE):
|
||||||
try:
|
try:
|
||||||
thread = thread.id
|
thread = thread.id
|
||||||
except ValueError: pass
|
except ValueError: pass
|
||||||
@ -89,6 +92,7 @@ class Thread():
|
|||||||
self.datetime = datetime.fromtimestamp(row['poster_time'])
|
self.datetime = datetime.fromtimestamp(row['poster_time'])
|
||||||
self.subject = fix_encoding(row['subject'])
|
self.subject = fix_encoding(row['subject'])
|
||||||
self.poster_name = fix_encoding(row['poster_name'])
|
self.poster_name = fix_encoding(row['poster_name'])
|
||||||
|
self.num_replies = row['num_replies']
|
||||||
|
|
||||||
class Post():
|
class Post():
|
||||||
def __init__ (self, row):
|
def __init__ (self, row):
|
||||||
|
@ -1,39 +1,33 @@
|
|||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from flask import Flask, redirect
|
from .archive_generator import ArchiveLinker, DEXES
|
||||||
|
|
||||||
|
from flask import Flask, redirect, request
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
def is_wiki_directory_name (name):
|
## Wiki redirector
|
||||||
return "Dex" in name
|
@app.route("/wiki/<path:path>")
|
||||||
|
def redirect_wiki (path):
|
||||||
def escape_wiki_page_name (page_name):
|
return redirect(make_wiki_url(path))
|
||||||
page_name = page_name[0].upper() + page_name[1:].replace(" ", "_")
|
|
||||||
|
|
||||||
if page_name.endswith("/"):
|
|
||||||
page_name = page_name[:-1]
|
|
||||||
|
|
||||||
if "/" in page_name:
|
|
||||||
(prefix, suffix) = page_name.split("/", 1)
|
|
||||||
suffix = suffix.replace("/", "%2F")
|
|
||||||
page_name = prefix + ("/" if is_wiki_directory_name(prefix) else "%2F") + suffix
|
|
||||||
|
|
||||||
return page_name
|
|
||||||
|
|
||||||
def make_wiki_url (path):
    """Build the wiki-archive URL for a requested wiki ``path``.

    One trailing slash is removed from ``path``, the configured archive base
    URL is given a trailing slash if it lacks one, and the page part is
    produced by the application's wiki linker.
    """
    base_url = app.args.wiki_archive
    page_path = path[:-1] if path.endswith("/") else path

    if not base_url.endswith("/"):
        base_url += "/"

    return base_url + app.wiki_linker.translate_page_title(page_path)
|
||||||
|
|
||||||
|
## Forum redirector
|
||||||
@app.route('/forums/<path:path>')
|
@app.route('/forums/<path:path>')
|
||||||
def redirect_forums (path):
|
def redirect_forums (path):
|
||||||
pass
|
return redirect(make_forum_url(path))
|
||||||
|
|
||||||
@app.route("/wiki/<path:path>")
|
def make_forum_url (request):
|
||||||
def redirect_wiki (path):
|
return str(request)
|
||||||
return redirect(make_wiki_url(path))
|
|
||||||
|
|
||||||
def main ():
|
def main ():
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
@ -41,4 +35,5 @@ def main ():
|
|||||||
parser.add_argument("--forums-archive", help="URL to forums archive")
|
parser.add_argument("--forums-archive", help="URL to forums archive")
|
||||||
|
|
||||||
app.args = parser.parse_args()
|
app.args = parser.parse_args()
|
||||||
|
app.wiki_linker = ArchiveLinker(directory_names=DEXES)
|
||||||
app.run()
|
app.run()
|
@ -106,9 +106,9 @@ class Renderer():
|
|||||||
if categories:
|
if categories:
|
||||||
rendered.append('<h2>Categories</h2><ul class="categories">')
|
rendered.append('<h2>Categories</h2><ul class="categories">')
|
||||||
for category in categories:
|
for category in categories:
|
||||||
rendered.append('<li><a href="{}Category:{}.html">{}</a></li>'.format(
|
rendered.append('<li><a href="{}Category:{}">{}</a></li>'.format(
|
||||||
base,
|
base,
|
||||||
reformat_page_title(category),
|
self.linker.translate_page_title(category),
|
||||||
category
|
category
|
||||||
))
|
))
|
||||||
rendered.append("</ul>")
|
rendered.append("</ul>")
|
||||||
@ -181,10 +181,9 @@ def render (wikitext, base="", linker=None):
|
|||||||
return "".join(rendered).strip().replace("\n\n", "<br /><br />")
|
return "".join(rendered).strip().replace("\n\n", "<br /><br />")
|
||||||
|
|
||||||
class Linker():
|
class Linker():
|
||||||
def __init__ (self, file_namespaces=FILE_NAMESPACES, interwiki_namespaces=INTERWIKI_NAMESPACES, directory_names=[]):
|
def __init__ (self, file_namespaces=FILE_NAMESPACES, interwiki_namespaces=INTERWIKI_NAMESPACES):
|
||||||
self.file_namespaces = file_namespaces
|
self.file_namespaces = file_namespaces
|
||||||
self.interwiki_namespaces = interwiki_namespaces
|
self.interwiki_namespaces = interwiki_namespaces
|
||||||
self.directory_names = directory_names
|
|
||||||
|
|
||||||
def translate_interwiki_title (self, page_title):
|
def translate_interwiki_title (self, page_title):
|
||||||
for namespace, url in self.interwiki_namespaces.items():
|
for namespace, url in self.interwiki_namespaces.items():
|
||||||
@ -194,15 +193,8 @@ class Linker():
|
|||||||
def translate_page_title (self, page_title):
|
def translate_page_title (self, page_title):
|
||||||
if page_title.startswith(CATEGORY_LINK_NAMESPACE):
|
if page_title.startswith(CATEGORY_LINK_NAMESPACE):
|
||||||
page_title = page_title[1:]
|
page_title = page_title[1:]
|
||||||
|
|
||||||
directory_name = ""
|
|
||||||
for name in self.directory_names:
|
|
||||||
if page_title.startswith(f"{name}/"):
|
|
||||||
directory_name = name
|
|
||||||
page_title = page_title[len(directory_name) + 1:]
|
|
||||||
break
|
|
||||||
|
|
||||||
return f"{reformat_page_title(directory_name)}{'/' if directory_name else ''}{reformat_page_title(page_title)}.html"
|
return reformat_page_title(page_title)
|
||||||
|
|
||||||
def translate_image_title (self, page_title):
|
def translate_image_title (self, page_title):
|
||||||
for namespace in self.file_namespaces:
|
for namespace in self.file_namespaces:
|
||||||
@ -213,7 +205,7 @@ def reformat_page_title (page_title):
|
|||||||
if not page_title:
|
if not page_title:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
return f"{page_title[0].upper()}{page_title[1:].replace(' ', '_').replace('/', '%2F')}"
|
return f"{page_title[0].upper()}{page_title[1:].replace(' ', '_')}"
|
||||||
|
|
||||||
class Template():
|
class Template():
|
||||||
def __init__ (self, wikicode):
|
def __init__ (self, wikicode):
|
||||||
|
@ -68,6 +68,7 @@ TOPICS_DUMP = "threads.sql"
|
|||||||
# Categories we are not interested in archiving.
|
# Categories we are not interested in archiving.
|
||||||
# `id_cat` in (1, 2)
|
# `id_cat` in (1, 2)
|
||||||
DO_NOT_ARCHIVE_CATEGORIES = [
|
DO_NOT_ARCHIVE_CATEGORIES = [
|
||||||
|
7, # Links
|
||||||
12, # Epsilon: ?????
|
12, # Epsilon: ?????
|
||||||
6, # Sigma: Higher Access
|
6, # Sigma: Higher Access
|
||||||
8 # Omega: Garbage
|
8 # Omega: Garbage
|
||||||
@ -76,17 +77,21 @@ DO_NOT_ARCHIVE_CATEGORIES = [
|
|||||||
# Boards we are not interested in archiving.
|
# Boards we are not interested in archiving.
|
||||||
# `id_board` in (1, 2)
|
# `id_board` in (1, 2)
|
||||||
# Board ids (`id_board`) that are excluded from the archive.
DO_NOT_ARCHIVE_BOARDS = [
    24, 94, 118, 121,  # Links
    40,  # Exclusive Board
    65,  # Requests for Moderatorship
    66,  # Requests for Membership+
    67,  # Requests for Distinguished Membership
    23,  # M.A.S.K. HQ (Staff Board)
    22,  # Admins Only Board
    89,  # Test Board
    86,  # Omega Archives
    51, 37, 79, 26, 47, 44, 45, 99, 93, 119, 96,
    62,  # Submit-A-Glitch Archives
    3, 4, 5, 57, 58, 59, 38, 54, 63, 64,
    68, 69, 70, 81, 82, 83,
    28,  # The Dumpster Out Back
    123,  # ?????
]
|
||||||
|
|
||||||
# Regexes for sensitive information
|
# Regexes for sensitive information
|
||||||
|
@ -11,7 +11,11 @@ ul.boards { margin-left: 0; padding-left: 0; }
|
|||||||
.label { font-weight: bold }
|
.label { font-weight: bold }
|
||||||
article { border-top: 1px solid black; }
|
article { border-top: 1px solid black; }
|
||||||
section { margin-top: 15px; margin-bottom: 15px; }
|
section { margin-top: 15px; margin-bottom: 15px; }
|
||||||
|
|
||||||
.next { float: right; }
|
.next { float: right; }
|
||||||
|
.pagination { margin-bottom: 10px; }
|
||||||
|
.pagination ul { list-style-type: none; margin-left: 0; padding-left: 0; display: inline; }
|
||||||
|
.pagination li { display: inline; }
|
||||||
|
|
||||||
.page { padding-top: 15px; }
|
.page { padding-top: 15px; }
|
||||||
.page table { width: 100%; }
|
.page table { width: 100%; }
|
@ -1,4 +1,9 @@
|
|||||||
<div class="pagination">
|
<div class="pagination">
|
||||||
<a class="prev" href="page-{{prev}}.html">Previous Page</a>
|
<a class="prev" href="page-{{prev}}.html">Previous Page</a>
|
||||||
|
<ul>
|
||||||
|
{{#page_links}}
|
||||||
|
<li><a href="{{link}}">{{label}}</a></li>
|
||||||
|
{{/page_links}}
|
||||||
|
</ul>
|
||||||
<a class="next" href="page-{{next}}.html">Next Page</a>
|
<a class="next" href="page-{{next}}.html">Next Page</a>
|
||||||
</div>
|
</div>
|
@ -3,14 +3,16 @@
|
|||||||
<table id="threads">
|
<table id="threads">
|
||||||
<tr>
|
<tr>
|
||||||
<th>Title</th>
|
<th>Title</th>
|
||||||
<th>Poster</th>
|
<th>Poster</th>
|
||||||
<th>Date</th>
|
<th>Date</th>
|
||||||
|
<th>Replies</th>
|
||||||
</tr>
|
</tr>
|
||||||
{{#threads}}
|
{{#threads}}
|
||||||
<tr>
|
<tr>
|
||||||
<td class="thread-subject"><a href="thread-{{id}}">{{subject}}</a></td>
|
<td class="thread-subject"><a href="thread-{{id}}">{{subject}}</a></td>
|
||||||
<td class="thread-poster">{{poster_name}}</td>
|
<td class="thread-poster">{{poster_name}}</td>
|
||||||
<td class="thread-date">{{datetime}}</td>
|
<td class="thread-date">{{datetime}}</td>
|
||||||
|
<td class="replies">{{num_replies}}</td>
|
||||||
</tr>
|
</tr>
|
||||||
{{/threads}}
|
{{/threads}}
|
||||||
</table>
|
</table>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user