Compare commits
No commits in common. "ef3f3dd60c35c75947c0cf80dd2ccae6d24952ee" and "2e73ecd59ffe848f1ef334862d774462e2f1f991" have entirely different histories.
ef3f3dd60c
...
2e73ecd59f
@ -1,7 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
import math
|
|
||||||
|
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
from traceback import print_exc
|
from traceback import print_exc
|
||||||
@ -10,8 +9,7 @@ import chevron
|
|||||||
import bbcode
|
import bbcode
|
||||||
import html
|
import html
|
||||||
|
|
||||||
from .forum import DEFAULT_POSTS_PER_PAGE
|
from .wiki import Template, Renderer, Linker, reformat_page_title, NAMESPACES as WIKI_NAMESPACES
|
||||||
from .wiki import Template, Renderer, Linker, NAMESPACES as WIKI_NAMESPACES
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger = logging.getLogger("ArchiveGenerator")
|
logger = logging.getLogger("ArchiveGenerator")
|
||||||
@ -24,30 +22,6 @@ DEX_TYPES = [
|
|||||||
]
|
]
|
||||||
DEXES = list(chain.from_iterable([[f"{dex_type}{language}" for dex_type in DEX_TYPES] for language in DEX_LANGUAGES]))
|
DEXES = list(chain.from_iterable([[f"{dex_type}{language}" for dex_type in DEX_TYPES] for language in DEX_LANGUAGES]))
|
||||||
|
|
||||||
class ArchiveLinker(Linker):
|
|
||||||
def __init__ (self, directory_names=[]):
|
|
||||||
super().__init__()
|
|
||||||
self.directory_names = directory_names
|
|
||||||
self.replacements = {
|
|
||||||
"/": "+",
|
|
||||||
#":": ""
|
|
||||||
}
|
|
||||||
|
|
||||||
def translate_page_title (self, page_title):
|
|
||||||
page_title = super().translate_page_title(page_title)
|
|
||||||
|
|
||||||
directory_name = ""
|
|
||||||
for name in self.directory_names:
|
|
||||||
if page_title.startswith(f"{name}/"):
|
|
||||||
directory_name = name
|
|
||||||
page_title = page_title[len(directory_name) + 1:]
|
|
||||||
break
|
|
||||||
|
|
||||||
for key, value in self.replacements.items():
|
|
||||||
page_title = page_title.replace(key, value)
|
|
||||||
|
|
||||||
return f"{directory_name}{'/' if directory_name else ''}{page_title}.html"
|
|
||||||
|
|
||||||
def prepare_thread (thread):
|
def prepare_thread (thread):
|
||||||
thread.subject = html.unescape(thread.subject)
|
thread.subject = html.unescape(thread.subject)
|
||||||
return thread
|
return thread
|
||||||
@ -87,7 +61,7 @@ class ArchiveGenerator():
|
|||||||
|
|
||||||
categories = {}
|
categories = {}
|
||||||
templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']])
|
templates = dict([(page.title.split(":")[1], Template(page.get_latest().text)) for page in wiki.get_pages() if page.namespace == WIKI_NAMESPACES['TEMPLATE']])
|
||||||
linker = ArchiveLinker(directory_names=DEXES)
|
linker = Linker(directory_names=DEXES)
|
||||||
wikitext_renderer = Renderer(templates, linker)
|
wikitext_renderer = Renderer(templates, linker)
|
||||||
for page in wiki.get_pages():
|
for page in wiki.get_pages():
|
||||||
try:
|
try:
|
||||||
@ -133,7 +107,7 @@ class ArchiveGenerator():
|
|||||||
raise e
|
raise e
|
||||||
|
|
||||||
for category, pages in categories.items():
|
for category, pages in categories.items():
|
||||||
category_out = f"Category:{linker.translate_page_title(category)}"
|
category_out = f"Category:{reformat_page_title(category)}.html"
|
||||||
logger.info("Archiving category %s to %s", category, category_out)
|
logger.info("Archiving category %s to %s", category, category_out)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -192,8 +166,6 @@ class ArchiveGenerator():
|
|||||||
"target": "page-0.html"
|
"target": "page-0.html"
|
||||||
})
|
})
|
||||||
|
|
||||||
total_pages = math.ceil((thread.num_replies + 1) / DEFAULT_POSTS_PER_PAGE)
|
|
||||||
page_links = [{"label": page + 1, "link": f"page-{page}.html"} for page in range(total_pages)]
|
|
||||||
page = 0
|
page = 0
|
||||||
while True:
|
while True:
|
||||||
posts = [prepare_post(post) for post in forum.get_posts_in_thread(thread, page)]
|
posts = [prepare_post(post) for post in forum.get_posts_in_thread(thread, page)]
|
||||||
@ -208,7 +180,6 @@ class ArchiveGenerator():
|
|||||||
"thread": thread,
|
"thread": thread,
|
||||||
"page": page,
|
"page": page,
|
||||||
"next": page + 1,
|
"next": page + 1,
|
||||||
"page_links": page_links,
|
|
||||||
"prev": page - 1,
|
"prev": page - 1,
|
||||||
"posts": posts
|
"posts": posts
|
||||||
})
|
})
|
||||||
|
@ -23,9 +23,6 @@ GET_POSTS = """
|
|||||||
LIMIT ? OFFSET ?
|
LIMIT ? OFFSET ?
|
||||||
""".format(PREFIX)
|
""".format(PREFIX)
|
||||||
|
|
||||||
DEFAULT_POSTS_PER_PAGE = 15
|
|
||||||
DEFAULT_THREADS_PER_PAGE = 2000
|
|
||||||
|
|
||||||
def fix_encoding (string):
|
def fix_encoding (string):
|
||||||
return string.encode("latin1", errors="ignore").decode(errors="ignore")
|
return string.encode("latin1", errors="ignore").decode(errors="ignore")
|
||||||
|
|
||||||
@ -53,7 +50,7 @@ class Forum():
|
|||||||
cursor.execute(GET_BOARDS)
|
cursor.execute(GET_BOARDS)
|
||||||
return [Board(board) for board in cursor.fetchall()]
|
return [Board(board) for board in cursor.fetchall()]
|
||||||
|
|
||||||
def get_threads_in_board (self, board, page=0, per_page=DEFAULT_THREADS_PER_PAGE):
|
def get_threads_in_board (self, board, page=0, per_page=2000):
|
||||||
try:
|
try:
|
||||||
board = board.id
|
board = board.id
|
||||||
except ValueError: pass
|
except ValueError: pass
|
||||||
@ -61,7 +58,7 @@ class Forum():
|
|||||||
cursor.execute(GET_THREADS, (board, per_page, page * per_page))
|
cursor.execute(GET_THREADS, (board, per_page, page * per_page))
|
||||||
return [Thread(thread) for thread in cursor.fetchall()]
|
return [Thread(thread) for thread in cursor.fetchall()]
|
||||||
|
|
||||||
def get_posts_in_thread (self, thread, page=0, per_page=DEFAULT_POSTS_PER_PAGE):
|
def get_posts_in_thread (self, thread, page=0, per_page=15):
|
||||||
try:
|
try:
|
||||||
thread = thread.id
|
thread = thread.id
|
||||||
except ValueError: pass
|
except ValueError: pass
|
||||||
@ -92,7 +89,6 @@ class Thread():
|
|||||||
self.datetime = datetime.fromtimestamp(row['poster_time'])
|
self.datetime = datetime.fromtimestamp(row['poster_time'])
|
||||||
self.subject = fix_encoding(row['subject'])
|
self.subject = fix_encoding(row['subject'])
|
||||||
self.poster_name = fix_encoding(row['poster_name'])
|
self.poster_name = fix_encoding(row['poster_name'])
|
||||||
self.num_replies = row['num_replies']
|
|
||||||
|
|
||||||
class Post():
|
class Post():
|
||||||
def __init__ (self, row):
|
def __init__ (self, row):
|
||||||
|
@ -1,33 +1,39 @@
|
|||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from .archive_generator import ArchiveLinker, DEXES
|
from flask import Flask, redirect
|
||||||
|
|
||||||
from flask import Flask, redirect, request
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
## Wiki redirector
|
def is_wiki_directory_name (name):
|
||||||
@app.route("/wiki/<path:path>")
|
return "Dex" in name
|
||||||
def redirect_wiki (path):
|
|
||||||
return redirect(make_wiki_url(path))
|
def escape_wiki_page_name (page_name):
|
||||||
|
page_name = page_name[0].upper() + page_name[1:].replace(" ", "_")
|
||||||
|
|
||||||
|
if page_name.endswith("/"):
|
||||||
|
page_name = page_name[:-1]
|
||||||
|
|
||||||
|
if "/" in page_name:
|
||||||
|
(prefix, suffix) = page_name.split("/", 1)
|
||||||
|
suffix = suffix.replace("/", "%2F")
|
||||||
|
page_name = prefix + ("/" if is_wiki_directory_name(prefix) else "%2F") + suffix
|
||||||
|
|
||||||
|
return page_name
|
||||||
|
|
||||||
def make_wiki_url (path):
|
def make_wiki_url (path):
|
||||||
url = app.args.wiki_archive
|
url = app.args.wiki_archive
|
||||||
|
|
||||||
if path.endswith("/"):
|
|
||||||
path = path[:-1]
|
|
||||||
|
|
||||||
if not url.endswith("/"):
|
if not url.endswith("/"):
|
||||||
url = url + "/"
|
url = url + "/"
|
||||||
|
|
||||||
return url + app.wiki_linker.translate_page_title(path)
|
return url + escape_wiki_page_name(path) + ".html"
|
||||||
|
|
||||||
## Forum redirector
|
|
||||||
@app.route('/forums/<path:path>')
|
@app.route('/forums/<path:path>')
|
||||||
def redirect_forums (path):
|
def redirect_forums (path):
|
||||||
return redirect(make_forum_url(path))
|
pass
|
||||||
|
|
||||||
def make_forum_url (request):
|
@app.route("/wiki/<path:path>")
|
||||||
return str(request)
|
def redirect_wiki (path):
|
||||||
|
return redirect(make_wiki_url(path))
|
||||||
|
|
||||||
def main ():
|
def main ():
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
@ -35,5 +41,4 @@ def main ():
|
|||||||
parser.add_argument("--forums-archive", help="URL to forums archive")
|
parser.add_argument("--forums-archive", help="URL to forums archive")
|
||||||
|
|
||||||
app.args = parser.parse_args()
|
app.args = parser.parse_args()
|
||||||
app.wiki_linker = ArchiveLinker(directory_names=DEXES)
|
|
||||||
app.run()
|
app.run()
|
@ -106,9 +106,9 @@ class Renderer():
|
|||||||
if categories:
|
if categories:
|
||||||
rendered.append('<h2>Categories</h2><ul class="categories">')
|
rendered.append('<h2>Categories</h2><ul class="categories">')
|
||||||
for category in categories:
|
for category in categories:
|
||||||
rendered.append('<li><a href="{}Category:{}">{}</a></li>'.format(
|
rendered.append('<li><a href="{}Category:{}.html">{}</a></li>'.format(
|
||||||
base,
|
base,
|
||||||
self.linker.translate_page_title(category),
|
reformat_page_title(category),
|
||||||
category
|
category
|
||||||
))
|
))
|
||||||
rendered.append("</ul>")
|
rendered.append("</ul>")
|
||||||
@ -181,9 +181,10 @@ def render (wikitext, base="", linker=None):
|
|||||||
return "".join(rendered).strip().replace("\n\n", "<br /><br />")
|
return "".join(rendered).strip().replace("\n\n", "<br /><br />")
|
||||||
|
|
||||||
class Linker():
|
class Linker():
|
||||||
def __init__ (self, file_namespaces=FILE_NAMESPACES, interwiki_namespaces=INTERWIKI_NAMESPACES):
|
def __init__ (self, file_namespaces=FILE_NAMESPACES, interwiki_namespaces=INTERWIKI_NAMESPACES, directory_names=[]):
|
||||||
self.file_namespaces = file_namespaces
|
self.file_namespaces = file_namespaces
|
||||||
self.interwiki_namespaces = interwiki_namespaces
|
self.interwiki_namespaces = interwiki_namespaces
|
||||||
|
self.directory_names = directory_names
|
||||||
|
|
||||||
def translate_interwiki_title (self, page_title):
|
def translate_interwiki_title (self, page_title):
|
||||||
for namespace, url in self.interwiki_namespaces.items():
|
for namespace, url in self.interwiki_namespaces.items():
|
||||||
@ -194,7 +195,14 @@ class Linker():
|
|||||||
if page_title.startswith(CATEGORY_LINK_NAMESPACE):
|
if page_title.startswith(CATEGORY_LINK_NAMESPACE):
|
||||||
page_title = page_title[1:]
|
page_title = page_title[1:]
|
||||||
|
|
||||||
return reformat_page_title(page_title)
|
directory_name = ""
|
||||||
|
for name in self.directory_names:
|
||||||
|
if page_title.startswith(f"{name}/"):
|
||||||
|
directory_name = name
|
||||||
|
page_title = page_title[len(directory_name) + 1:]
|
||||||
|
break
|
||||||
|
|
||||||
|
return f"{reformat_page_title(directory_name)}{'/' if directory_name else ''}{reformat_page_title(page_title)}.html"
|
||||||
|
|
||||||
def translate_image_title (self, page_title):
|
def translate_image_title (self, page_title):
|
||||||
for namespace in self.file_namespaces:
|
for namespace in self.file_namespaces:
|
||||||
@ -205,7 +213,7 @@ def reformat_page_title (page_title):
|
|||||||
if not page_title:
|
if not page_title:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
return f"{page_title[0].upper()}{page_title[1:].replace(' ', '_')}"
|
return f"{page_title[0].upper()}{page_title[1:].replace(' ', '_').replace('/', '%2F')}"
|
||||||
|
|
||||||
class Template():
|
class Template():
|
||||||
def __init__ (self, wikicode):
|
def __init__ (self, wikicode):
|
||||||
|
@ -68,7 +68,6 @@ TOPICS_DUMP = "threads.sql"
|
|||||||
# Categories we are not interested in archiving.
|
# Categories we are not interested in archiving.
|
||||||
# `id_cat` in (1, 2)
|
# `id_cat` in (1, 2)
|
||||||
DO_NOT_ARCHIVE_CATEGORIES = [
|
DO_NOT_ARCHIVE_CATEGORIES = [
|
||||||
7, # Links
|
|
||||||
12, # Epsilon: ?????
|
12, # Epsilon: ?????
|
||||||
6, # Sigma: Higher Access
|
6, # Sigma: Higher Access
|
||||||
8 # Omega: Garbage
|
8 # Omega: Garbage
|
||||||
@ -77,21 +76,17 @@ DO_NOT_ARCHIVE_CATEGORIES = [
|
|||||||
# Boards we are not interested in archiving.
|
# Boards we are not interested in archiving.
|
||||||
# `id_board` in (1, 2)
|
# `id_board` in (1, 2)
|
||||||
DO_NOT_ARCHIVE_BOARDS = [
|
DO_NOT_ARCHIVE_BOARDS = [
|
||||||
24, 94, 118, 121 # Links
|
40, # Exclusive Board
|
||||||
40, # Exclusive Board
|
65, # Requests for Moderatorship
|
||||||
65, # Requests for Moderatorship
|
66, # Requests for Membership+
|
||||||
66, # Requests for Membership+
|
67, # Requests for Distinguished Membership
|
||||||
67, # Requests for Distinguished Membership
|
23, # M.A.S.K. HQ (Staff Board)
|
||||||
23, # M.A.S.K. HQ (Staff Board)
|
22, # Admins Only Board
|
||||||
22, # Admins Only Board
|
89, # Test Board
|
||||||
89, # Test Board
|
86, # Omega Archives
|
||||||
86, # Omega Archives
|
51, 37, 79, 26, 47, 44, 99, 93, 119, 96,
|
||||||
51, 37, 79, 26, 47, 44, 45, 99, 93, 119, 96,
|
28, # The Dumpster Out Back
|
||||||
62, # Submit-A-Glitch Archives
|
123 # ?????
|
||||||
3, 4, 5, 57, 58, 59, 38, 54, 63, 64,
|
|
||||||
68, 69, 70, 81, 82, 83,
|
|
||||||
28, # The Dumpster Out Back
|
|
||||||
123 # ?????
|
|
||||||
]
|
]
|
||||||
|
|
||||||
# Regexes for sensitive information
|
# Regexes for sensitive information
|
||||||
|
@ -11,11 +11,7 @@ ul.boards { margin-left: 0; padding-left: 0; }
|
|||||||
.label { font-weight: bold }
|
.label { font-weight: bold }
|
||||||
article { border-top: 1px solid black; }
|
article { border-top: 1px solid black; }
|
||||||
section { margin-top: 15px; margin-bottom: 15px; }
|
section { margin-top: 15px; margin-bottom: 15px; }
|
||||||
|
|
||||||
.next { float: right; }
|
.next { float: right; }
|
||||||
.pagination { margin-bottom: 10px; }
|
|
||||||
.pagination ul { list-style-type: none; margin-left: 0; padding-left: 0; display: inline; }
|
|
||||||
.pagination li { display: inline; }
|
|
||||||
|
|
||||||
.page { padding-top: 15px; }
|
.page { padding-top: 15px; }
|
||||||
.page table { width: 100%; }
|
.page table { width: 100%; }
|
@ -1,9 +1,4 @@
|
|||||||
<div class="pagination">
|
<div class="pagination">
|
||||||
<a class="prev" href="page-{{prev}}.html">Previous Page</a>
|
<a class="prev" href="page-{{prev}}.html">Previous Page</a>
|
||||||
<ul>
|
|
||||||
{{#page_links}}
|
|
||||||
<li><a href="{{link}}">{{label}}</a></li>
|
|
||||||
{{/page_links}}
|
|
||||||
</ul>
|
|
||||||
<a class="next" href="page-{{next}}.html">Next Page</a>
|
<a class="next" href="page-{{next}}.html">Next Page</a>
|
||||||
</div>
|
</div>
|
@ -5,14 +5,12 @@
|
|||||||
<th>Title</th>
|
<th>Title</th>
|
||||||
<th>Poster</th>
|
<th>Poster</th>
|
||||||
<th>Date</th>
|
<th>Date</th>
|
||||||
<th>Replies</th>
|
|
||||||
</tr>
|
</tr>
|
||||||
{{#threads}}
|
{{#threads}}
|
||||||
<tr>
|
<tr>
|
||||||
<td class="thread-subject"><a href="thread-{{id}}">{{subject}}</a></td>
|
<td class="thread-subject"><a href="thread-{{id}}">{{subject}}</a></td>
|
||||||
<td class="thread-poster">{{poster_name}}</td>
|
<td class="thread-poster">{{poster_name}}</td>
|
||||||
<td class="thread-date">{{datetime}}</td>
|
<td class="thread-date">{{datetime}}</td>
|
||||||
<td class="replies">{{num_replies}}</td>
|
|
||||||
</tr>
|
</tr>
|
||||||
{{/threads}}
|
{{/threads}}
|
||||||
</table>
|
</table>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user