mwparserfromhell is used for parsing wikitext, but it has no support for rendering to HTML, so we'll have to build the HTML rendering manually.
54 lines
2.1 KiB
Python
Executable File
54 lines
2.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
from urllib.request import urlopen
|
|
from datetime import date
|
|
import json
|
|
|
|
# Base address of the wiki whose API is queried.
GCL_URL = "https://glitchcity.info/"

# Path and query string of the category-members request; the "{}" placeholder
# receives the (already URL-safe) category name.
API_ENDPOINT = (
    "/w/api.php"
    "?action=query"
    "&generator=categorymembers"
    "&gcmtitle=Category:{}"
    "&prop=info"
    "&gcmlimit=100"
    "&format=json"
)

# Wiki categories whose member pages are collected.
CATEGORIES = [
    # Glitches
    "Generation I glitches",
    "Generation II glitches",
    "Generation III glitches",
    "Generation IV glitches",
    # Glitch Pokémon
    "Generation I glitch Pokémon",
    "Generation II glitch Pokémon",
    "Generation III glitch Pokémon",
    "Generation IV glitch Pokémon",
    # Glitch moves
    "Pokémon Red and Blue glitch moves",
    "Pokémon Yellow glitch moves",
    "Generation II glitch moves",
    # Glitch items
    "Generation I glitch items",
    "Generation II glitch items",
    "Generation IV glitch items",
    # Glitch Trainers
    "Generation I glitch Trainers",
    "Generation II glitch Trainers",
]

# Pages whose "touched" date is earlier than this are filtered out.
FILTER_MODIFIED_SINCE_THEN = date(year=2020, month=3, day=31)
|
|
|
|
def get_pages_for_category(url, category):
    """Fetch every member page of a wiki category through the query API.

    Requests ``url`` + ``API_ENDPOINT`` for ``category`` and keeps requesting
    follow-up batches for as long as a response carries a ``continue`` object.

    Args:
        url: Base URL of the wiki. A trailing slash is tolerated.
        category: Category name without the ``Category:`` prefix. Spaces are
            replaced by underscores and the result is percent-encoded.

    Returns:
        A dict merging the ``query.pages`` object of every batch. It is empty
        when no batch contains a ``query`` object (e.g. an empty category).

    Raises:
        RuntimeError: If a response contains an ``error`` object.
        urllib.error.URLError: Propagated from ``urlopen`` on network failure.
    """
    # Imported locally so this function is self-contained with respect to the
    # file's top-level imports.
    from urllib.parse import quote, urlencode

    # quote() percent-encodes every non-ASCII or reserved character (not only
    # "é"), using UTF-8.
    encoded_category = quote(category.replace(" ", "_"))

    # API_ENDPOINT already begins with "/", so strip a trailing slash from the
    # base URL to avoid requesting "//w/api.php".
    base_api_url = url.rstrip("/") + API_ENDPOINT.format(encoded_category)

    pages = {}
    continuation = None
    while True:
        api_url = base_api_url
        if continuation:
            # Send back every continuation parameter, URL-encoded; the tokens
            # may contain characters such as "|" that are not URL-safe.
            api_url += "&" + urlencode(continuation)

        with urlopen(api_url) as result:
            result_object = json.loads(result.read())

        if "error" in result_object:
            raise RuntimeError(
                "API error for category {!r}: {}".format(category, result_object["error"])
            )

        # A batch without results has no "query" object at all.
        pages.update(result_object.get("query", {}).get("pages", {}))

        continuation = result_object.get("continue")
        if not continuation:
            return pages
|
|
|
|
def filter_page(page, since=None):
    """Tell whether a page was touched on or after a cutoff date.

    Args:
        page: Page-info mapping. Its ``touched`` value must start with a
            ``YYYY-MM-DD`` date; anything from the first ``T`` on is ignored.
        since: Cutoff ``date`` (inclusive). Defaults to
            ``FILTER_MODIFIED_SINCE_THEN`` when omitted.

    Returns:
        True when the date part of ``page['touched']`` is not earlier than
        the cutoff, False otherwise.

    Raises:
        KeyError: If ``page`` has no ``touched`` entry.
        ValueError: If the date part is not three integers forming a valid date.
    """
    # NOTE(review): "touched" may also change without an edit to the page
    # (e.g. cache invalidation) — confirm it is the intended "modified" signal.
    if since is None:
        since = FILTER_MODIFIED_SINCE_THEN

    # Only the calendar date matters; drop the time-of-day part.
    date_part = page['touched'].split("T")[0]
    year, month, day = (int(field) for field in date_part.split("-"))
    return date(year, month, day) >= since
|
|
|
|
# Merge the pages of every configured category into a single mapping; a page
# returned under the same key by several categories is kept only once.
all_pages = {}
for category in CATEGORIES:
    category_pages = get_pages_for_category(GCL_URL, category)
    page_count = len(category_pages)
    print("--> Found {} total pages in category {}".format(page_count, category))
    all_pages.update(category_pages)

print("----> {} total pages to consider".format(len(all_pages)))

# Keep only the pages accepted by filter_page, then list their titles.
filtered_pages = list(filter(filter_page, all_pages.values()))
print("----> {} filtered pages".format(len(filtered_pages)))

for page in filtered_pages:
    print(page['title'])