Add find_pages script to determine through the MediaWiki API which pages need to be looked at.
This commit is contained in:
parent
1a2dbbe65b
commit
191e1ebd13
48
find_pages
Executable file
48
find_pages
Executable file
@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python3
|
||||
from urllib.request import urlopen
|
||||
from datetime import date
|
||||
import json
|
||||
|
||||
# Base URL of the wiki to query. Deliberately written WITHOUT a trailing
# slash: API_ENDPOINT starts with "/", and the two are joined by plain string
# concatenation, so a trailing slash here would yield ".info//w/api.php".
GCL_URL = "https://glitchcity.info"

# Path and query string of the MediaWiki API call, appended to the base URL.
# The "{}" placeholder receives the category title (without the "Category:"
# prefix). The query asks for page info (prop=info) of the category members,
# in JSON, with gcmlimit=100.
API_ENDPOINT = "/w/api.php?action=query&generator=categorymembers&gcmtitle=Category:{}&prop=info&gcmlimit=100&format=json"

# Categories whose member pages are inspected.
CATEGORIES = [
    "Generation I glitches",
    "Generation II glitches",
    "Generation III glitches",
    "Generation IV glitches",
]

# Cutoff date (inclusive): only pages whose "touched" date is on or after
# this day are reported.
FILTER_MODIFIED_SINCE_THEN = date(2020, 3, 31)
|
||||
|
||||
def get_pages_for_category(url, category):
    """Fetch the page info of every member of a MediaWiki category.

    Calls the wiki API described by API_ENDPOINT and keeps requesting
    follow-up batches for as long as the response carries a ``continue``
    element.

    Args:
        url: Base URL of the wiki, with or without a trailing slash.
        category: Category title without the ``Category:`` prefix. Spaces
            are turned into underscores and the result is percent-encoded.

    Returns:
        A dict merging the ``query.pages`` objects of all responses. Empty
        when the API returns no pages for the category.

    Raises:
        RuntimeError: If the API response contains an ``error`` element.
    """
    from urllib.parse import quote, urlencode

    # Percent-encode the title so characters such as "&", "#" or non-ASCII
    # letters cannot corrupt the query string.
    category = quote(category.replace(" ", "_"))

    # Join base URL and endpoint with exactly one slash, whichever of the two
    # carries it. The base request is loop-invariant, so build it only once.
    base_url = url.rstrip("/") + "/" + API_ENDPOINT.lstrip("/").format(category)

    pages = {}
    continue_params = None

    while True:
        api_url = base_url
        if continue_params:
            # The API expects every key of the previous "continue" object to
            # be sent back; urlencode also escapes characters such as "|".
            api_url += "&" + urlencode(continue_params)

        with urlopen(api_url) as result:
            result_object = json.loads(result.read())

        if "error" in result_object:
            # Surface API-level failures explicitly instead of an opaque
            # KeyError on the missing "query" element.
            raise RuntimeError(
                "MediaWiki API error for category {}: {}".format(
                    category, result_object["error"]
                )
            )

        # "query" is absent when the generator yields no pages.
        pages.update(result_object.get("query", {}).get("pages", {}))

        continue_params = result_object.get("continue")
        if not continue_params:
            break

    return pages
|
||||
|
||||
def filter_page(page, since=None):
    """Tell whether a page was touched on or after a cutoff date.

    Args:
        page: Page info dict holding a ``touched`` timestamp string whose
            date part precedes a ``T`` and has the form ``YYYY-MM-DD``
            (for example ``2020-04-01T12:34:56Z``).
        since: Inclusive cutoff as a ``datetime.date``. Defaults to the
            module-level FILTER_MODIFIED_SINCE_THEN when omitted.

    Returns:
        True if the date part of ``touched`` is on or after the cutoff.
    """
    # NOTE(review): "touched" may be updated by events other than edits
    # (e.g. cache invalidation) - confirm it is the intended timestamp.
    if since is None:
        since = FILTER_MODIFIED_SINCE_THEN

    # Only the calendar date matters; drop the time-of-day part.
    date_part = page['touched'].split("T")[0]
    touched = date(*[int(value) for value in date_part.split("-")])
    return touched >= since
|
||||
|
||||
# Gather the pages of every configured category into one mapping. Entries
# with the same key overwrite each other, so (presumably, if the API keys
# each page consistently) a page listed in several categories is kept once.
all_pages = {}

for category in CATEGORIES:
    category_pages = get_pages_for_category(GCL_URL, category)
    found_count = len(category_pages)
    print("--> Found {} total pages in category {}".format(found_count, category))
    all_pages.update(category_pages)

total_count = len(all_pages)
print("----> {} total pages to consider".format(total_count))

# Keep only the pages accepted by filter_page.
filtered_pages = list(filter(filter_page, all_pages.values()))
print("----> {} filtered pages".format(len(filtered_pages)))

# Report the remaining pages, one title per line.
for page in filtered_pages:
    print(page['title'])
|
Loading…
x
Reference in New Issue
Block a user