#!/usr/bin/env python3
"""List Glitch City wiki pages from the glitch categories touched since a cutoff.

The script queries the MediaWiki API at ``GCL_URL`` for the members of every
category in ``CATEGORIES``, keeps the pages whose ``touched`` timestamp is on
or after ``FILTER_MODIFIED_SINCE_THEN`` and prints their titles.
"""
import json
from datetime import date
from urllib.parse import quote, urlencode
from urllib.request import urlopen

GCL_URL = "https://glitchcity.info/"
API_ENDPOINT = "/w/api.php?action=query&generator=categorymembers&gcmtitle=Category:{}&prop=info&gcmlimit=100&format=json"
CATEGORIES = [
    "Generation I glitches",
    "Generation II glitches",
    "Generation III glitches",
    "Generation IV glitches",
]
FILTER_MODIFIED_SINCE_THEN = date(2020, 3, 31)


def build_api_url(url, category, continue_params=None):
    """Return the API URL that lists the members of *category*.

    Args:
        url: Base URL of the wiki; a trailing slash is tolerated.
        category: Category name without the ``Category:`` prefix. Spaces are
            turned into underscores and the result is percent-encoded.
        continue_params: Optional mapping of continuation parameters taken
            from the ``continue`` object of a previous API response.

    Returns:
        The complete request URL as a string.
    """
    # Percent-encode the title so characters such as "&" or "#" cannot
    # corrupt the query string.
    encoded_category = quote(category.replace(" ", "_"), safe="")
    # API_ENDPOINT already starts with "/", so drop any trailing slash from
    # the base URL to avoid requesting "//w/api.php".
    api_url = url.rstrip("/") + API_ENDPOINT.format(encoded_category)
    if continue_params:
        # Continuation tokens usually contain "|" and must be encoded too.
        api_url += "&" + urlencode(continue_params)
    return api_url


def get_pages_for_category(url, category):
    """Fetch every page of *category*, following API continuation.

    Args:
        url: Base URL of the wiki.
        category: Category name without the ``Category:`` prefix.

    Returns:
        A dict of the entries found under ``query.pages`` in the API
        responses, merged across all batches. Empty when the API reports no
        pages for the category.
    """
    pages = {}
    continue_params = None
    while True:
        with urlopen(build_api_url(url, category, continue_params)) as result:
            result_object = json.loads(result.read())
        # A response without results has no "query" key at all.
        pages.update(result_object.get("query", {}).get("pages", {}))
        # Send back the whole "continue" object rather than only
        # "gcmcontinue", as the MediaWiki continuation protocol expects.
        continue_params = result_object.get("continue")
        if not continue_params:
            return pages


def filter_page(page):
    """Return True if *page* was touched on or after the cutoff date.

    Args:
        page: A page dict from the API whose ``touched`` value starts with a
            ``YYYY-MM-DD`` date followed by ``T`` and the time of day.
    """
    date_part = page['touched'].split("T")[0]
    touched = date(*(int(value) for value in date_part.split("-")))
    return touched >= FILTER_MODIFIED_SINCE_THEN


def main():
    """Collect the pages of all categories and print the filtered titles."""
    all_pages = {}
    for category in CATEGORIES:
        category_pages = get_pages_for_category(GCL_URL, category)
        print("--> Found {} total pages in category {}".format(len(category_pages), category))
        # Pages are keyed as returned by the API, so a page that belongs to
        # several categories is only kept once.
        all_pages.update(category_pages)
    print("----> {} total pages to consider".format(len(all_pages)))

    filtered_pages = [page for page in all_pages.values() if filter_page(page)]
    print("----> {} filtered pages".format(len(filtered_pages)))
    for page in filtered_pages:
        print(page['title'])


if __name__ == "__main__":
    main()