"""The Great Escape is a tool for scraping data from a web forum and
exporting it into a format which can be re-imported."""

import argparse
from urllib.parse import urlparse

from . import scrapers, outputters
from .util import sanitize_title


def _build_parser():
    """Build the command-line argument parser for The Great Escape."""
    parser = argparse.ArgumentParser(description="Forum scraper")
    parser.add_argument(
        "--scraper",
        dest="scraper",
        help="Scraper to use; if not specified, tries to guess",
    )
    # "in" is a Python keyword, so the value is stored under "source" to allow
    # plain attribute access; metavar keeps the usage text showing "--in IN".
    parser.add_argument(
        "--in",
        dest="source",
        metavar="IN",
        required=True,
        help="URL or file to scrape",
    )
    parser.add_argument(
        "--out",
        dest="out",
        help=(
            "Path to output; if not specified, is derived from the scraped "
            "forum's title"
        ),
    )
    parser.add_argument(
        "--outformat",
        dest="outformat",
        help=(
            "Format to output data out; if not specified, default "
            "(JSON-based) format is used"
        ),
    )
    return parser


def main(argv=None):
    """The Great Escape's entry point.

    Parses the command line, picks a scraper (the one named by ``--scraper``
    or, failing that, one guessed from the source), scrapes the source and
    hands the result to the selected outputter.

    Args:
        argv: Optional list of command-line arguments, excluding the program
            name. When ``None`` (the default), ``sys.argv[1:]`` is used.
    """
    args = _build_parser().parse_args(argv)

    source = args.source
    print("Source is: {}".format(source))

    if args.scraper:
        scraper = scrapers.get_scraper(args.scraper)
        print("Using scraper: {}".format(scraper.__name__))
    else:
        scraper = scrapers.guess_scraper(source)
        print("Guessed scraper: {}".format(scraper.__name__))

    scraped = scraper.scrape(source)
    print(scraped.title)

    # Fall back to a name built from the scraped title when no (or an empty)
    # output path was given, and to the "json" outputter when no format was.
    out = args.out or sanitize_title(scraped.title)
    outformat = args.outformat or "json"

    print("Outputting to: {}, using {} outputter".format(out, outformat))
    outputter = outputters.get_outputter(outformat)
    outputter.output(scraped, out)