Redo main() so it can work with either the local filesystem or URLs. Rename --url to --in for consistency.

This commit is contained in:
Adrian Malacoda 2016-11-27 02:04:44 -06:00
parent 808677b327
commit c52f472091

View File

@ -2,30 +2,28 @@ import argparse
from urllib.parse import urlparse
from . import scrapers, outputters
from .util import sanitize_title
def main ():
parser = argparse.ArgumentParser(description="Forum scraper")
parser.add_argument("--scraper", dest="scraper", help="Scraper to use; if not specified, tries to guess")
parser.add_argument("--url", dest="url", required=True, help="URL to scrape")
parser.add_argument("--in", dest="in", required=True, help="URL or file to scrape")
parser.add_argument("--out", dest="out", help="Path to output; if not specified, is the target forum's url")
parser.add_argument("--outformat", dest="outformat", help="Format to output data out; if not specified, default (JSON-based) format is used")
args = parser.parse_args()
url = args.url
if not (url.startswith("https://") or url.startswith("http://")):
url = "http://{}".format(url)
print("Target URL is: {}".format(url))
source = vars(args)['in']
print("Source is: {}".format(source))
if args.scraper:
scraper = scrapers.get_scraper(args.scraper)
print("Using scraper: {}".format(scraper.__name__))
else:
scraper = scrapers.guess_scraper(url)
scraper = scrapers.guess_scraper(source)
print("Guessed scraper: {}".format(scraper.__name__))
scraped = scraper.scrape(url)
scraped = scraper.scrape(source)
out = args.out if args.out else urlparse(url).netloc
out = args.out if args.out else sanitize_title(scraped.title)
outformat = args.outformat if args.outformat else "json"
print("Outputting to: {}, using {} outputter".format(out, outformat))
outputter = outputters.get_outputter(outformat)