2016-12-16 00:29:59 -06:00
|
|
|
"""Scrapers accept an input located somewhere (at a URL or local file)
|
|
|
|
and scrape it into objects, which can be dumped by an outputter."""
|
|
|
|
|
2016-11-27 01:58:25 -06:00
|
|
|
from . import yuku, pickle
|
2016-11-26 23:09:12 -06:00
|
|
|
|
2016-12-16 00:29:59 -06:00
|
|
|
# Scraper modules known to this package. get_scraper() and guess_scraper()
# walk this list in order and return the first entry that matches.
SCRAPERS = [yuku, pickle]
|
2016-11-26 23:09:12 -06:00
|
|
|
|
2016-12-16 00:29:59 -06:00
|
|
|
def get_scraper(name):
    """Look up a scraper by its short name.

    A scraper matches when its ``__name__`` ends with ``".<name>"``.
    ``SCRAPERS`` is searched in order and the first match is returned.

    Raises:
        Exception: if no entry in ``SCRAPERS`` matches ``name``.
    """
    suffix = ".{}".format(name)
    found = next(
        (module for module in SCRAPERS if module.__name__.endswith(suffix)),
        None,
    )
    if found is None:
        raise Exception("Unknown scraper: {}".format(name))
    return found
|
|
|
|
|
2016-12-16 00:29:59 -06:00
|
|
|
def guess_scraper(url):
    """Pick the scraper that claims it can handle the given path or URL.

    ``SCRAPERS`` is walked in order. Entries that do not define
    ``can_scrape_url`` in their own namespace are skipped; the first
    entry whose ``can_scrape_url(url)`` returns a truthy value wins.

    Raises:
        Exception: if no entry in ``SCRAPERS`` accepts ``url``.
    """
    for candidate in SCRAPERS:
        # Only consult scrapers that opt in by defining the hook themselves.
        if "can_scrape_url" not in vars(candidate):
            continue
        if candidate.can_scrape_url(url):
            return candidate

    raise Exception("Unable to guess scraper for forum url: {}".format(url))
|