From b67ab06b55f35bd0735098ce3d4c0a0ac9ba055c Mon Sep 17 00:00:00 2001
From: Adrian Malacoda
Date: Sun, 27 Nov 2016 13:03:13 -0600
Subject: [PATCH] Add exponential backoff for retrying

---
 tge/scrapers/yuku.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tge/scrapers/yuku.py b/tge/scrapers/yuku.py
index 50556ea..3d9ebd3 100644
--- a/tge/scrapers/yuku.py
+++ b/tge/scrapers/yuku.py
@@ -3,6 +3,7 @@ from urllib.parse import urlparse
 from time import strptime, mktime
 import dateutil.parser
 from pyquery import PyQuery as pq
+from retrying import retry
 
 time_format = "%b %d %y %I:%M %p"
 
@@ -18,6 +19,7 @@ def scrape (url):
     elif (not path) or path == "/":
         return scrape_index(url)
 
+@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000)
 def scrape_index (url):
     print("Scraping forum index from url: {}".format(url))
     urlparts = urlparse(url)
@@ -35,6 +37,7 @@ def scrape_index (url):
 
     return forum
 
+@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000)
 def scrape_board (url):
     print("Scraping board from url: {}".format(url))
     urlparts = urlparse(url)
@@ -57,6 +60,7 @@ def scrape_board (url):
 
     return board
 
+@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000)
 def scrape_thread (url):
     print("Scraping thread from url: {}".format(url))
     urlparts = urlparse(url)
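
Note on the decorator arguments (assuming the retrying library's documented
behaviour): with wait_exponential, the pause before retry attempt x is
2**x * wait_exponential_multiplier milliseconds, capped at
wait_exponential_max milliseconds, so with the values above the scrapers wait
roughly 2s, 4s, 8s, then 10s between attempts. A minimal, self-contained
sketch of that behaviour, using a made-up flaky_fetch function that is not
part of this patch:

    # Illustration only: flaky_fetch and its failure rate are hypothetical.
    import random
    from retrying import retry

    @retry(wait_exponential_multiplier=1000, wait_exponential_max=10000)
    def flaky_fetch():
        # Waits ~2s, 4s, 8s, then 10s (capped) between attempts and
        # keeps retrying on any exception until a call succeeds.
        if random.random() < 0.7:
            raise IOError("transient failure")
        return "ok"

    print(flaky_fetch())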