From d54f3ec21c616fdf0778529be0d62ef9d7f424f4 Mon Sep 17 00:00:00 2001 From: Adrian Malacoda Date: Sun, 27 Nov 2016 01:19:59 -0600 Subject: [PATCH] There are multiple h1s on the page, and the one we want is something like .eq(2). But addressing nodes by index like that is brittle and can break easily. I don't think there is a problem with simply selecting all h1s here. --- tge/scrapers/yuku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tge/scrapers/yuku.py b/tge/scrapers/yuku.py index 82ebabc..50556ea 100644 --- a/tge/scrapers/yuku.py +++ b/tge/scrapers/yuku.py @@ -41,7 +41,7 @@ def scrape_board (url): baseurl = "{}://{}".format(urlparts.scheme, urlparts.netloc) d = pq(url=url) - board = Board(title=d("h1").eq(0).text()) + board = Board(title=d("h1").text()) for thread_link in d("a[href^='/topic/']").items(): if thread_link.closest(".topic-pager"): continue