there are multiple h1's on the page and the one we want is at something like .eq(2). But once you start addressing nodes by index like that, the selector becomes brittle and can break easily. I don't think we have a problem with just selecting all h1's here.
This commit is contained in:
@@ -41,7 +41,7 @@ def scrape_board (url):
|
|||||||
baseurl = "{}://{}".format(urlparts.scheme, urlparts.netloc)
|
baseurl = "{}://{}".format(urlparts.scheme, urlparts.netloc)
|
||||||
|
|
||||||
d = pq(url=url)
|
d = pq(url=url)
|
||||||
board = Board(title=d("h1").eq(0).text())
|
board = Board(title=d("h1").text())
|
||||||
for thread_link in d("a[href^='/topic/']").items():
|
for thread_link in d("a[href^='/topic/']").items():
|
||||||
if thread_link.closest(".topic-pager"):
|
if thread_link.closest(".topic-pager"):
|
||||||
continue
|
continue
|
||||||
|
Reference in New Issue
Block a user