diff --git a/tge/scrapers/yuku.py b/tge/scrapers/yuku.py index b881821..48e7ed3 100644 --- a/tge/scrapers/yuku.py +++ b/tge/scrapers/yuku.py @@ -65,9 +65,18 @@ def scrape_thread (url): # returns the rest of the thread's contents instead of just that post. # So we need to pick out only the first (username/signature/postbody) # to get around this. + post_content_container = post_entry.find(".post-content-container").eq(0) + signature = post_content_container.find(".signature").eq(0) + post_content_container.remove(".signature") + + if signature: + signature = signature.html().strip() + else: + signature = None + thread.children.append(Post( - author=User(name=post_entry("header > p > a").eq(0).text()), - body=post_entry(".post-content-container").eq(0).text() + author=User(name=post_entry.find("header > p > a").eq(0).text(), signature=signature), + body=post_content_container.html().strip() )) nextlink = d("a[accesskey=n]")