From 5bcb6e88847b913d4a24ab1c93af814b62f71bed Mon Sep 17 00:00:00 2001 From: Adrian Malacoda Date: Sun, 27 Nov 2016 00:48:55 -0600 Subject: [PATCH] add extra post & user info --- tge/scrapers/yuku.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tge/scrapers/yuku.py b/tge/scrapers/yuku.py index 522fb2c..b0407cd 100644 --- a/tge/scrapers/yuku.py +++ b/tge/scrapers/yuku.py @@ -65,7 +65,9 @@ def scrape_thread (url): # returns the rest of the thread's contents instead of just that post. # So we need to pick out only the first (username/signature/postbody) # to get around this. + date_element = post_entry.find(".date").eq(0) post_content_container = post_entry.find(".post-content-container").eq(0) + user_header = post_entry.find("header").eq(0) signature = post_content_container.find(".signature").eq(0) post_content_container.remove(".signature") @@ -75,7 +77,14 @@ def scrape_thread (url): signature = None thread.children.append(Post( - author=User(name=post_entry.find("header > p > a").eq(0).text(), signature=signature), + author=User( + name=user_header.find("p > a").eq(0).text(), + avatar=user_header.find("img[alt='avatar']").attr.src, + title=user_header.find(".auto-title").text(), + subtitle=user_header.find(".custom_title").text(), + signature=signature + ), + timestamp=date_element.text(), body=post_content_container.html().strip() ))