diff --git a/tge/scrapers/yuku.py b/tge/scrapers/yuku.py index b0407cd..d98c933 100644 --- a/tge/scrapers/yuku.py +++ b/tge/scrapers/yuku.py @@ -1,7 +1,11 @@ from ..model import User, Category, Forum, Board, Post, Thread from urllib.parse import urlparse +from time import strptime, mktime +import dateutil.parser from pyquery import PyQuery as pq +time_format = "%b %d %y %I:%M %p" + def can_scrape_url (url): return ".fr.yuku.com" in url @@ -76,6 +80,11 @@ def scrape_thread (url): else: signature = None + if date_element.find("time"): + timestamp = dateutil.parser.parse(date_element.text()).timestamp() + else: + timestamp = mktime(strptime(date_element.text(), time_format)) + thread.children.append(Post( author=User( name=user_header.find("p > a").eq(0).text(), @@ -84,7 +93,7 @@ def scrape_thread (url): subtitle=user_header.find(".custom_title").text(), signature=signature ), - timestamp=date_element.text(), + timestamp=timestamp, body=post_content_container.html().strip() ))