Add image downloader
This commit is contained in:
parent
0307b71016
commit
18b4b91995
46
smeargle.py
Normal file
46
smeargle.py
Normal file
@ -0,0 +1,46 @@
|
||||
import os
|
||||
import lxml
|
||||
import hashlib
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from os.path import join, getsize
|
||||
|
||||
# Directory tree of saved HTML pages that is scanned for <img> tags.
saveDir = "C:\\TRsRockin\\General-Video-Game-Discussion\\"

# Directory the downloaded images are written to.
imageDir = "C:\\TRsRockin\\Images\\"

# An image URL containing any of these substrings is never downloaded.
blockedFragments = (
    "quantserve",
    "crowdreport",
    "archive.org",
    "derpiboo.ru",
    "ace-attorney.net",
)

# Seconds to wait on a single download before giving up on it; without a
# timeout one unresponsive host would stall the whole run.
requestTimeout = 30


def isWantedUrl(src):
    """Return True when src is a non-empty URL with no blocked fragment.

    src may be None (an <img> tag without a src attribute) or an empty
    string; both are rejected.
    """
    if not src:
        return False
    return not any(fragment in src for fragment in blockedFragments)


def imageExtension(src):
    """Return the filetype extension of the image URL src, or None.

    The extension is the text after the last "." with any "?query" part
    removed.  None is returned when that text still contains a "/", which
    means the last "." was not part of a file name.
    """
    extension = src.split(".")[-1].split("?")[0]
    if "/" in extension:
        return None
    return extension


def saveImage(src):
    """Download the image at src into imageDir unless it is already there.

    The file is named after the MD5 hash of the URL plus the URL's
    extension.  URLs without a usable extension are skipped.  A failed
    download is reported and skipped without writing a file, so it is
    attempted again on the next run.
    """
    extension = imageExtension(src)
    if extension is None:
        return

    # Hash URL for unique filename
    urlHash = hashlib.md5(src.encode("utf8")).hexdigest()
    imagePath = imageDir + urlHash + "." + extension

    # Check whether image exists and if not, save it
    if os.path.isfile(imagePath):
        return

    print("Saving image: " + src)
    print(" ")
    try:
        response = requests.get(src, timeout=requestTimeout)
        # Do not store an HTTP error page under an image file name.
        response.raise_for_status()
    except requests.RequestException as error:
        print("Could not download " + src + ": " + str(error))
        return

    os.makedirs(imageDir, exist_ok=True)
    with open(imagePath, "xb") as imageFile:
        imageFile.write(response.content)


for root, _dirs, files in os.walk(saveDir):
    for fileName in files:
        filePath = os.path.join(root, fileName)

        with open(filePath, "r", encoding="utf8") as htmlFile:
            soup = BeautifulSoup(htmlFile, "lxml")

        for image in soup.find_all("img"):
            # .get() returns None for an <img> tag that has no src attribute
            src = image.get("src")

            # First, filter out some URLs we don't want
            if not isWantedUrl(src):
                continue

            # Change HTTPS to HTTP if necessary.  The stored file name is the
            # hash of this rewritten URL, so the rewrite must stay in place
            # for images saved by earlier runs to be recognised.
            saveImage(src.replace("https://", "http://"))

print("Backup operation completed.")
|
Loading…
x
Reference in New Issue
Block a user