Add image downloader
This commit is contained in:
parent
0307b71016
commit
18b4b91995
46
smeargle.py
Normal file
46
smeargle.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
import os
|
||||||
|
import lxml
|
||||||
|
import hashlib
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from os.path import join, getsize
|
||||||
|
|
||||||
|
# Directory tree of saved HTML pages that is scanned for <img> tags.
saveDir = "C:\\TRsRockin\\General-Video-Game-Discussion\\"
# Directory that receives the downloaded images.
imageDir = "C:\\TRsRockin\\Images\\"

# Image URLs containing any of these substrings are never downloaded.
_BLOCKED_SUBSTRINGS = (
    "quantserve",
    "crowdreport",
    "archive.org",
    "derpiboo.ru",
    "ace-attorney.net",
)

# Seconds to wait on a single image request before giving up on it, so one
# unresponsive host cannot stall the whole run.
_REQUEST_TIMEOUT = 30


def is_wanted_url(src):
    """Return True when *src* is a non-empty URL with no blocked substring.

    *src* may be None (an <img> tag without a ``src`` attribute); that is
    treated the same as an empty string.
    """
    if not src:
        return False
    return not any(blocked in src for blocked in _BLOCKED_SUBSTRINGS)


def image_extension(url):
    """Return the filetype extension of *url*, or None if it has none.

    The query string and fragment are removed before looking for the
    extension, so a dot inside the query ("img.png?v=1.2") cannot be
    mistaken for the extension separator.  A candidate containing "/" means
    the last path segment had no dot at all, so there is no extension.
    """
    path = url.split("?", 1)[0].split("#", 1)[0]
    if "." not in path:
        return None
    extension = path.rsplit(".", 1)[1]
    if not extension or "/" in extension:
        return None
    return extension


def image_path(url, extension, image_dir=imageDir):
    """Return the local path for *url*: ``<image_dir>/<md5 of url>.<extension>``.

    Hashing the URL gives every distinct URL a unique, filesystem-safe name.
    """
    urlHash = hashlib.md5(url.encode("utf8")).hexdigest()
    return os.path.join(image_dir, urlHash + "." + extension)


def save_image(url, destination):
    """Download *url* and write its bytes to *destination*.

    Returns True when the image was written.  A failed request (connection
    error, timeout, invalid URL, HTTP error status) is reported and skipped
    so that one bad link does not abort the whole backup, and so that an
    error page is never stored as if it were the image.
    """
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as error:
        print("Could not download " + url + ": " + str(error))
        return False

    directory = os.path.dirname(destination)
    if directory:
        os.makedirs(directory, exist_ok=True)
    # "x" mode refuses to overwrite a file that appeared in the meantime.
    with open(destination, "xb") as imageFile:
        imageFile.write(response.content)
    return True


def backup_images(save_dir=saveDir, image_dir=imageDir):
    """Download every wanted image referenced by the pages under *save_dir*.

    Each file below *save_dir* is parsed as HTML; every <img> whose URL
    passes the filter and has an extension is saved into *image_dir* unless
    a file for that URL already exists there.
    """
    for root, _dirs, files in os.walk(save_dir):
        for fileName in files:
            filePath = os.path.join(root, fileName)

            with open(filePath, "r", encoding="utf8") as page:
                soup = BeautifulSoup(page, "lxml")

            for img in soup.find_all("img"):
                # First, filter out some URLs we don't want
                src = img.get("src")
                if not is_wanted_url(src):
                    continue

                # Change HTTPS to HTTP if necessary.  The filename hash is
                # taken from the rewritten URL, so this keeps names stable.
                url = src.replace("https://", "http://")

                # Get filetype extension of image
                filetypeExt = image_extension(url)
                if filetypeExt is None:
                    continue

                # Check whether image exists and if not, save it
                destination = image_path(url, filetypeExt, image_dir)
                if os.path.isfile(destination):
                    continue

                print("Saving image: " + url)
                print(" ")
                save_image(url, destination)

    print("Backup operation completed.")


# The script runs its backup as soon as it is executed.
backup_images()
|
Loading…
x
Reference in New Issue
Block a user