Initial commit

Izwzyzx 2020-02-06 15:37:58 -06:00
commit 0307b71016

ariados.py Normal file

@ -0,0 +1,97 @@
######################################################
# ARIADOS v1.0 Yuku Forum Backup #
# Because they ruined our forum and we want it back. #
######################################################
import os
import time
import lxml
import requests
from bs4 import BeautifulSoup
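# requests, lxml, and beautifulsoup4 are third-party packages (pip install requests lxml beautifulsoup4)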
# Forum URL
boardURL = "http://trsrockin.fr.yuku.com"
# Backup save location
# Backup will be saved in the following folder structure:
# Main Forum Name > Subforum Name > Thread Name > Page.htm
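# e.g. (hypothetical names): C:\TRsRockin Forums\General Discussion\Cool Thread\1.htm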
saveDir = "C:\\"
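# NOTE: saveDir must end in a backslash; every path below is built by plain string concatenation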
# ARIADOS will attempt to back up all threads within the following range
threadFirst = 180
threadLast = 6543
# Specify time delay between server requests
timeDelay = 2
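# timeDelay is in seconds (it is passed directly to time.sleep)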
# MAIN PROGRAM
print("--------------------------------")
print("ARIADOS v1.0 - Yuku Forum Backup")
print("--------------------------------")
print(" ")
print("ARIADOS will attempt to back up threads " + str(threadFirst) + " through " + str(threadLast))
print(" ")
print("--------------------------------")
print(" ")
iT = threadFirst  # Thread iterator
while iT <= threadLast:
    iP = 1  # Page iterator
    time.sleep(timeDelay)
    # Get the first page of the thread
    soup = BeautifulSoup(requests.get(boardURL + "/topic/" + str(iT)).text, "lxml")
    # Skip Yuku's soft-404 placeholder page (and any page missing a <title> entirely)
    if soup.title is not None and soup.title.string != "We are going to be back soon":
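        # The breadcrumb lists board index > subforum > thread title; these name the save folders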
        boardIndex = soup.select(".breadcrumb > li:nth-of-type(1) > a")[0]["title"]
        currentForum = soup.select(".breadcrumb > li:nth-of-type(2) > a")[0]["title"]
        currentThread = soup.select(".breadcrumb > li:nth-of-type(3) > a")[0]["title"]
        savePath = saveDir + boardIndex + "\\" + currentForum + "\\" + currentThread
        # Create the full folder tree if any level of it is missing
        os.makedirs(savePath, exist_ok=True)
        # Save the first page unless it was already backed up on an earlier run
        if not os.path.isfile(savePath + "\\" + str(iP) + ".htm"):
            print("Accessing thread: " + currentThread)
            # Mode "x" refuses to overwrite; the isfile check above makes that safe
            with open(savePath + "\\" + str(iP) + ".htm", "x", encoding="utf8") as file:
                file.write(soup.prettify())
        # Check how many pages the thread has
        if len(soup.select(".pagination")) > 0:
            # Read the last page number from the second-to-last pagination link
            # (the last link appears to be a "next" control, so index -2 holds the final page)
            threadEnd = int(soup.select("div.pagination ul li a")[-2].string)
            # Loop through the remaining thread pages
            while iP < threadEnd:
                iP += 1
                time.sleep(timeDelay)
                # Get the next page of the thread
                soup = BeautifulSoup(requests.get(boardURL + "/topic/" + str(iT) + "/?page=" + str(iP)).text, "lxml")
                # Save the page unless it was already backed up on an earlier run
                if not os.path.isfile(savePath + "\\" + str(iP) + ".htm"):
                    print("  Saving page " + str(iP) + " of " + str(threadEnd))
                    with open(savePath + "\\" + str(iP) + ".htm", "x", encoding="utf8") as file:
                        file.write(soup.prettify())
        print("Thread saved at " + savePath)
        print(" ")
        # Log the finished thread ID so an interrupted run can be resumed by hand
        with open(saveDir + boardIndex + "\\progress.txt", "a") as file:
            file.write(str(iT) + "\n")
    iT += 1
print("Backup operation completed.")