Initial commit
commit 0307b71016
ariados.py | 97 lines | Normal file
@@ -0,0 +1,97 @@
######################################################
# ARIADOS v1.0 Yuku Forum Backup                     #
# Because they ruined our forum and we want it back. #
######################################################

import os
import time
import lxml  # parser backend for BeautifulSoup; imported explicitly so a missing install fails at startup
import requests
from bs4 import BeautifulSoup

# Forum URL
boardURL = "http://trsrockin.fr.yuku.com"

# Backup save location
# Backup will be saved in the following folder structure:
# Main Forum Name > Subforum Name > Thread Name > Page.htm
saveDir = "C:\\"
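# NOTE: paths below are assembled by plain string concatenation, so saveDir
# must end with a path separator. Forum or thread titles containing characters
# Windows forbids in folder names (e.g. ? " : /) would make os.mkdir fail.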

# ARIADOS will attempt to back up all threads within the following range
threadFirst = 180
threadLast = 6543

# Time delay between server requests, in seconds
timeDelay = 2


# MAIN PROGRAM
print("--------------------------------")
print("ARIADOS v1.0 - Yuku Forum Backup")
print("--------------------------------")
print(" ")
print("ARIADOS will attempt to back up threads " + str(threadFirst) + " through " + str(threadLast))
print(" ")
print("--------------------------------")
print(" ")

iT = threadFirst  # Thread iterator

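# Walk every thread ID in the configured range; IDs that no longer resolve
# to a live thread are skipped by the soft-404 title check below.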
while iT <= threadLast:
    iP = 1  # Page iterator
    threadEnd = 1  # Page count; raised below if the thread has a pagination bar

    time.sleep(timeDelay)

    # Get first page of thread
    soup = BeautifulSoup(requests.get(boardURL + "/topic/" + str(iT)).text, "lxml")

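    # Yuku serves a placeholder page titled "We are going to be back soon"
    # for missing threads rather than a hard HTTP 404, so the title text is
    # what gets tested.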
    # Only process the page if it isn't a 404
    if soup.title and soup.title.string != "We are going to be back soon":
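        # The breadcrumb trail runs board index > subforum > thread; the
        # links' "title" attributes supply clean names for the folder tree.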
        boardIndex = soup.select(".breadcrumb > li:nth-of-type(1) > a")[0]["title"]
        currentForum = soup.select(".breadcrumb > li:nth-of-type(2) > a")[0]["title"]
        currentThread = soup.select(".breadcrumb > li:nth-of-type(3) > a")[0]["title"]

        savePath = saveDir + boardIndex + "\\" + currentForum + "\\" + currentThread

        # Check for folders and make them if missing
        if not os.path.isdir(saveDir + boardIndex):
            os.mkdir(saveDir + boardIndex)
        if not os.path.isdir(saveDir + boardIndex + "\\" + currentForum):
            os.mkdir(saveDir + boardIndex + "\\" + currentForum)
        if not os.path.isdir(savePath):
            os.mkdir(savePath)

        # Check whether page exists and if not, save it
        if not os.path.isfile(savePath + "\\" + str(iP) + ".htm"):
            print("Accessing thread: " + currentThread)
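            # Mode "x" creates the file and raises FileExistsError if it
            # already exists; the isfile() check above normally prevents that.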
            with open(savePath + "\\" + str(iP) + ".htm", "x", encoding='utf8') as file:
                file.write(soup.prettify())

        # Check number of pages in thread
        if len(soup.select(".pagination")) > 0:
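            # The last pagination link is assumed to be "Next", so the
            # second-to-last link carries the highest page number.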
            pageLinks = soup.select("div.pagination ul li a")
            threadEnd = int(pageLinks[-2].string)

        # Loop through thread pages
        while iP < threadEnd:
            iP += 1

            time.sleep(timeDelay)

            # Get next page of thread, parsed with lxml to match page one
            soup = BeautifulSoup(requests.get(boardURL + "/topic/" + str(iT) + "/?page=" + str(iP)).text, "lxml")

            # Check whether page exists and if not, save it
            if not os.path.isfile(savePath + "\\" + str(iP) + ".htm"):
                print(" Saving page " + str(iP) + " of " + str(threadEnd))
                with open(savePath + "\\" + str(iP) + ".htm", "x", encoding='utf8') as file:
                    file.write(soup.prettify())

print("Thread saved at " + savePath)
|
||||
print(" ")
|
||||
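        # Append the finished thread ID to a simple resume log so an
        # interrupted run can be picked up later.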
        with open(saveDir + boardIndex + "\\" + "progress.txt", "a") as file:
            file.write(str(iT) + "\r\n")

    iT += 1

print("Backup operation completed.")