diff --git a/__pycache__/archive.cpython-313.pyc b/__pycache__/archive.cpython-313.pyc index 7ce8541..2decb8c 100644 Binary files a/__pycache__/archive.cpython-313.pyc and b/__pycache__/archive.cpython-313.pyc differ diff --git a/__pycache__/config.cpython-313.pyc b/__pycache__/config.cpython-313.pyc index b4935b5..031482d 100644 Binary files a/__pycache__/config.cpython-313.pyc and b/__pycache__/config.cpython-313.pyc differ diff --git a/archive.py b/archive.py index f63ff4c..1116e79 100644 --- a/archive.py +++ b/archive.py @@ -1,16 +1,15 @@ -import requests, time, random +from waybackpy import WaybackMachineSaveAPI +import time +import random from config import ARCHIVE_URLS, USER_AGENT def archive_url(url): print(f"🌐 Archiving {url} ...") - headers = {"User-Agent": USER_AGENT} try: - resp = requests.get(f"https://web.archive.org/save/{url}", headers=headers, timeout=30) - if resp.status_code == 200: - print(f"✅ Archived {url}") - else: - print(f"⚠️ Failed to archive {url}, status code {resp.status_code}") + save_api = WaybackMachineSaveAPI(url, user_agent=USER_AGENT) + save_api.save() + print(f"✅ Archived {url}") except Exception as e: print(f"⚠️ Exception archiving {url}: {e}") @@ -19,3 +18,7 @@ def archive_all_urls(): delay = 10 + random.uniform(-3, 3) time.sleep(delay) archive_url(url) + +def test_archive(): + test_url = "https://httpbin.org/anything/foo/bar" + archive_url(test_url) diff --git a/config.py b/config.py index 9e8dbb5..5583b9a 100644 --- a/config.py +++ b/config.py @@ -20,7 +20,7 @@ exclude_names = { USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36" -BASE_URL = "http://localhost:5000" +BASE_URL = "https://artistgrid.cx/" ARCHIVE_URLS = [ f"{BASE_URL}/", diff --git a/main.py b/main.py index 0d10b71..89b6564 100644 --- a/main.py +++ b/main.py @@ -4,6 +4,7 @@ import threading from config import HTML_FILENAME, CSV_FILENAME, XLSX_FILENAME from update_loop import update_loop +from archive import test_archive app = Flask(__name__) CORS(app) @@ -39,11 +40,4 @@ if __name__ == "__main__": from downloader import download_zip_and_extract_html, download_xlsx from parser import generate_csv - try: - download_zip_and_extract_html() - download_xlsx() - generate_csv() - except Exception as e: - print(f"⚠️ Initial update failed: {e}") - app.run(host="0.0.0.0", port=5000) diff --git a/requirements.txt b/requirements.txt index 5889221..338beb1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ Flask requests beautifulsoup4 lxml -flask-cors \ No newline at end of file +flask-cors +waybacKpy \ No newline at end of file