This commit is contained in:
Eduard Prigoana 2025-08-22 04:42:18 +03:00
parent be789cb732
commit c23eb924c3
85 changed files with 7090 additions and 253 deletions

View file

@ -1,32 +1,36 @@
# archive.py
import logging
from waybackpy import WaybackMachineSaveAPI
import time
import random
import time
from typing import List
from waybackpy import WaybackMachineSaveAPI
from config import ARCHIVE_URLS, USER_AGENT
# Set up process-wide logging when this module is imported: messages at INFO
# and above are emitted with a timestamp and level prefix. Note that
# logging.basicConfig() is a no-op if the root logger already has handlers.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-scoped logger used by every function in this file.
logger = logging.getLogger(__name__)
def archive_url(url: str) -> None:
    """Submit ``url`` to the Wayback Machine and log the outcome.

    Builds a ``WaybackMachineSaveAPI`` for ``url`` using the configured
    ``USER_AGENT`` and calls its ``save()`` method. Any exception raised
    while doing so is logged together with its traceback and then swallowed,
    so a failure to archive one URL never propagates to the caller.

    Args:
        url: The URL to archive.
    """
    logger.info(f"🌐 Archiving {url} ...")
    try:
        save_api = WaybackMachineSaveAPI(url, user_agent=USER_AGENT)
        save_api.save()
    except Exception as e:
        # Best-effort by design: record the failure once (with traceback)
        # and let the caller carry on with the remaining URLs.
        logger.error(f"⚠️ Exception archiving {url}: {e}", exc_info=True)
    else:
        logger.info(f"✅ Archived {url}")
def archive_all_urls(*, base_delay: float = 10.0, jitter: float = 3.0) -> None:
    """Archive every URL in ``ARCHIVE_URLS``, pausing before each one.

    Before each URL (including the first) the function sleeps for
    ``base_delay`` seconds plus a uniformly distributed random offset in
    ``[-jitter, +jitter]``, then hands the URL to ``archive_url``.
    Start and finish markers are written to the log.

    Args:
        base_delay: Nominal number of seconds to wait before each archive
            request. Defaults to 10, the value previously hard-coded.
        jitter: Half-width, in seconds, of the random offset added to
            ``base_delay``. Defaults to 3, the value previously hard-coded.
    """
    logger.info("--- Starting archival process for all URLs ---")
    for url in ARCHIVE_URLS:
        # time.sleep() raises ValueError for negative durations, so clamp at
        # zero in case a caller passes jitter larger than base_delay.
        delay = max(0.0, base_delay + random.uniform(-jitter, jitter))
        logger.info(f"Waiting {delay:.2f} seconds before next archive...")
        time.sleep(delay)
        archive_url(url)
    logger.info("--- Archival process finished ---")
def test_archive() -> None:
    """Run a manual smoke check by archiving one fixed httpbin.org URL.

    Calls ``archive_url`` exactly once; the outcome is reported through the
    module logger rather than returned.
    """
    test_url = "https://httpbin.org/anything/foo/bar"
    archive_url(test_url)