new
This commit is contained in:
parent
be789cb732
commit
c23eb924c3
85 changed files with 7090 additions and 253 deletions
24
archive.py
24
archive.py
|
|
@ -1,32 +1,36 @@
|
|||
# archive.py
|
||||
import logging
|
||||
from waybackpy import WaybackMachineSaveAPI
|
||||
import time
|
||||
import random
|
||||
import time
|
||||
from typing import List
|
||||
|
||||
from waybackpy import WaybackMachineSaveAPI
|
||||
|
||||
from config import ARCHIVE_URLS, USER_AGENT
|
||||
|
||||
# Set up root logging once at import time: INFO and above, with each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger shared by the functions in this file.
logger = logging.getLogger(__name__)
|
||||
|
||||
def archive_url(url: str) -> None:
    """Submit ``url`` to the Wayback Machine and log the outcome.

    A ``WaybackMachineSaveAPI`` is created for ``url`` with the configured
    ``USER_AGENT`` and its ``save()`` method is called. Any exception raised
    while doing so is caught and logged instead of being propagated, so the
    caller never sees a failure from this function.

    Args:
        url: Address of the page to archive.
    """
    logger.info(f"🌐 Archiving {url} ...")
    try:
        save_api = WaybackMachineSaveAPI(url, user_agent=USER_AGENT)
        save_api.save()
        logger.info(f"✅ Archived {url}")
    except Exception as e:
        # Best-effort: report the failure exactly once, including the
        # traceback (exc_info=True), and let the caller carry on.
        logger.error(f"⚠️ Exception archiving {url}: {e}", exc_info=True)
|
||||
|
||||
|
||||
def archive_all_urls():
    """Archive every URL in ``ARCHIVE_URLS``, pausing before each request.

    Before each URL (including the first) the function sleeps for
    ``10 + random.uniform(-3, 3)`` seconds and then hands the URL to
    ``archive_url``. Start and finish markers are written to the log.
    """
    logger.info("--- Starting archival process for all URLs ---")

    for target in ARCHIVE_URLS:
        # Randomize the pause around a 10-second base.
        jitter = random.uniform(-3, 3)
        pause_seconds = 10 + jitter
        logger.info(f"Waiting {pause_seconds:.2f} seconds before next archive...")
        time.sleep(pause_seconds)

        archive_url(target)

    logger.info("--- Archival process finished ---")
|
||||
|
||||
|
||||
def test_archive():
    """Run ``archive_url`` once against a fixed httpbin.org URL.

    This makes no assertions: ``archive_url`` catches its own exceptions, so
    the result of the attempt is only visible in the log output.
    """
    test_url = "https://httpbin.org/anything/foo/bar"
    # One submission is enough; a repeated call would only re-submit the
    # same URL.
    archive_url(test_url)
|
||||
Loading…
Add table
Add a link
Reference in a new issue