This commit is contained in:
Eduard Prigoana 2025-07-22 10:22:51 +03:00
parent c6556a0534
commit 0c82b20e94
2 changed files with 37 additions and 19 deletions

View file

@@ -1,17 +1,25 @@
import logging
from waybackpy import WaybackMachineSaveAPI from waybackpy import WaybackMachineSaveAPI
import time import time
import random import random
from config import ARCHIVE_URLS, USER_AGENT from config import ARCHIVE_URLS, USER_AGENT
# Module-wide logging setup: timestamped "time - LEVEL - message" lines at INFO.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
def archive_url(url):
    """Submit *url* to the Wayback Machine save API.

    Failures are logged and swallowed so a single bad URL does not abort
    a batch run driven by archive_all_urls().

    Args:
        url: The URL to archive.
    """
    # Lazy %-style args: the message is only formatted if the level is enabled.
    logger.info("🌐 Archiving %s ...", url)
    try:
        save_api = WaybackMachineSaveAPI(url, user_agent=USER_AGENT)
        save_api.save()
        logger.info("✅ Archived %s", url)
    except Exception as e:
        # logger.exception records the full traceback in addition to the
        # message, which makes intermittent Wayback failures diagnosable.
        logger.exception("⚠️ Exception archiving %s: %s", url, e)
def archive_all_urls(): def archive_all_urls():
for url in ARCHIVE_URLS: for url in ARCHIVE_URLS:

View file

@@ -4,23 +4,33 @@ from config import ZIP_URL, ZIP_FILENAME, HTML_FILENAME, XLSX_URL, XLSX_FILENAME
def download_zip_and_extract_html():
    """Download the ZIP archive and extract HTML_FILENAME from it.

    Downloads ZIP_URL to ZIP_FILENAME, then extracts the member named
    HTML_FILENAME and writes it next to the ZIP. Progress and failures are
    reported on stdout. On download failure the function returns early
    without attempting extraction.
    """
    print("🔄 Downloading ZIP...")
    try:
        # stream=True + iter_content keeps memory flat for large archives
        # instead of buffering the entire body in r.content.
        with requests.get(ZIP_URL, timeout=30, stream=True) as r:
            r.raise_for_status()
            with open(ZIP_FILENAME, "wb") as f:
                for chunk in r.iter_content(chunk_size=65536):
                    f.write(chunk)
        print(f"✅ Saved ZIP as {ZIP_FILENAME}")
    except requests.RequestException as e:
        print(f"❌ Failed to download ZIP: {e}")
        return
    try:
        with zipfile.ZipFile(ZIP_FILENAME, "r") as z:
            with z.open(HTML_FILENAME) as html_file:
                html_content = html_file.read()
        with open(HTML_FILENAME, "wb") as f:
            f.write(html_content)
        print(f"✅ Extracted {HTML_FILENAME}")
    except (zipfile.BadZipFile, KeyError) as e:
        # BadZipFile: corrupt/truncated download; KeyError: member missing.
        print(f"❌ Failed to extract {HTML_FILENAME}: {e}")
def download_xlsx():
    """Download the XLSX export from XLSX_URL to XLSX_FILENAME.

    Progress and failures are reported on stdout; network errors are
    caught and printed rather than propagated.
    """
    print("🔄 Downloading XLSX...")
    try:
        # stream=True + iter_content avoids holding the whole file in memory.
        with requests.get(XLSX_URL, timeout=30, stream=True) as r:
            r.raise_for_status()
            with open(XLSX_FILENAME, "wb") as f:
                for chunk in r.iter_content(chunk_size=65536):
                    f.write(chunk)
        print(f"✅ Saved XLSX as {XLSX_FILENAME}")
    except requests.RequestException as e:
        print(f"❌ Failed to download XLSX: {e}")