new

2025-08-22 04:42:18 +03:00 · 2025-08-22 04:42:18 +03:00 · c23eb924c3
commit c23eb924c3
parent be789cb732
85 changed files with 7090 additions and 253 deletions
--- a/update_loop.py
+++ b/update_loop.py
@@ -1,76 +1,97 @@
+# update_loop.py
 import json
+import logging
 import os
 import time
-from datetime import datetime
+from datetime import datetime, timezone

-from downloader import download_zip_and_extract_html, download_xlsx
-from parser import generate_csv
-from diff import read_csv_to_dict, detect_changes
 from archive import archive_all_urls
+from config import CSV_FILENAME, HTML_FILENAME, XLSX_FILENAME
+from diff import detect_changes, read_csv_to_dict
+from downloader import download_xlsx, download_zip_and_extract_html
 from notify import send_discord_message
+from parser import generate_csv
 from utils import hash_file

+logger = logging.getLogger(__name__)
+
 last_html_hash = None
 last_csv_data = {}
-INFO_PATH = "info/status.json"
+INFO_PATH = os.path.join("info", "status.json")
+UPDATE_INTERVAL_SECONDS = 600

-def write_info(html_hash, csv_hash, xlsx_hash):
+
+def write_info(html_hash: str, csv_hash: str, xlsx_hash: str, is_archived: bool):
    os.makedirs("info", exist_ok=True)
-    info = {
-        "last_updated": datetime.utcnow().isoformat() + "Z",
-        "files": {
-            "Artists.html": {
-                "hash": html_hash,
-                "last_archived": datetime.utcnow().isoformat() + "Z"
-            },
-            "artists.csv": {
-                "hash": csv_hash
-            },
-            "artists.xlsx": {
-                "hash": xlsx_hash
-            }
-        }
-    }
+    now_iso = datetime.now(timezone.utc).isoformat()
+
+    try:
+        with open(INFO_PATH, "r") as f:
+            info = json.load(f)
+    except (FileNotFoundError, json.JSONDecodeError):
+        info = {"files": {HTML_FILENAME: {}}}
+
+    info["last_updated"] = now_iso
+    info["files"][HTML_FILENAME]["hash"] = html_hash
+    if is_archived:
+        info["files"][HTML_FILENAME]["last_archived"] = now_iso
+
+    info["files"][CSV_FILENAME] = {"hash": csv_hash}
+    info["files"][XLSX_FILENAME] = {"hash": xlsx_hash}
+
    with open(INFO_PATH, "w") as f:
        json.dump(info, f, indent=2)

+
 def update_loop():
    global last_html_hash, last_csv_data

    while True:
+        logger.info("--- Starting update cycle ---")
        try:
            download_zip_and_extract_html()
            download_xlsx()
            generate_csv()

-            html_hash = hash_file("Artists.html")
-            csv_hash = hash_file("artists.csv")
-            xlsx_hash = hash_file("artists.xlsx")
+            if not all(
+                os.path.exists(f) for f in [HTML_FILENAME, CSV_FILENAME, XLSX_FILENAME]
+            ):
+                logger.warning(
+                    "One or more files are missing after download/parse. Skipping this cycle."
+                )
+                time.sleep(UPDATE_INTERVAL_SECONDS)
+                continue

-            current_data = read_csv_to_dict("artists.csv")
+            html_hash = hash_file(HTML_FILENAME)
+            csv_hash = hash_file(CSV_FILENAME)
+            xlsx_hash = hash_file(XLSX_FILENAME)
+            current_csv_data = read_csv_to_dict(CSV_FILENAME)

+            archived_this_cycle = False
            if last_html_hash is None:
-                print("ℹ️ Initial HTML hash stored.")
+                logger.info("First run: storing initial file hashes.")
            elif html_hash != last_html_hash:
-                print("🔔 Artists.html has changed! Archiving URLs...")
-
-                changes = detect_changes(last_csv_data, current_data)
+                logger.info("🔔 Artists.html has changed! Checking for data differences.")
+                changes = detect_changes(last_csv_data, current_csv_data)
                if changes:
-                    message = "**CSV Update Detected:**\n" + "\n".join(changes)
+                    message = "**Tracker Update Detected:**\n" + "\n".join(changes)
                    send_discord_message(message)
+                    archive_all_urls()
+                    archived_this_cycle = True
                else:
-                    print("ℹ️ No detectable content changes found in CSV.")
-
-                archive_all_urls()
+                    logger.info("ℹ️ HTML hash changed, but no data differences found.")
            else:
-                print("ℹ️ Artists.html unchanged. No archiving needed.")
-
-            write_info(html_hash, csv_hash, xlsx_hash)
+                logger.info("ℹ️ Artists.html is unchanged.")

+            write_info(html_hash, csv_hash, xlsx_hash, is_archived=archived_this_cycle)
            last_html_hash = html_hash
-            last_csv_data = current_data
+            last_csv_data = current_csv_data
+            logger.info("--- Update cycle finished ---")

        except Exception as e:
-            print(f"⚠️ Error updating files: {e}")
+            logger.critical(
+                f"An unexpected error occurred in the update loop: {e}", exc_info=True
+            )

-        time.sleep(600)
+        logger.info(f"Sleeping for {UPDATE_INTERVAL_SECONDS} seconds...")
+        time.sleep(UPDATE_INTERVAL_SECONDS)