Commit c23eb924c3 (parent be789cb732) — 85 changed files with 7090 additions and 253 deletions. Diff hunk @@ -1,76 +1,97 @@ of update_loop.py follows.
# update_loop.py
import json
import logging
import os
import time
from datetime import datetime, timezone

from archive import archive_all_urls
from config import CSV_FILENAME, HTML_FILENAME, XLSX_FILENAME
from diff import detect_changes, read_csv_to_dict
from downloader import download_xlsx, download_zip_and_extract_html
from notify import send_discord_message
from parser import generate_csv
from utils import hash_file
logger = logging.getLogger(__name__)

# In-memory state carried across update cycles. Module-level by design:
# update_loop() rebinds these via `global` after each successful cycle.
last_html_hash = None
last_csv_data = {}

# Where cycle status (file hashes, last-archived timestamp) is persisted.
# NOTE: the diff defined INFO_PATH twice; the os.path.join form is the
# surviving (new) definition, kept once here.
INFO_PATH = os.path.join("info", "status.json")

# Seconds to sleep between update cycles.
UPDATE_INTERVAL_SECONDS = 600
def write_info(html_hash: str, csv_hash: str, xlsx_hash: str, is_archived: bool):
    """Persist the current file hashes (and archive timestamp) to INFO_PATH.

    The existing status file is reused so the HTML entry's "last_archived"
    timestamp survives cycles where nothing was archived.

    Args:
        html_hash: Hash of the downloaded HTML file.
        csv_hash: Hash of the generated CSV file.
        xlsx_hash: Hash of the downloaded XLSX file.
        is_archived: True when URLs were archived this cycle; refreshes the
            "last_archived" timestamp for the HTML entry.
    """
    os.makedirs("info", exist_ok=True)
    now_iso = datetime.now(timezone.utc).isoformat()

    # Load prior status; start fresh if the file is missing or corrupt.
    try:
        with open(INFO_PATH, "r") as f:
            info = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        info = {}

    # setdefault guards against a well-formed JSON file that lacks the
    # expected keys — the previous code raised KeyError in that case.
    files = info.setdefault("files", {})
    html_entry = files.setdefault(HTML_FILENAME, {})

    info["last_updated"] = now_iso
    html_entry["hash"] = html_hash
    if is_archived:
        html_entry["last_archived"] = now_iso

    files[CSV_FILENAME] = {"hash": csv_hash}
    files[XLSX_FILENAME] = {"hash": xlsx_hash}

    with open(INFO_PATH, "w") as f:
        json.dump(info, f, indent=2)
def update_loop():
    """Run the download → parse → diff → notify cycle forever.

    Each cycle: download the source files, regenerate the CSV, compare
    against the previous cycle's data, send a Discord notification and
    archive URLs when the data changed, then persist the current hashes
    via write_info(). Sleeps UPDATE_INTERVAL_SECONDS between cycles; any
    exception is logged and the loop continues.
    """
    global last_html_hash, last_csv_data

    while True:
        logger.info("--- Starting update cycle ---")
        try:
            download_zip_and_extract_html()
            download_xlsx()
            generate_csv()

            # A failed download/parse can leave files missing; skip the
            # cycle rather than hashing/diffing incomplete data.
            if not all(
                os.path.exists(f) for f in (HTML_FILENAME, CSV_FILENAME, XLSX_FILENAME)
            ):
                logger.warning(
                    "One or more files are missing after download/parse. Skipping this cycle."
                )
                time.sleep(UPDATE_INTERVAL_SECONDS)
                continue

            html_hash = hash_file(HTML_FILENAME)
            csv_hash = hash_file(CSV_FILENAME)
            xlsx_hash = hash_file(XLSX_FILENAME)
            current_csv_data = read_csv_to_dict(CSV_FILENAME)

            archived_this_cycle = False
            if last_html_hash is None:
                logger.info("First run: storing initial file hashes.")
            elif html_hash != last_html_hash:
                logger.info("🔔 Artists.html has changed! Checking for data differences.")
                changes = detect_changes(last_csv_data, current_csv_data)
                if changes:
                    message = "**Tracker Update Detected:**\n" + "\n".join(changes)
                    send_discord_message(message)
                    archive_all_urls()
                    archived_this_cycle = True
                else:
                    logger.info("ℹ️ HTML hash changed, but no data differences found.")
            else:
                logger.info("ℹ️ Artists.html is unchanged.")

            write_info(html_hash, csv_hash, xlsx_hash, is_archived=archived_this_cycle)
            last_html_hash = html_hash
            last_csv_data = current_csv_data
            logger.info("--- Update cycle finished ---")

        except Exception:
            # logger.exception records the full traceback; the loop keeps
            # running so one bad cycle doesn't kill the service. (Replaces
            # logger.critical(f"...", exc_info=True) — exception() is the
            # idiomatic form inside an except block and avoids eager
            # f-string formatting in the log call.)
            logger.exception("An unexpected error occurred in the update loop")

        logger.info("Sleeping for %s seconds...", UPDATE_INTERVAL_SECONDS)
        time.sleep(UPDATE_INTERVAL_SECONDS)
(End of diff view.)