This commit is contained in:
Eduard Prigoana 2025-07-22 21:04:28 +03:00
parent 0c82b20e94
commit 3f901b942e
3 changed files with 102 additions and 6 deletions

View file

@ -1,4 +1,7 @@
import json
import os
import time
from datetime import datetime
from downloader import download_zip_and_extract_html, download_xlsx
from parser import generate_csv
@ -9,6 +12,27 @@ from utils import hash_file
# Parsed contents of artists.csv from the previous cycle; update_loop diffs
# the freshly parsed data against this and then overwrites it.
last_csv_data = {}
# Hash of Artists.html from the previous cycle; stays None until the first
# pass of update_loop has stored one.
last_html_hash = None
# Location of the JSON status document produced by write_info().
INFO_PATH = "info/status.json"


def write_info(html_hash, csv_hash, xlsx_hash):
    """Write the current file hashes and a UTC timestamp to INFO_PATH.

    The output is a JSON document with a top-level ``last_updated`` field
    and a ``files`` mapping holding the hash of ``Artists.html``,
    ``artists.csv`` and ``artists.xlsx``. The parent directory of
    INFO_PATH is created if it does not exist yet.

    Args:
        html_hash: Hash recorded for ``Artists.html``.
        csv_hash: Hash recorded for ``artists.csv``.
        xlsx_hash: Hash recorded for ``artists.xlsx``.

    Raises:
        OSError: If the directory or the file cannot be written.
    """
    # Local import: the module only does `from datetime import datetime`.
    from datetime import timezone

    # Derive the directory from INFO_PATH so the two can never drift apart.
    info_dir = os.path.dirname(INFO_PATH)
    if info_dir:
        os.makedirs(info_dir, exist_ok=True)

    # Take a single timezone-aware reading so both timestamps in the file
    # agree, and keep the "...Z" suffix format of the previous output.
    now = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    info = {
        "last_updated": now,
        "files": {
            "Artists.html": {
                "hash": html_hash,
                # NOTE(review): stamped on every call, whether or not an
                # archive run actually happened -- confirm this is intended.
                "last_archived": now,
            },
            "artists.csv": {
                "hash": csv_hash,
            },
            "artists.xlsx": {
                "hash": xlsx_hash,
            },
        },
    }

    with open(INFO_PATH, "w", encoding="utf-8") as f:
        json.dump(info, f, indent=2)
def update_loop():
global last_html_hash, last_csv_data
@ -19,13 +43,15 @@ def update_loop():
download_xlsx()
generate_csv()
# Hash the Artists.html instead of artists.csv
current_hash = hash_file("Artists.html")
html_hash = hash_file("Artists.html")
csv_hash = hash_file("artists.csv")
xlsx_hash = hash_file("artists.xlsx")
current_data = read_csv_to_dict("artists.csv")
if last_html_hash is None:
print(" Initial HTML hash stored.")
elif current_hash != last_html_hash:
elif html_hash != last_html_hash:
print("🔔 Artists.html has changed! Archiving URLs...")
changes = detect_changes(last_csv_data, current_data)
@ -39,7 +65,9 @@ def update_loop():
else:
print(" Artists.html unchanged. No archiving needed.")
last_html_hash = current_hash
write_info(html_hash, csv_hash, xlsx_hash)
last_html_hash = html_hash
last_csv_data = current_data
except Exception as e: