info
This commit is contained in:
parent
0c82b20e94
commit
3f901b942e
3 changed files with 102 additions and 6 deletions
|
|
@ -1,4 +1,7 @@
|
|||
import json
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
from downloader import download_zip_and_extract_html, download_xlsx
|
||||
from parser import generate_csv
|
||||
|
|
@ -9,6 +12,27 @@ from utils import hash_file
|
|||
|
||||
last_html_hash = None
|
||||
last_csv_data = {}
|
||||
INFO_PATH = "info/status.json"


def write_info(html_hash, csv_hash, xlsx_hash):
    """Write the current file hashes and a UTC timestamp to ``INFO_PATH``.

    The status file is a JSON document with a top-level ``last_updated``
    timestamp and a ``files`` mapping holding one entry each for
    ``Artists.html``, ``artists.csv`` and ``artists.xlsx``.  The HTML entry
    additionally carries a ``last_archived`` timestamp.

    Args:
        html_hash: Hash recorded for ``Artists.html``.
        csv_hash: Hash recorded for ``artists.csv``.
        xlsx_hash: Hash recorded for ``artists.xlsx``.

    The hash values are written verbatim, so they must be JSON-serializable.

    Raises:
        OSError: If the directory or the status file cannot be written.
    """
    # Function-scope import: the module itself only imports ``datetime``.
    from datetime import timezone

    # Take a single timestamp so "last_updated" and "last_archived" always
    # agree within one record.  ``datetime.utcnow()`` is deprecated; the
    # aware equivalent is stripped of tzinfo to keep the original
    # "YYYY-MM-DDTHH:MM:SS[.ffffff]Z" output format.
    stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    # Derive the directory from INFO_PATH so the two cannot drift apart.
    info_dir = os.path.dirname(INFO_PATH)
    if info_dir:
        os.makedirs(info_dir, exist_ok=True)

    info = {
        "last_updated": stamp,
        "files": {
            "Artists.html": {
                "hash": html_hash,
                "last_archived": stamp,
            },
            "artists.csv": {
                "hash": csv_hash,
            },
            "artists.xlsx": {
                "hash": xlsx_hash,
            },
        },
    }

    with open(INFO_PATH, "w", encoding="utf-8") as f:
        json.dump(info, f, indent=2)
|
||||
|
||||
def update_loop():
|
||||
global last_html_hash, last_csv_data
|
||||
|
|
@ -19,13 +43,15 @@ def update_loop():
|
|||
download_xlsx()
|
||||
generate_csv()
|
||||
|
||||
# Hash the Artists.html instead of artists.csv
|
||||
current_hash = hash_file("Artists.html")
|
||||
html_hash = hash_file("Artists.html")
|
||||
csv_hash = hash_file("artists.csv")
|
||||
xlsx_hash = hash_file("artists.xlsx")
|
||||
|
||||
current_data = read_csv_to_dict("artists.csv")
|
||||
|
||||
if last_html_hash is None:
|
||||
print("ℹ️ Initial HTML hash stored.")
|
||||
elif current_hash != last_html_hash:
|
||||
elif html_hash != last_html_hash:
|
||||
print("🔔 Artists.html has changed! Archiving URLs...")
|
||||
|
||||
changes = detect_changes(last_csv_data, current_data)
|
||||
|
|
@ -39,7 +65,9 @@ def update_loop():
|
|||
else:
|
||||
print("ℹ️ Artists.html unchanged. No archiving needed.")
|
||||
|
||||
last_html_hash = current_hash
|
||||
write_info(html_hash, csv_hash, xlsx_hash)
|
||||
|
||||
last_html_hash = html_hash
|
||||
last_csv_data = current_data
|
||||
|
||||
except Exception as e:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue