This commit is contained in:
Eduard Prigoana 2025-07-22 21:04:28 +03:00
parent 0c82b20e94
commit 3f901b942e
3 changed files with 102 additions and 6 deletions

15
info/status.json Normal file
View file

@@ -0,0 +1,15 @@
{
"last_updated": "2025-07-22T18:03:27.822459Z",
"files": {
"Artists.html": {
"hash": "a84bdea4d1a9c64637bb3237f0f9cc7eef6aa54be17561c2ba2c605c6e9a3579",
"last_archived": "2025-07-22T18:03:27.822475Z"
},
"artists.csv": {
"hash": "86dddcdc83796c995216a1e8019ed8e4e5864ef05962bf9f93113e60d6836e4c"
},
"artists.xlsx": {
"hash": "01b93bd2653b3fff4621fc5a173fe80ac2f9729304b6d54b0d98ee7e6b0efe7d"
}
}
}

57
main.py
View file

@@ -1,14 +1,16 @@
from flask import Flask, send_file, send_from_directory
from flask import Flask, send_file, send_from_directory, jsonify
from flask_cors import CORS
import threading
import os
import json
from config import HTML_FILENAME, CSV_FILENAME, XLSX_FILENAME
from update_loop import update_loop
from archive import test_archive
# Flask application object; CORS is enabled for every route so the
# frontend can fetch these endpoints from another origin.
app = Flask(__name__)
CORS(app)
# Serve main files
@app.route("/artists.html")
def serve_artists_html():
    """Serve the raw Artists HTML document."""
    response = send_file(HTML_FILENAME, mimetype="text/html")
    return response
@@ -21,6 +23,7 @@ def serve_artists_csv():
def serve_artists_xlsx():
    """Serve the Artists spreadsheet with the XLSX MIME type."""
    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    return send_file(XLSX_FILENAME, mimetype=xlsx_mime)
# Serve index and frontend assets
@app.route("/")
@app.route("/index")
@app.route("/index.html")
@@ -31,13 +34,63 @@ def serve_index():
def serve_next_static(filename):
    """Serve a Next.js build asset from the templates/_next directory."""
    asset_dir = "templates/_next"
    return send_from_directory(asset_dir, filename)
# Serve /info JSON
@app.route("/info")
def info_json():
    """Return the archiver status report (info/status.json) as JSON.

    Responds with a 404 error payload when the status file has not
    been written yet.
    """
    info_path = os.path.join("info", "status.json")
    # EAFP: open the file directly instead of exists()+open, which
    # races with the background updater thread rewriting the file.
    try:
        with open(info_path) as f:
            return jsonify(json.load(f))
    except FileNotFoundError:
        return {"error": "Info not available"}, 404
# Serve /info HTML
@app.route("/info/html")
def info_html():
    """Render the archiver status report as a simple HTML page.

    Reads info/status.json; returns a 404 message when the status
    file has not been written yet.
    """
    info_path = os.path.join("info", "status.json")
    # EAFP: open the file directly instead of exists()+open, which
    # races with the background updater thread rewriting the file.
    try:
        with open(info_path) as f:
            data = json.load(f)
    except FileNotFoundError:
        return "<p>Status info not available.</p>", 404
    # NOTE(review): direct key access assumes the schema written by
    # write_info; a malformed status file raises KeyError (HTTP 500).
    html = f"""
<html>
<head><title>File Info</title></head>
<body>
<h1>Latest File Info</h1>
<p><strong>Last Updated:</strong> {data.get('last_updated')}</p>
<ul>
<li><strong>Artists.html</strong><br>
Hash: {data['files']['Artists.html']['hash']}<br>
Archived: {data['files']['Artists.html']['last_archived']}
</li>
<li><strong>artists.csv</strong><br>
Hash: {data['files']['artists.csv']['hash']}
</li>
<li><strong>artists.xlsx</strong><br>
Hash: {data['files']['artists.xlsx']['hash']}
</li>
</ul>
</body>
</html>
"""
    return html
# 404 page
@app.errorhandler(404)
def page_not_found(e):
    """Serve the custom 404 page for any unknown route."""
    not_found_page = send_file("templates/404.html", mimetype="text/html")
    return not_found_page, 404
# Start app and updater
if __name__ == "__main__":
    # Run update loop in background; daemon=True so the thread does
    # not keep the process alive after the server exits.
    threading.Thread(target=update_loop, daemon=True).start()
    # Optional: perform initial download/generation if needed
    from downloader import download_zip_and_extract_html, download_xlsx
    from parser import generate_csv
    # Uncomment below if you want to do initial sync before serving
    # download_zip_and_extract_html()
    # download_xlsx()
    # generate_csv()
    # Bind on all interfaces, port 5000 (Flask development server).
    app.run(host="0.0.0.0", port=5000)

View file

@@ -1,4 +1,7 @@
import json
import os
import time
from datetime import datetime, timezone

from downloader import download_zip_and_extract_html, download_xlsx
from parser import generate_csv
@@ -9,6 +12,27 @@ from utils import hash_file
last_html_hash = None
last_csv_data = {}
# Path of the JSON status report served by the /info endpoints.
INFO_PATH = "info/status.json"

def write_info(html_hash, csv_hash, xlsx_hash):
    """Persist the current file hashes to INFO_PATH as JSON.

    Parameters:
        html_hash: content hash of Artists.html.
        csv_hash: content hash of artists.csv.
        xlsx_hash: content hash of artists.xlsx.
    """
    os.makedirs("info", exist_ok=True)
    # Take one timestamp so last_updated and last_archived agree; the
    # previous double utcnow() call wrote two slightly different values.
    # datetime.now(timezone.utc) replaces the deprecated utcnow(); the
    # "+00:00" offset is rewritten to the "Z" suffix used previously.
    now = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    info = {
        "last_updated": now,
        "files": {
            "Artists.html": {
                "hash": html_hash,
                # NOTE(review): written on every update, not only when an
                # archive run actually happened — confirm against update_loop.
                "last_archived": now
            },
            "artists.csv": {
                "hash": csv_hash
            },
            "artists.xlsx": {
                "hash": xlsx_hash
            }
        }
    }
    with open(INFO_PATH, "w") as f:
        json.dump(info, f, indent=2)
def update_loop():
global last_html_hash, last_csv_data
@@ -19,13 +43,15 @@ def update_loop():
download_xlsx()
generate_csv()
# Hash the Artists.html instead of artists.csv
current_hash = hash_file("Artists.html")
html_hash = hash_file("Artists.html")
csv_hash = hash_file("artists.csv")
xlsx_hash = hash_file("artists.xlsx")
current_data = read_csv_to_dict("artists.csv")
if last_html_hash is None:
print(" Initial HTML hash stored.")
elif current_hash != last_html_hash:
elif html_hash != last_html_hash:
print("🔔 Artists.html has changed! Archiving URLs...")
changes = detect_changes(last_csv_data, current_data)
@@ -39,7 +65,9 @@ def update_loop():
else:
print(" Artists.html unchanged. No archiving needed.")
last_html_hash = current_hash
write_info(html_hash, csv_hash, xlsx_hash)
last_html_hash = html_hash
last_csv_data = current_data
except Exception as e: