sheets/main.py
2025-07-20 20:01:01 +03:00

291 lines
9.2 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
import zipfile
import threading
import time
import random
import hashlib
from bs4 import BeautifulSoup
import csv
import re
from flask import Flask, send_file, send_from_directory, abort
import os
import json
from flask_cors import CORS
# Flask application object; the route handlers in this file are registered on it.
app = Flask(__name__)
# Enable cross-origin requests (flask_cors defaults) for every route on the app.
CORS(app)
# Google Sheets export endpoints for the tracked spreadsheet: a ZIP export
# (from which HTML_FILENAME is extracted) and an XLSX workbook.
ZIP_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=zip"
XLSX_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=xlsx"
# Local file names the downloads and the generated CSV are written to.
ZIP_FILENAME = "Trackerhub.zip"
# Name of the member read out of the ZIP, and of the local copy written from it.
HTML_FILENAME = "Artists.html"
CSV_FILENAME = "artists.csv"
XLSX_FILENAME = "artists.xlsx"
# Rows whose cleaned name equals one of these entries are left out of the
# generated CSV (checked in generate_csv after emoji markers are stripped).
exclude_names = {
"AI Models",
"Lawson",
"BPM Tracker",
"Worst Comps & Edits",
"Allegations",
"Rap Disses Timeline",
"Underground Artists",
}
# User-Agent header sent with the Wayback Machine save requests.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36"
# URLs to archive on changes — update these to your actual hosted domain
BASE_URL = "http://localhost:5000"  # Change this to your public domain when deployed
# Every entry must match a route this app actually serves. The index page is
# registered as "/index.html" (no trailing slash); Flask answers 404 for the
# slash-suffixed form, so the entry is listed without a trailing slash.
ARCHIVE_URLS = [
    f"{BASE_URL}/",
    f"{BASE_URL}/index.html",
    f"{BASE_URL}/artists.html",
    f"{BASE_URL}/artists.csv",
    f"{BASE_URL}/artists.xlsx",
]
# Discord webhook endpoint read from the environment; None when the variable is unset.
DISCORD_WEBHOOK_URL = os.getenv("DISCORD_WEBHOOK_URL")
def clean_artist_name(text):
    """Return *text* with the marker emoji removed and outer whitespace trimmed."""
    marker_pattern = re.compile(r'[⭐🤖🎭\u2B50\uFE0F]')
    without_markers = marker_pattern.sub('', text)
    return without_markers.strip()
def force_star_flag(starred=True):
    """Map a truthy/falsy flag to the CSV text "Yes" or "No"."""
    if starred:
        return "Yes"
    return "No"
def download_zip_and_extract_html():
    """Download the spreadsheet ZIP export and extract the artists HTML page.

    The archive is saved to ZIP_FILENAME, then the member named HTML_FILENAME
    is copied out of it into a local file of the same name.

    Raises:
        requests.RequestException: on a network failure, a timeout, or an
            HTTP error status.
        zipfile.BadZipFile: if the downloaded payload is not a ZIP archive.
        KeyError: if the archive has no member named HTML_FILENAME.
    """
    print("🔄 Downloading ZIP...")
    # requests waits indefinitely by default; bound the wait so a stalled
    # connection cannot hang the caller forever.
    r = requests.get(ZIP_URL, timeout=60)
    r.raise_for_status()
    with open(ZIP_FILENAME, "wb") as f:
        f.write(r.content)
    print(f"✅ Saved ZIP as {ZIP_FILENAME}")
    with zipfile.ZipFile(ZIP_FILENAME, "r") as z:
        with z.open(HTML_FILENAME) as html_file:
            html_content = html_file.read()
    with open(HTML_FILENAME, "wb") as f:
        f.write(html_content)
    print(f"✅ Extracted {HTML_FILENAME}")
def download_xlsx():
    """Download the spreadsheet XLSX export and save it to XLSX_FILENAME.

    Raises:
        requests.RequestException: on a network failure, a timeout, or an
            HTTP error status.
    """
    print("🔄 Downloading XLSX...")
    # requests waits indefinitely by default; bound the wait so a stalled
    # connection cannot hang the caller forever.
    r = requests.get(XLSX_URL, timeout=60)
    r.raise_for_status()
    with open(XLSX_FILENAME, "wb") as f:
        f.write(r.content)
    print(f"✅ Saved XLSX as {XLSX_FILENAME}")
def generate_csv():
    """Build CSV_FILENAME from the artist table stored in HTML_FILENAME.

    Table rows are taken from ``table.waffle tbody tr`` with the first three
    skipped. A row is emitted only when it has at least four cells and its
    first cell contains a link with a non-empty ``href``. Rows whose cleaned
    name is in ``exclude_names`` or whose raw name contains the 🚩 marker are
    dropped. The "Best" column is "Yes" until a linked row whose name
    contains "AI Models" is seen, and "No" for every row after it.
    """
    print("📝 Generating CSV...")
    with open(HTML_FILENAME, "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f, "html.parser")
    rows = soup.select("table.waffle tbody tr")[3:]  # skip headers and Discord
    data = []
    starring = True
    for row in rows:
        cells = row.find_all("td")
        if len(cells) < 4:
            continue
        link_tag = cells[0].find("a")
        if link_tag is None:
            # Rows without a link carry no URL and are never emitted.
            continue
        # Use .get(): an anchor without an href attribute would raise
        # KeyError under link_tag["href"] and abort the whole CSV build.
        artist_url = link_tag.get("href", "")
        if not artist_url:
            continue
        artist_name_raw = link_tag.get_text(strip=True)
        # Everything from the "AI Models" row onward is no longer starred.
        if "AI Models" in artist_name_raw:
            starring = False
        artist_name_clean = clean_artist_name(artist_name_raw)
        if artist_name_clean in exclude_names:
            continue
        if "🚩" in artist_name_raw:
            continue
        best = force_star_flag(starring)
        credit = cells[1].get_text(strip=True)
        updated = cells[2].get_text(strip=True)
        links_work = cells[3].get_text(strip=True)
        data.append([artist_name_clean, artist_url, credit, links_work, updated, best])
    with open(CSV_FILENAME, "w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
        writer.writerow(["Artist Name", "URL", "Credit", "Links Work", "Updated", "Best"])
        writer.writerows(data)
    print(f"✅ CSV saved as {CSV_FILENAME}")
def hash_file(filename):
    """Return the SHA-256 hex digest of the complete contents of *filename*."""
    with open(filename, "rb") as stream:
        digest = hashlib.sha256(stream.read())
    return digest.hexdigest()
def archive_url(url):
    """Ask the Wayback Machine to save *url* and print the outcome.

    Best-effort: any exception raised while requesting is caught and printed
    rather than propagated.
    """
    print(f"🌐 Archiving {url} ...")
    request_headers = {"User-Agent": USER_AGENT}
    try:
        resp = requests.get(
            f"https://web.archive.org/save/{url}",
            headers=request_headers,
            timeout=30,
        )
        if resp.status_code != 200:
            print(f"⚠️ Failed to archive {url}, status code {resp.status_code}")
        else:
            print(f"✅ Archived {url}")
    except Exception as e:
        print(f"⚠️ Exception archiving {url}: {e}")
def archive_all_urls():
    """Archive every entry of ARCHIVE_URLS, pausing 7-13 seconds before each one."""
    for target in ARCHIVE_URLS:
        # Randomised pause (10s ± 3s) ahead of every save request.
        time.sleep(10 + random.uniform(-3, 3))
        archive_url(target)
def read_csv_to_dict(filename):
    """Load a CSV file into a dict of row dicts keyed by the "Artist Name" column.

    When several rows share a name, the last one wins.
    """
    with open(filename, newline='', encoding='utf-8') as handle:
        return {record["Artist Name"]: record for record in csv.DictReader(handle)}
def detect_changes(old_data, new_data):
    """Describe the differences between two artist tables.

    Args:
        old_data: Mapping of artist name to row dict for the previous snapshot.
        new_data: Mapping of artist name to row dict for the current snapshot.
            Rows need the keys "URL", "Credit", "Links Work", "Updated" and
            "Best".

    Returns:
        A list of message strings: removals first, then additions, then
        per-field changes for artists present in both snapshots. Within each
        group artists are listed in sorted name order, so the output is
        deterministic (plain set iteration order varies between runs).
    """
    # (column, description) pairs, compared in this order for each artist.
    tracked_fields = (
        ("URL", "🔗 Link changed"),
        ("Credit", "✏️ Credit changed"),
        ("Links Work", "🔄 Links Work status changed"),
        ("Updated", "🕒 Updated date changed"),
        ("Best", "⭐ Best flag changed"),
    )
    old_keys = set(old_data)
    new_keys = set(new_data)
    changes = [f"❌ Removed: **{artist}**" for artist in sorted(old_keys - new_keys)]
    changes.extend(f" Added: **{artist}**" for artist in sorted(new_keys - old_keys))
    for artist in sorted(old_keys & new_keys):
        old_row = old_data[artist]
        new_row = new_data[artist]
        for column, description in tracked_fields:
            if old_row[column] != new_row[column]:
                changes.append(f"{description} for **{artist}**")
    return changes
def _split_discord_content(content, limit):
    """Break *content* into pieces of at most *limit* characters, cutting at newlines where possible."""
    pieces = []
    current = ""
    for line in content.split("\n"):
        # A single line longer than the limit has to be hard-wrapped.
        while len(line) > limit:
            if current:
                pieces.append(current)
                current = ""
            pieces.append(line[:limit])
            line = line[limit:]
        candidate = f"{current}\n{line}" if current else line
        if len(candidate) > limit:
            pieces.append(current)
            current = line
        else:
            current = candidate
    if current:
        pieces.append(current)
    return pieces


def send_discord_message(content):
    """Post *content* to the configured Discord webhook.

    Does nothing except print a warning when DISCORD_WEBHOOK_URL is unset.
    Discord rejects webhook content longer than 2000 characters, so longer
    text is sent as several consecutive messages. Best-effort: failures are
    printed, never raised.

    Args:
        content: Message text to deliver.
    """
    if not DISCORD_WEBHOOK_URL:
        print("⚠️ Discord webhook URL not set in env")
        return
    # Stay a little under Discord's 2000-character cap to leave headroom.
    max_len = 1900
    if len(content) <= max_len:
        # Short messages go out unchanged, exactly as before.
        pieces = [content]
    else:
        pieces = _split_discord_content(content, max_len)
    headers = {"Content-Type": "application/json"}
    for piece in pieces:
        data = {"content": piece}
        try:
            resp = requests.post(DISCORD_WEBHOOK_URL, headers=headers, data=json.dumps(data), timeout=10)
            if resp.status_code in (200, 204):
                print("✅ Discord notification sent")
            else:
                print(f"⚠️ Failed to send Discord notification, status code {resp.status_code}")
        except Exception as e:
            print(f"⚠️ Exception sending Discord notification: {e}")
def update_loop():
    """Refresh the served files every 10 minutes, forever.

    Each cycle downloads the ZIP and XLSX exports, regenerates the CSV and
    hashes it. When the hash differs from the previous successful cycle, the
    row-level changes are sent to Discord (if any were found) and all
    ARCHIVE_URLS are archived. Any exception in a cycle is printed and the
    previous snapshot is kept for the next comparison.
    """
    previous_hash = None
    previous_rows = {}
    while True:
        try:
            download_zip_and_extract_html()
            download_xlsx()
            generate_csv()
            new_hash = hash_file(CSV_FILENAME)
            new_rows = read_csv_to_dict(CSV_FILENAME)
            if previous_hash is None:
                # First successful cycle: nothing to compare against yet.
                print(" Initial CSV hash stored.")
            elif new_hash == previous_hash:
                print(" CSV unchanged. No archiving needed.")
            else:
                print("🔔 CSV has changed! Archiving URLs...")
                diff_lines = detect_changes(previous_rows, new_rows)
                if not diff_lines:
                    print(" No detectable content changes found.")
                else:
                    send_discord_message("**CSV Update Detected:**\n" + "\n".join(diff_lines))
                archive_all_urls()
            # Only reached when the whole cycle succeeded.
            previous_hash, previous_rows = new_hash, new_rows
        except Exception as e:
            print(f"⚠️ Error updating files: {e}")
        time.sleep(600)  # 10 minutes
@app.route("/artists.html")
def serve_artists_html():
    """Respond with the file named by HTML_FILENAME as text/html."""
    response = send_file(HTML_FILENAME, mimetype="text/html")
    return response
@app.route("/artists.csv")
def serve_artists_csv():
    """Respond with the file named by CSV_FILENAME as text/csv."""
    response = send_file(CSV_FILENAME, mimetype="text/csv")
    return response
@app.route("/artists.xlsx")
def serve_artists_xlsx():
    """Respond with the file named by XLSX_FILENAME using the XLSX MIME type."""
    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    return send_file(XLSX_FILENAME, mimetype=xlsx_mime)
# The landing page answers on three paths: "/", "/index" and "/index.html".
@app.route("/")
@app.route("/index")
@app.route("/index.html")
def serve_index():
    """Respond with templates/index.html as text/html."""
    index_page = send_file("templates/index.html", mimetype="text/html")
    return index_page
# Files under templates/_next/ are exposed at /_next/<path>.
@app.route("/_next/<path:filename>")
def serve_next_static(filename):
    """Respond with *filename* looked up inside the templates/_next directory."""
    asset_root = "templates/_next"
    return send_from_directory(asset_root, filename)
# Handler producing the custom page for 404 errors.
@app.errorhandler(404)
def page_not_found(e):
    """Respond with templates/404.html and status code 404."""
    not_found_page = send_file("templates/404.html", mimetype="text/html")
    return not_found_page, 404
if __name__ == "__main__":
    # Run the first refresh synchronously so the served files exist before
    # the web server starts. This must finish BEFORE the background thread is
    # launched: update_loop begins with the same downloads, and two threads
    # writing the same ZIP/HTML/CSV files at once can leave them corrupted.
    try:
        download_zip_and_extract_html()
        download_xlsx()
        generate_csv()
    except Exception as e:
        print(f"⚠️ Initial update failed: {e}")
    # Daemon thread: it must not keep the process alive once the server exits.
    threading.Thread(target=update_loop, daemon=True).start()
    app.run(host="0.0.0.0", port=5000)