modularize
This commit is contained in:
parent
f22ad478eb
commit
9552369760
18 changed files with 259 additions and 250 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -2,3 +2,4 @@ artists.csv
|
||||||
Artists.html
|
Artists.html
|
||||||
artists.xlsx
|
artists.xlsx
|
||||||
Trackerhub.zip
|
Trackerhub.zip
|
||||||
|
.env
|
||||||
|
|
|
||||||
BIN
__pycache__/archive.cpython-313.pyc
Normal file
BIN
__pycache__/archive.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/config.cpython-313.pyc
Normal file
BIN
__pycache__/config.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/diff.cpython-313.pyc
Normal file
BIN
__pycache__/diff.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/downloader.cpython-313.pyc
Normal file
BIN
__pycache__/downloader.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/notify.cpython-313.pyc
Normal file
BIN
__pycache__/notify.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/parser.cpython-313.pyc
Normal file
BIN
__pycache__/parser.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/update_loop.cpython-313.pyc
Normal file
BIN
__pycache__/update_loop.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/utils.cpython-313.pyc
Normal file
BIN
__pycache__/utils.cpython-313.pyc
Normal file
Binary file not shown.
21
archive.py
Normal file
21
archive.py
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
import requests, time, random
|
||||||
|
|
||||||
|
from config import ARCHIVE_URLS, USER_AGENT
|
||||||
|
|
||||||
|
def archive_url(url):
    """Ask the Wayback Machine to snapshot *url*; logs the outcome, never raises."""
    print(f"🌐 Archiving {url} ...")
    save_endpoint = f"https://web.archive.org/save/{url}"
    request_headers = {"User-Agent": USER_AGENT}
    try:
        response = requests.get(save_endpoint, headers=request_headers, timeout=30)
    except Exception as e:
        print(f"⚠️ Exception archiving {url}: {e}")
        return
    if response.status_code == 200:
        print(f"✅ Archived {url}")
    else:
        print(f"⚠️ Failed to archive {url}, status code {response.status_code}")
|
def archive_all_urls():
    """Archive every configured URL, pausing a jittered ~10s before each request."""
    for target in ARCHIVE_URLS:
        # Random 7-13s delay before each save request (rate-limits our hits).
        time.sleep(10 + random.uniform(-3, 3))
        archive_url(target)
||||||
33
config.py
Normal file
33
config.py
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
import os

# Google Sheets export endpoints for the tracker spreadsheet (same document,
# two export formats).
ZIP_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=zip"
XLSX_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=xlsx"

# Local artifact filenames written/read by downloader.py and parser.py.
ZIP_FILENAME = "Trackerhub.zip"
HTML_FILENAME = "Artists.html"
CSV_FILENAME = "artists.csv"
XLSX_FILENAME = "artists.xlsx"

# Row labels skipped during CSV generation (parser.py checks membership).
exclude_names = {
    "AI Models",
    "Lawson",
    "BPM Tracker",
    "Worst Comps & Edits",
    "Allegations",
    "Rap Disses Timeline",
    "Underground Artists",
}

# Sent as the User-Agent header on Wayback Machine save requests (archive.py).
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36"

# Base of this app's public URLs; NOTE(review): localhost — presumably meant to
# be replaced with the deployed domain. Verify before relying on archives.
BASE_URL = "http://localhost:5000"

# Pages pushed to the Wayback Machine whenever the CSV changes.
ARCHIVE_URLS = [
    f"{BASE_URL}/",
    f"{BASE_URL}/index.html/",
    f"{BASE_URL}/artists.html",
    f"{BASE_URL}/artists.csv",
    f"{BASE_URL}/artists.xlsx",
]

# May be None when the env var is unset; notify.py skips sending in that case.
DISCORD_WEBHOOK_URL = os.getenv("DISCORD_WEBHOOK_URL")
|
||||||
42
diff.py
Normal file
42
diff.py
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
import csv
|
||||||
|
|
||||||
|
def read_csv_to_dict(filename):
    """Load *filename* (CSV with a header row) into a dict keyed by "Artist Name"."""
    with open(filename, newline='', encoding='utf-8') as handle:
        return {record["Artist Name"]: record for record in csv.DictReader(handle)}
||||||
|
|
||||||
|
def detect_changes(old_data, new_data):
    """Diff two snapshots from read_csv_to_dict; return human-readable change lines.

    Order of output: removals, additions, then per-field notes for artists
    present in both snapshots (URL, Credit, Links Work, Updated, Best).
    """
    messages = []

    previous, current = set(old_data), set(new_data)

    for artist in previous - current:
        messages.append(f"❌ Removed: **{artist}**")

    for artist in current - previous:
        messages.append(f"➕ Added: **{artist}**")

    # Field name -> message template, compared in this fixed order.
    field_notes = (
        ("URL", "🔗 Link changed for **{}**"),
        ("Credit", "✏️ Credit changed for **{}**"),
        ("Links Work", "🔄 Links Work status changed for **{}**"),
        ("Updated", "🕒 Updated date changed for **{}**"),
        ("Best", "⭐ Best flag changed for **{}**"),
    )
    for artist in previous & current:
        before, after = old_data[artist], new_data[artist]
        for field, template in field_notes:
            if before[field] != after[field]:
                messages.append(template.format(artist))

    return messages
|
||||||
26
downloader.py
Normal file
26
downloader.py
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
import requests, zipfile
|
||||||
|
|
||||||
|
from config import ZIP_URL, ZIP_FILENAME, HTML_FILENAME, XLSX_URL, XLSX_FILENAME
|
||||||
|
|
||||||
|
def download_zip_and_extract_html():
    """Fetch the spreadsheet export as a ZIP, then extract the HTML member to disk."""
    print("🔄 Downloading ZIP...")
    response = requests.get(ZIP_URL)
    response.raise_for_status()
    with open(ZIP_FILENAME, "wb") as zip_out:
        zip_out.write(response.content)
    print(f"✅ Saved ZIP as {ZIP_FILENAME}")

    # Re-open the archive we just wrote and copy the HTML member out of it.
    with zipfile.ZipFile(ZIP_FILENAME, "r") as archive:
        payload = archive.read(HTML_FILENAME)
    with open(HTML_FILENAME, "wb") as html_out:
        html_out.write(payload)
    print(f"✅ Extracted {HTML_FILENAME}")
|
||||||
|
|
||||||
|
def download_xlsx():
    """Fetch the spreadsheet in XLSX form and save it as XLSX_FILENAME."""
    print("🔄 Downloading XLSX...")
    response = requests.get(XLSX_URL)
    response.raise_for_status()
    with open(XLSX_FILENAME, "wb") as out:
        out.write(response.content)
    print(f"✅ Saved XLSX as {XLSX_FILENAME}")
|
||||||
258
main.py
258
main.py
|
|
@ -1,254 +1,13 @@
|
||||||
import requests
|
from flask import Flask, send_file, send_from_directory
|
||||||
import zipfile
|
|
||||||
import threading
|
|
||||||
import time
|
|
||||||
import random
|
|
||||||
import hashlib
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
import csv
|
|
||||||
import re
|
|
||||||
from flask import Flask, send_file, send_from_directory, abort
|
|
||||||
import os
|
|
||||||
import json
|
|
||||||
from flask_cors import CORS
|
from flask_cors import CORS
|
||||||
|
import threading
|
||||||
|
|
||||||
|
from config import HTML_FILENAME, CSV_FILENAME, XLSX_FILENAME
|
||||||
|
from update_loop import update_loop
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
CORS(app)
|
CORS(app)
|
||||||
|
|
||||||
ZIP_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=zip"
|
|
||||||
XLSX_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=xlsx"
|
|
||||||
|
|
||||||
ZIP_FILENAME = "Trackerhub.zip"
|
|
||||||
HTML_FILENAME = "Artists.html"
|
|
||||||
CSV_FILENAME = "artists.csv"
|
|
||||||
XLSX_FILENAME = "artists.xlsx"
|
|
||||||
|
|
||||||
exclude_names = {
|
|
||||||
"AI Models",
|
|
||||||
"Lawson",
|
|
||||||
"BPM Tracker",
|
|
||||||
"Worst Comps & Edits",
|
|
||||||
"Allegations",
|
|
||||||
"Rap Disses Timeline",
|
|
||||||
"Underground Artists",
|
|
||||||
}
|
|
||||||
|
|
||||||
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36"
|
|
||||||
|
|
||||||
# URLs to archive on changes — update these to your actual hosted domain
|
|
||||||
BASE_URL = "http://localhost:5000" # Change this to your public domain when deployed
|
|
||||||
|
|
||||||
ARCHIVE_URLS = [
|
|
||||||
f"{BASE_URL}/",
|
|
||||||
f"{BASE_URL}/index.html/",
|
|
||||||
f"{BASE_URL}/artists.html",
|
|
||||||
f"{BASE_URL}/artists.csv",
|
|
||||||
f"{BASE_URL}/artists.xlsx",
|
|
||||||
]
|
|
||||||
|
|
||||||
DISCORD_WEBHOOK_URL = os.getenv("DISCORD_WEBHOOK_URL")
|
|
||||||
|
|
||||||
def clean_artist_name(text):
|
|
||||||
return re.sub(r'[⭐🤖🎭\u2B50\uFE0F]', '', text).strip()
|
|
||||||
|
|
||||||
def force_star_flag(starred=True):
|
|
||||||
return "Yes" if starred else "No"
|
|
||||||
|
|
||||||
def download_zip_and_extract_html():
|
|
||||||
print("🔄 Downloading ZIP...")
|
|
||||||
r = requests.get(ZIP_URL)
|
|
||||||
r.raise_for_status()
|
|
||||||
with open(ZIP_FILENAME, "wb") as f:
|
|
||||||
f.write(r.content)
|
|
||||||
print(f"✅ Saved ZIP as {ZIP_FILENAME}")
|
|
||||||
|
|
||||||
with zipfile.ZipFile(ZIP_FILENAME, "r") as z:
|
|
||||||
with z.open(HTML_FILENAME) as html_file:
|
|
||||||
html_content = html_file.read()
|
|
||||||
with open(HTML_FILENAME, "wb") as f:
|
|
||||||
f.write(html_content)
|
|
||||||
print(f"✅ Extracted {HTML_FILENAME}")
|
|
||||||
|
|
||||||
def download_xlsx():
|
|
||||||
print("🔄 Downloading XLSX...")
|
|
||||||
r = requests.get(XLSX_URL)
|
|
||||||
r.raise_for_status()
|
|
||||||
with open(XLSX_FILENAME, "wb") as f:
|
|
||||||
f.write(r.content)
|
|
||||||
print(f"✅ Saved XLSX as {XLSX_FILENAME}")
|
|
||||||
|
|
||||||
def generate_csv():
|
|
||||||
print("📝 Generating CSV...")
|
|
||||||
with open(HTML_FILENAME, "r", encoding="utf-8") as f:
|
|
||||||
soup = BeautifulSoup(f, "html.parser")
|
|
||||||
|
|
||||||
rows = soup.select("table.waffle tbody tr")[3:] # skip headers and Discord
|
|
||||||
|
|
||||||
data = []
|
|
||||||
starring = True
|
|
||||||
|
|
||||||
for row in rows:
|
|
||||||
cells = row.find_all("td")
|
|
||||||
if len(cells) < 4:
|
|
||||||
continue
|
|
||||||
|
|
||||||
link_tag = cells[0].find("a")
|
|
||||||
artist_name_raw = link_tag.get_text(strip=True) if link_tag else cells[0].get_text(strip=True)
|
|
||||||
artist_url = link_tag["href"] if link_tag else ""
|
|
||||||
if not artist_url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if "AI Models" in artist_name_raw:
|
|
||||||
starring = False
|
|
||||||
|
|
||||||
artist_name_clean = clean_artist_name(artist_name_raw)
|
|
||||||
if artist_name_clean in exclude_names:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if "🚩" in artist_name_raw:
|
|
||||||
continue
|
|
||||||
|
|
||||||
best = force_star_flag(starring)
|
|
||||||
credit = cells[1].get_text(strip=True)
|
|
||||||
updated = cells[2].get_text(strip=True)
|
|
||||||
links_work = cells[3].get_text(strip=True)
|
|
||||||
|
|
||||||
data.append([artist_name_clean, artist_url, credit, links_work, updated, best])
|
|
||||||
|
|
||||||
with open(CSV_FILENAME, "w", newline='', encoding="utf-8") as csvfile:
|
|
||||||
writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
|
|
||||||
writer.writerow(["Artist Name", "URL", "Credit", "Links Work", "Updated", "Best"])
|
|
||||||
writer.writerows(data)
|
|
||||||
|
|
||||||
|
|
||||||
print(f"✅ CSV saved as {CSV_FILENAME}")
|
|
||||||
|
|
||||||
def hash_file(filename):
|
|
||||||
hasher = hashlib.sha256()
|
|
||||||
with open(filename, "rb") as f:
|
|
||||||
buf = f.read()
|
|
||||||
hasher.update(buf)
|
|
||||||
return hasher.hexdigest()
|
|
||||||
|
|
||||||
def archive_url(url):
|
|
||||||
print(f"🌐 Archiving {url} ...")
|
|
||||||
headers = {"User-Agent": USER_AGENT}
|
|
||||||
try:
|
|
||||||
resp = requests.get(f"https://web.archive.org/save/{url}", headers=headers, timeout=30)
|
|
||||||
if resp.status_code == 200:
|
|
||||||
print(f"✅ Archived {url}")
|
|
||||||
else:
|
|
||||||
print(f"⚠️ Failed to archive {url}, status code {resp.status_code}")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"⚠️ Exception archiving {url}: {e}")
|
|
||||||
|
|
||||||
def archive_all_urls():
|
|
||||||
for url in ARCHIVE_URLS:
|
|
||||||
delay = 10 + random.uniform(-3, 3)
|
|
||||||
time.sleep(delay)
|
|
||||||
archive_url(url)
|
|
||||||
|
|
||||||
def read_csv_to_dict(filename):
|
|
||||||
"""Read CSV into dict with artist_name as key, storing relevant fields."""
|
|
||||||
d = {}
|
|
||||||
with open(filename, newline='', encoding='utf-8') as f:
|
|
||||||
reader = csv.DictReader(f)
|
|
||||||
for row in reader:
|
|
||||||
d[row["Artist Name"]] = row
|
|
||||||
return d
|
|
||||||
|
|
||||||
def detect_changes(old_data, new_data):
|
|
||||||
"""
|
|
||||||
Compare old and new data dictionaries.
|
|
||||||
Returns a list of strings describing changes.
|
|
||||||
"""
|
|
||||||
changes = []
|
|
||||||
|
|
||||||
old_keys = set(old_data.keys())
|
|
||||||
new_keys = set(new_data.keys())
|
|
||||||
|
|
||||||
removed = old_keys - new_keys
|
|
||||||
added = new_keys - old_keys
|
|
||||||
common = old_keys & new_keys
|
|
||||||
|
|
||||||
for artist in removed:
|
|
||||||
changes.append(f"❌ Removed: **{artist}**")
|
|
||||||
|
|
||||||
for artist in added:
|
|
||||||
changes.append(f"➕ Added: **{artist}**")
|
|
||||||
|
|
||||||
for artist in common:
|
|
||||||
old_row = old_data[artist]
|
|
||||||
new_row = new_data[artist]
|
|
||||||
# Check if URL changed
|
|
||||||
if old_row["URL"] != new_row["URL"]:
|
|
||||||
changes.append(f"🔗 Link changed for **{artist}**")
|
|
||||||
# Check other fields if needed (Credit, Updated, etc.)
|
|
||||||
if old_row["Credit"] != new_row["Credit"]:
|
|
||||||
changes.append(f"✏️ Credit changed for **{artist}**")
|
|
||||||
if old_row["Links Work"] != new_row["Links Work"]:
|
|
||||||
changes.append(f"🔄 Links Work status changed for **{artist}**")
|
|
||||||
if old_row["Updated"] != new_row["Updated"]:
|
|
||||||
changes.append(f"🕒 Updated date changed for **{artist}**")
|
|
||||||
if old_row["Best"] != new_row["Best"]:
|
|
||||||
changes.append(f"⭐ Best flag changed for **{artist}**")
|
|
||||||
|
|
||||||
return changes
|
|
||||||
|
|
||||||
def send_discord_message(content):
|
|
||||||
if not DISCORD_WEBHOOK_URL:
|
|
||||||
print("⚠️ Discord webhook URL not set in env")
|
|
||||||
return
|
|
||||||
|
|
||||||
headers = {"Content-Type": "application/json"}
|
|
||||||
data = {"content": content}
|
|
||||||
|
|
||||||
try:
|
|
||||||
resp = requests.post(DISCORD_WEBHOOK_URL, headers=headers, data=json.dumps(data), timeout=10)
|
|
||||||
if resp.status_code in (200, 204):
|
|
||||||
print("✅ Discord notification sent")
|
|
||||||
else:
|
|
||||||
print(f"⚠️ Failed to send Discord notification, status code {resp.status_code}")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"⚠️ Exception sending Discord notification: {e}")
|
|
||||||
|
|
||||||
def update_loop():
|
|
||||||
last_csv_hash = None
|
|
||||||
last_csv_data = {}
|
|
||||||
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
download_zip_and_extract_html()
|
|
||||||
download_xlsx()
|
|
||||||
generate_csv()
|
|
||||||
|
|
||||||
current_hash = hash_file(CSV_FILENAME)
|
|
||||||
current_data = read_csv_to_dict(CSV_FILENAME)
|
|
||||||
|
|
||||||
if last_csv_hash is None:
|
|
||||||
print("ℹ️ Initial CSV hash stored.")
|
|
||||||
elif current_hash != last_csv_hash:
|
|
||||||
print("🔔 CSV has changed! Archiving URLs...")
|
|
||||||
|
|
||||||
changes = detect_changes(last_csv_data, current_data)
|
|
||||||
if changes:
|
|
||||||
message = "**CSV Update Detected:**\n" + "\n".join(changes)
|
|
||||||
send_discord_message(message)
|
|
||||||
else:
|
|
||||||
print("ℹ️ No detectable content changes found.")
|
|
||||||
|
|
||||||
archive_all_urls()
|
|
||||||
else:
|
|
||||||
print("ℹ️ CSV unchanged. No archiving needed.")
|
|
||||||
|
|
||||||
last_csv_hash = current_hash
|
|
||||||
last_csv_data = current_data
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"⚠️ Error updating files: {e}")
|
|
||||||
|
|
||||||
time.sleep(600) # 10 minutes
|
|
||||||
|
|
||||||
@app.route("/artists.html")
def serve_artists_html():
    """Serve the HTML page extracted from the spreadsheet ZIP."""
    return send_file(HTML_FILENAME, mimetype="text/html")
|
||||||
|
|
@ -261,26 +20,25 @@ def serve_artists_csv():
|
||||||
def serve_artists_xlsx():
    """Serve the downloaded spreadsheet in XLSX form.

    NOTE(review): the route decorator for this view falls outside the visible
    diff hunk — confirm it is registered under /artists.xlsx.
    """
    return send_file(XLSX_FILENAME, mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
|
||||||
|
|
||||||
# Serve index.html at "/", "/index", and "/index.html"
@app.route("/")
@app.route("/index")
@app.route("/index.html")
def serve_index():
    """Serve the static landing page for all three index-style paths."""
    return send_file("templates/index.html", mimetype="text/html")
|
||||||
|
|
||||||
# Serve static files from templates/_next/ as /_next/...
@app.route("/_next/<path:filename>")
def serve_next_static(filename):
    """Serve Next.js build assets from templates/_next under the /_next URL prefix."""
    return send_from_directory("templates/_next", filename)
|
||||||
|
|
||||||
# Custom 404 error page
@app.errorhandler(404)
def page_not_found(e):
    """Render the custom 404 page with the proper 404 status code."""
    return send_file("templates/404.html", mimetype="text/html"), 404
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
threading.Thread(target=update_loop, daemon=True).start()
|
threading.Thread(target=update_loop, daemon=True).start()
|
||||||
|
from downloader import download_zip_and_extract_html, download_xlsx
|
||||||
|
from parser import generate_csv
|
||||||
|
|
||||||
try:
|
try:
|
||||||
download_zip_and_extract_html()
|
download_zip_and_extract_html()
|
||||||
download_xlsx()
|
download_xlsx()
|
||||||
|
|
|
||||||
20
notify.py
Normal file
20
notify.py
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
import os, json, requests
|
||||||
|
|
||||||
|
from config import DISCORD_WEBHOOK_URL
|
||||||
|
|
||||||
|
def send_discord_message(content):
    """POST *content* to the configured Discord webhook; no-op when unset, never raises."""
    if not DISCORD_WEBHOOK_URL:
        print("⚠️ Discord webhook URL not set in env")
        return

    payload = json.dumps({"content": content})
    try:
        response = requests.post(
            DISCORD_WEBHOOK_URL,
            headers={"Content-Type": "application/json"},
            data=payload,
            timeout=10,
        )
    except Exception as e:
        print(f"⚠️ Exception sending Discord notification: {e}")
        return
    # Discord returns 204 (no wait) or 200 (?wait=true) on success.
    if response.status_code in (200, 204):
        print("✅ Discord notification sent")
    else:
        print(f"⚠️ Failed to send Discord notification, status code {response.status_code}")
|
||||||
47
parser.py
Normal file
47
parser.py
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import csv
|
||||||
|
|
||||||
|
from config import HTML_FILENAME, CSV_FILENAME, exclude_names
|
||||||
|
from utils import clean_artist_name, force_star_flag
|
||||||
|
|
||||||
|
def generate_csv():
    """Parse the spreadsheet table in HTML_FILENAME into CSV_FILENAME.

    The first three table rows are skipped; rows without a link, rows in
    exclude_names, and rows flagged with 🚩 are dropped. "Best" is "Yes" for
    rows that appear before the "AI Models" marker row, "No" after it.
    """
    print("📝 Generating CSV...")
    with open(HTML_FILENAME, "r", encoding="utf-8") as source:
        soup = BeautifulSoup(source, "html.parser")

    # Skip the three leading rows of the exported sheet.
    table_rows = soup.select("table.waffle tbody tr")[3:]

    records = []
    starring = True  # flips to False once the "AI Models" row is seen
    for table_row in table_rows:
        cells = table_row.find_all("td")
        if len(cells) < 4:
            continue

        anchor = cells[0].find("a")
        raw_name = anchor.get_text(strip=True) if anchor else cells[0].get_text(strip=True)
        artist_url = anchor["href"] if anchor else ""
        if not artist_url:
            continue

        # Everything at or below the "AI Models" marker loses the star flag.
        if "AI Models" in raw_name:
            starring = False

        name = clean_artist_name(raw_name)
        if name in exclude_names or "🚩" in raw_name:
            continue

        records.append([
            name,
            artist_url,
            cells[1].get_text(strip=True),  # Credit
            cells[3].get_text(strip=True),  # Links Work
            cells[2].get_text(strip=True),  # Updated
            force_star_flag(starring),      # Best
        ])

    with open(CSV_FILENAME, "w", newline='', encoding="utf-8") as sink:
        writer = csv.writer(sink, quoting=csv.QUOTE_ALL)
        writer.writerow(["Artist Name", "URL", "Credit", "Links Work", "Updated", "Best"])
        writer.writerows(records)

    print(f"✅ CSV saved as {CSV_FILENAME}")
|
||||||
47
update_loop.py
Normal file
47
update_loop.py
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
import time
|
||||||
|
|
||||||
|
from downloader import download_zip_and_extract_html, download_xlsx
|
||||||
|
from parser import generate_csv
|
||||||
|
from diff import read_csv_to_dict, detect_changes
|
||||||
|
from archive import archive_all_urls
|
||||||
|
from notify import send_discord_message
|
||||||
|
from utils import hash_file
|
||||||
|
|
||||||
|
last_csv_hash = None   # SHA-256 hex digest of the last CSV processed (None until first pass)
last_csv_data = {}     # parsed rows of that CSV, keyed by artist name


def update_loop():
    """Run forever: refresh the ZIP/XLSX/CSV artifacts every 10 minutes.

    When the CSV's hash changes, send a Discord summary of the row-level
    differences and push the public URLs to the Wayback Machine. Errors in a
    pass are logged and the loop continues.
    """
    global last_csv_hash, last_csv_data

    # Fix: use the shared config constant instead of re-hard-coding
    # "artists.csv", so a rename in config.py cannot silently desynchronise
    # this loop from parser.py, which writes CSV_FILENAME.
    from config import CSV_FILENAME

    while True:
        try:
            download_zip_and_extract_html()
            download_xlsx()
            generate_csv()

            current_hash = hash_file(CSV_FILENAME)
            current_data = read_csv_to_dict(CSV_FILENAME)

            if last_csv_hash is None:
                # First pass: just remember the baseline, don't notify/archive.
                print("ℹ️ Initial CSV hash stored.")
            elif current_hash != last_csv_hash:
                print("🔔 CSV has changed! Archiving URLs...")

                changes = detect_changes(last_csv_data, current_data)
                if changes:
                    message = "**CSV Update Detected:**\n" + "\n".join(changes)
                    send_discord_message(message)
                else:
                    # Hash moved but no row-level diff (e.g. column reorder).
                    print("ℹ️ No detectable content changes found.")

                archive_all_urls()
            else:
                print("ℹ️ CSV unchanged. No archiving needed.")

            last_csv_hash = current_hash
            last_csv_data = current_data

        except Exception as e:
            print(f"⚠️ Error updating files: {e}")

        time.sleep(600)  # 10 minutes between refresh passes
|
||||||
14
utils.py
Normal file
14
utils.py
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
import re, hashlib
|
||||||
|
|
||||||
|
def clean_artist_name(text):
    """Strip decorative star/robot/mask emoji (and the VS-16 selector), then whitespace."""
    decorations = re.compile(r'[⭐🤖🎭\u2B50\uFE0F]')
    return decorations.sub('', text).strip()
|
||||||
|
|
||||||
|
def force_star_flag(starred=True):
    """Map the boolean *starred* onto the spreadsheet's "Yes"/"No" convention."""
    if starred:
        return "Yes"
    return "No"
|
||||||
|
|
||||||
|
def hash_file(filename):
    """Return the hex SHA-256 digest of *filename*'s contents.

    Fix: hash in 64 KiB chunks instead of reading the whole file into memory,
    so arbitrarily large files are handled with constant memory. The digest is
    identical to the previous whole-file read.
    """
    digest = hashlib.sha256()
    with open(filename, "rb") as f:
        # iter(callable, sentinel) yields chunks until read() returns b"".
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue