go rewrite
This commit is contained in:
parent
e8b828384b
commit
90a797b888
15 changed files with 760 additions and 523 deletions
36
archive.py
36
archive.py
|
|
@ -1,36 +0,0 @@
|
||||||
# archive.py
|
|
||||||
import logging
|
|
||||||
import random
|
|
||||||
import time
|
|
||||||
from typing import List
|
|
||||||
|
|
||||||
from waybackpy import WaybackMachineSaveAPI
|
|
||||||
|
|
||||||
from config import ARCHIVE_URLS, USER_AGENT
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def archive_url(url: str):
    """Submit a single URL to the Wayback Machine, logging the outcome.

    Failures are logged (with traceback) rather than raised, so one bad
    URL cannot abort a batch run.
    """
    logger.info(f"🌐 Archiving {url} ...")
    try:
        WaybackMachineSaveAPI(url, user_agent=USER_AGENT).save()
    except Exception as e:
        logger.error(f"⚠️ Exception archiving {url}: {e}", exc_info=True)
    else:
        logger.info(f"✅ Archived {url}")
|
|
||||||
|
|
||||||
|
|
||||||
def archive_all_urls():
    """Archive every URL in ARCHIVE_URLS, pausing ~10s (jittered) between each."""
    logger.info("--- Starting archival process for all URLs ---")
    for target in ARCHIVE_URLS:
        # Random jitter keeps the request cadence from looking automated.
        pause = 10 + random.uniform(-3, 3)
        logger.info(f"Waiting {pause:.2f} seconds before next archive...")
        time.sleep(pause)
        archive_url(target)
    logger.info("--- Archival process finished ---")
|
|
||||||
|
|
||||||
|
|
||||||
def test_archive():
    """Smoke-test archival against a harmless httpbin endpoint."""
    archive_url("https://httpbin.org/anything/foo/bar")
|
|
||||||
43
config.py
43
config.py
|
|
@ -1,43 +0,0 @@
|
||||||
import os
|
|
||||||
|
|
||||||
# Source Google Sheet and its derived export endpoints.
SHEET_URL = "https://docs.google.com/spreadsheets/d/1Z8aANbxXbnUGoZPRvJfWL3gz6jrzPPrwVt3d0c1iJ_4"
ZIP_URL = SHEET_URL + "/export?format=zip"
XLSX_URL = SHEET_URL + "/export?format=xlsx"

# Local filenames for the downloaded and derived artifacts.
ZIP_FILENAME = "Trackerhub.zip"
HTML_FILENAME = "Artists.html"
CSV_FILENAME = "artists.csv"
XLSX_FILENAME = "artists.xlsx"
|
|
||||||
|
|
||||||
# Sheet tabs that are not artist trackers and must be skipped during parsing.
# NOTE: the original literal was missing two commas, so adjacent string
# literals were silently concatenated — e.g. "🎹Worst Comps & Edits" and
# "K4$H K4$$!n0" merged into one bogus entry, leaving the real names
# uncovered. Every entry below ends with an explicit comma, and the
# duplicate "🎹 Worst Comps & Edits" literal has been removed (a set
# deduplicates anyway).
exclude_names = {
    "🎹Worst Comps & Edits",
    "K4$H K4$$!n0",
    "K4HKn0",
    "AI Models",
    "🎹 BPM & Key Tracker",
    "🎹Comps & Edits",
    "🎹 Comps & Edits",
    "🎹 Worst Comps & Edits",
    "🎹 Yedits",
    "Allegations",
    "Rap Disses Timeline",
    "Underground Artists",
}
|
|
||||||
|
|
||||||
# Browser-like UA so remote endpoints serve normal responses.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36"

BASE_URL = "https://sheets.artistgrid.cx"

# Pages submitted to the Wayback Machine on every archival run.
ARCHIVE_URLS = [
    f"{BASE_URL}/",
    f"{BASE_URL}/artists.html",
    f"{BASE_URL}/artists.csv",
    f"{BASE_URL}/artists.xlsx",
    "https://artistgrid.cx",
]

# Optional: change notifications are skipped when this env var is unset.
DISCORD_WEBHOOK_URL = os.getenv("DISCORD_WEBHOOK_URL")
|
|
||||||
57
diff.py
57
diff.py
|
|
@ -1,57 +0,0 @@
|
||||||
# diff.py
|
|
||||||
import csv
|
|
||||||
import logging
|
|
||||||
from typing import Dict, List
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def read_csv_to_dict(filename: str) -> Dict[str, Dict[str, str]]:
    """Load a tracker CSV into a mapping keyed by the "Artist Name" column.

    Rows without a non-empty "Artist Name" are skipped. A missing file is
    logged as a warning and yields an empty dict rather than raising.

    Fix: the warning/error messages previously logged the literal text
    "(unknown)" instead of interpolating the filename; they now include it.
    """
    data: Dict[str, Dict[str, str]] = {}
    try:
        with open(filename, newline="", encoding="utf-8") as f:
            reader = csv.DictReader(f)
            for row in reader:
                if "Artist Name" in row and row["Artist Name"]:
                    data[row["Artist Name"]] = row
    except FileNotFoundError:
        logger.warning(f"CSV file not found: {filename}")
    except Exception as e:
        logger.error(f"Error reading CSV file {filename}: {e}", exc_info=True)
    return data
|
|
||||||
|
|
||||||
|
|
||||||
def detect_changes(
    old_data: Dict[str, Dict[str, str]], new_data: Dict[str, Dict[str, str]]
) -> List[str]:
    """Diff two artist snapshots into human-readable change messages.

    Removals are reported first, then additions, then per-field changes for
    artists present in both snapshots — each group in sorted artist order.
    """
    old_keys = set(old_data)
    new_keys = set(new_data)

    changes = [f"❌ Removed: **{name}**" for name in sorted(old_keys - new_keys)]
    changes += [f"➕ Added: **{name}**" for name in sorted(new_keys - old_keys)]

    # (column, message template) pairs, checked in this fixed order.
    watched_fields = [
        ("URL", "🔗 Link changed for **{}**"),
        ("Credit", "✏️ Credit changed for **{}**"),
        ("Links Work", "🔄 Links Work status changed for **{}**"),
        ("Updated", "🕒 Updated date changed for **{}**"),
        ("Best", "⭐ Best flag changed for **{}**"),
    ]
    for artist in sorted(old_keys & new_keys):
        before = old_data[artist]
        after = new_data[artist]
        for column, template in watched_fields:
            if before.get(column) != after.get(column):
                changes.append(template.format(artist))

    return changes
|
|
||||||
|
|
@ -1,42 +0,0 @@
|
||||||
# downloader.py
|
|
||||||
import logging
|
|
||||||
import zipfile
|
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
from config import HTML_FILENAME, XLSX_FILENAME, XLSX_URL, ZIP_FILENAME, ZIP_URL
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def _download_file(url: str, filename: str, timeout: int = 30) -> bool:
    """Download *url* to *filename*; return True on success, False on failure.

    Fix: the log messages previously contained the literal text "(unknown)"
    instead of interpolating the filename; they now include it.
    """
    logger.info(f"🔄 Downloading {filename}...")
    try:
        with requests.get(url, timeout=timeout) as r:
            r.raise_for_status()
            with open(filename, "wb") as f:
                f.write(r.content)
            logger.info(f"✅ Saved {filename}")
            return True
    except requests.RequestException as e:
        logger.error(f"❌ Failed to download {filename}: {e}")
        return False
|
|
||||||
|
|
||||||
|
|
||||||
def download_zip_and_extract_html():
    """Fetch the sheet's zip export and pull the artists HTML file out of it.

    Extraction failures (bad zip, missing member, missing file) are logged
    and swallowed; a failed download aborts silently after its own logging.
    """
    if not _download_file(ZIP_URL, ZIP_FILENAME):
        return

    logger.info(f"📦 Extracting {HTML_FILENAME} from {ZIP_FILENAME}...")
    try:
        with zipfile.ZipFile(ZIP_FILENAME, "r") as archive:
            payload = archive.read(HTML_FILENAME)
            with open(HTML_FILENAME, "wb") as out:
                out.write(payload)
            logger.info(f"✅ Extracted {HTML_FILENAME}")
    except (zipfile.BadZipFile, KeyError, FileNotFoundError) as e:
        logger.error(f"❌ Failed to extract {HTML_FILENAME}: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
def download_xlsx():
    """Download the spreadsheet's XLSX export to its configured local path."""
    _download_file(XLSX_URL, XLSX_FILENAME)
|
|
||||||
|
|
@ -1349,6 +1349,7 @@
|
||||||
"integrity": "sha512-EhBeSYX0Y6ye8pNebpKrwFJq7BoQ8J5SO6NlvNwwHjSj6adXJViPQrKlsyPw7hLBLvckEMO1yxeGdR82YBBlDg==",
|
"integrity": "sha512-EhBeSYX0Y6ye8pNebpKrwFJq7BoQ8J5SO6NlvNwwHjSj6adXJViPQrKlsyPw7hLBLvckEMO1yxeGdR82YBBlDg==",
|
||||||
"devOptional": true,
|
"devOptional": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"csstype": "^3.0.2"
|
"csstype": "^3.0.2"
|
||||||
}
|
}
|
||||||
|
|
@ -1409,6 +1410,7 @@
|
||||||
"integrity": "sha512-jCNyAuXx8dr5KJMkecGmZ8KI61KBUhkCob+SD+C+I5+Y1FWI2Y3QmY4/cxMCC5WAsZqoEtEETVhUiUMIGCf6Bw==",
|
"integrity": "sha512-jCNyAuXx8dr5KJMkecGmZ8KI61KBUhkCob+SD+C+I5+Y1FWI2Y3QmY4/cxMCC5WAsZqoEtEETVhUiUMIGCf6Bw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@typescript-eslint/scope-manager": "8.40.0",
|
"@typescript-eslint/scope-manager": "8.40.0",
|
||||||
"@typescript-eslint/types": "8.40.0",
|
"@typescript-eslint/types": "8.40.0",
|
||||||
|
|
@ -1926,6 +1928,7 @@
|
||||||
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
|
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"bin": {
|
"bin": {
|
||||||
"acorn": "bin/acorn"
|
"acorn": "bin/acorn"
|
||||||
},
|
},
|
||||||
|
|
@ -2822,6 +2825,7 @@
|
||||||
"integrity": "sha512-TS9bTNIryDzStCpJN93aC5VRSW3uTx9sClUn4B87pwiCaJh220otoI0X8mJKr+VcPtniMdN8GKjlwgWGUv5ZKA==",
|
"integrity": "sha512-TS9bTNIryDzStCpJN93aC5VRSW3uTx9sClUn4B87pwiCaJh220otoI0X8mJKr+VcPtniMdN8GKjlwgWGUv5ZKA==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@eslint-community/eslint-utils": "^4.2.0",
|
"@eslint-community/eslint-utils": "^4.2.0",
|
||||||
"@eslint-community/regexpp": "^4.12.1",
|
"@eslint-community/regexpp": "^4.12.1",
|
||||||
|
|
@ -2996,6 +3000,7 @@
|
||||||
"integrity": "sha512-whOE1HFo/qJDyX4SnXzP4N6zOWn79WhnCUY/iDR0mPfQZO8wcYE4JClzI2oZrhBnnMUCBCHZhO6VQyoBU95mZA==",
|
"integrity": "sha512-whOE1HFo/qJDyX4SnXzP4N6zOWn79WhnCUY/iDR0mPfQZO8wcYE4JClzI2oZrhBnnMUCBCHZhO6VQyoBU95mZA==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@rtsao/scc": "^1.1.0",
|
"@rtsao/scc": "^1.1.0",
|
||||||
"array-includes": "^3.1.9",
|
"array-includes": "^3.1.9",
|
||||||
|
|
@ -5139,6 +5144,7 @@
|
||||||
"resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz",
|
||||||
"integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==",
|
"integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
|
|
@ -5148,6 +5154,7 @@
|
||||||
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.0.tgz",
|
||||||
"integrity": "sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==",
|
"integrity": "sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"scheduler": "^0.26.0"
|
"scheduler": "^0.26.0"
|
||||||
},
|
},
|
||||||
|
|
@ -5858,6 +5865,7 @@
|
||||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=12"
|
"node": ">=12"
|
||||||
},
|
},
|
||||||
|
|
@ -6017,6 +6025,7 @@
|
||||||
"integrity": "sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==",
|
"integrity": "sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
|
"peer": true,
|
||||||
"bin": {
|
"bin": {
|
||||||
"tsc": "bin/tsc",
|
"tsc": "bin/tsc",
|
||||||
"tsserver": "bin/tsserver"
|
"tsserver": "bin/tsserver"
|
||||||
|
|
|
||||||
24
go.mod
Normal file
24
go.mod
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
module artistgrid
|
||||||
|
|
||||||
|
go 1.21
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/PuerkitoBio/goquery v1.8.1
|
||||||
|
github.com/gofiber/fiber/v2 v2.52.0
|
||||||
|
)
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/andybalholm/brotli v1.0.5 // indirect
|
||||||
|
github.com/andybalholm/cascadia v1.3.1 // indirect
|
||||||
|
github.com/google/uuid v1.5.0 // indirect
|
||||||
|
github.com/klauspost/compress v1.17.0 // indirect
|
||||||
|
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||||
|
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||||
|
github.com/mattn/go-runewidth v0.0.15 // indirect
|
||||||
|
github.com/rivo/uniseg v0.2.0 // indirect
|
||||||
|
github.com/valyala/bytebufferpool v1.0.0 // indirect
|
||||||
|
github.com/valyala/fasthttp v1.51.0 // indirect
|
||||||
|
github.com/valyala/tcplisten v1.0.0 // indirect
|
||||||
|
golang.org/x/net v0.17.0 // indirect
|
||||||
|
golang.org/x/sys v0.15.0 // indirect
|
||||||
|
)
|
||||||
63
go.sum
Normal file
63
go.sum
Normal file
|
|
@ -0,0 +1,63 @@
|
||||||
|
github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
|
||||||
|
github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
|
||||||
|
github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
|
||||||
|
github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
||||||
|
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
|
||||||
|
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
|
||||||
|
github.com/gofiber/fiber/v2 v2.52.0 h1:S+qXi7y+/Pgvqq4DrSmREGiFwtB7Bu6+QFLuIHYw/UE=
|
||||||
|
github.com/gofiber/fiber/v2 v2.52.0/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
|
||||||
|
github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU=
|
||||||
|
github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
|
github.com/klauspost/compress v1.17.0 h1:Rnbp4K9EjcDuVuHtd0dgA4qNuv9yKDYKK1ulpJwgrqM=
|
||||||
|
github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
|
||||||
|
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||||
|
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||||
|
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||||
|
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||||
|
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||||
|
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
|
||||||
|
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
||||||
|
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
|
||||||
|
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||||
|
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
|
||||||
|
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
||||||
|
github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA=
|
||||||
|
github.com/valyala/fasthttp v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g=
|
||||||
|
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
|
||||||
|
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
|
||||||
|
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||||
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
|
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||||
|
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||||
|
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
|
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||||
|
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||||
|
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||||
|
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||||
|
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
|
||||||
|
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
|
||||||
|
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
|
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
|
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc=
|
||||||
|
golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
|
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||||
|
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||||
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
|
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
|
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
|
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||||
|
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||||
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
|
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||||
|
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||||
|
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
|
|
@ -1,15 +1,14 @@
|
||||||
{
|
{
|
||||||
"last_updated": "2025-10-04T15:56:39.361555+00:00",
|
"last_updated": "2025-11-01T17:07:11Z",
|
||||||
"files": {
|
"files": {
|
||||||
"Artists.html": {
|
"Artists.html": {
|
||||||
"hash": "84e3e5797fabbe28261922dddcbb628fdb17135bd6978218d0c352f06e5fade9",
|
"hash": "cc693c4b529fbe15e175afb10b052fb27fc36ca1184e4e0f8a39c4f94920c435"
|
||||||
"last_archived": "2025-08-19T05:14:17.304886Z"
|
|
||||||
},
|
},
|
||||||
"artists.csv": {
|
"artists.csv": {
|
||||||
"hash": "77ecb4f1291bbb20a58c6693fe96ff00289e883549c72c2ba035bb611594d716"
|
"hash": "ac2941316ca0fab4807a0a22b9bc60154fff36383521acc93d188341d5f7263c"
|
||||||
},
|
},
|
||||||
"artists.xlsx": {
|
"artists.xlsx": {
|
||||||
"hash": "05813caf0e1f6fc991cd7e9334a3dc50faf2b5a7c878e17cf73282fc385ffbb1"
|
"hash": "6ccb6f8d9c9247b9d5fe3c2f0cc4a0a84eeb5cfa7c0d2d840abd43b129ea85b4"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
660
main.go
Normal file
660
main.go
Normal file
|
|
@ -0,0 +1,660 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"archive/zip"
|
||||||
|
"bytes"
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/csv"
|
||||||
|
"encoding/hex"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"regexp"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/gofiber/fiber/v2/middleware/cors"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
SheetURL = "https://docs.google.com/spreadsheets/d/1Z8aANbxXbnUGoZPRvJfWL3gz6jrzPPrwVt3d0c1iJ_4"
|
||||||
|
ZipURL = SheetURL + "/export?format=zip"
|
||||||
|
XlsxURL = SheetURL + "/export?format=xlsx"
|
||||||
|
|
||||||
|
ZipFilename = "Trackerhub.zip"
|
||||||
|
HTMLFilename = "Artists.html"
|
||||||
|
CSVFilename = "artists.csv"
|
||||||
|
XlsxFilename = "artists.xlsx"
|
||||||
|
|
||||||
|
UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36"
|
||||||
|
BaseURL = "https://sheets.artistgrid.cx"
|
||||||
|
|
||||||
|
UpdateIntervalSeconds = 600
|
||||||
|
InfoPath = "info/status.json"
|
||||||
|
|
||||||
|
DEV_MODE = false
|
||||||
|
)
|
||||||
|
|
||||||
|
var ExcludeNames = map[string]bool{
|
||||||
|
"🎹Worst Comps & Edits": true,
|
||||||
|
"K4$H K4$$!n0": true,
|
||||||
|
"K4HKn0": true,
|
||||||
|
"AI Models": true,
|
||||||
|
"🎹 BPM & Key Tracker": true,
|
||||||
|
"🎹Comps & Edits": true,
|
||||||
|
"🎹 Worst Comps & Edits": true,
|
||||||
|
"🎹 Yedits": true,
|
||||||
|
"Allegations": true,
|
||||||
|
"Rap Disses Timeline": true,
|
||||||
|
"Underground Artists": true,
|
||||||
|
}
|
||||||
|
|
||||||
|
var ManualCSVRows = [][]string{
|
||||||
|
{"Kanye West", "https://docs.google.com/spreadsheets/d/1oGgQrlUxxoGNAiKa_98vhjxR96pxJ3OdvVHKqpvb29w/", "p4, @kiwieater, Maker, Bobby, SamV1sion, @comptonrapper, Rose, Dr Wolf, Oreo Eater, Arco, @Free The Robots, @Alek, @Commandtechno, Snoop Dogg, Awesomefied, @rocky, @flab, Shadow, Reuben🇮🇪, @razacosmica, @Marcemaire, Solidus Jack, Marin, garfiiieeelld", "Yes", "Yes", "Yes"},
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
lastHTMLHash string
|
||||||
|
lastCSVData ArtistData
|
||||||
|
emojiRegex = regexp.MustCompile(`[\p{So}\p{Sk}\x{FE0F}\x{FE0E}\x{200D}⭐🤖🎭︎]+`)
|
||||||
|
)
|
||||||
|
|
||||||
|
type ArtistData map[string]map[string]string
|
||||||
|
|
||||||
|
type FileInfo struct {
|
||||||
|
Hash string `json:"hash"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type StatusInfo struct {
|
||||||
|
LastUpdated string `json:"last_updated"`
|
||||||
|
Files map[string]FileInfo `json:"files"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type DiscordMessage struct {
|
||||||
|
Content string `json:"content"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func cleanArtistName(text string) string {
|
||||||
|
cleaned := emojiRegex.ReplaceAllString(text, "")
|
||||||
|
cleaned = strings.TrimSpace(cleaned)
|
||||||
|
cleaned = strings.TrimPrefix(cleaned, " ")
|
||||||
|
return cleaned
|
||||||
|
}
|
||||||
|
|
||||||
|
func forceStarFlag(starred bool) string {
|
||||||
|
if starred {
|
||||||
|
return "Yes"
|
||||||
|
}
|
||||||
|
return "No"
|
||||||
|
}
|
||||||
|
|
||||||
|
func hashFile(filename string) (string, error) {
|
||||||
|
f, err := os.Open(filename)
|
||||||
|
if err != nil {
|
||||||
|
return "file_not_found", err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
hasher := sha256.New()
|
||||||
|
if _, err := io.Copy(hasher, f); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return hex.EncodeToString(hasher.Sum(nil)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func downloadFile(url, filename string, timeout time.Duration) bool {
|
||||||
|
log.Printf("Downloading %s...\n", filename)
|
||||||
|
|
||||||
|
client := &http.Client{Timeout: timeout}
|
||||||
|
resp, err := client.Get(url)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("ERROR: Failed to download %s: %v\n", filename, err)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
log.Printf("ERROR: Failed to download %s: status %d\n", filename, resp.StatusCode)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
out, err := os.Create(filename)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("ERROR: Failed to create file %s: %v\n", filename, err)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
defer out.Close()
|
||||||
|
|
||||||
|
_, err = io.Copy(out, resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("ERROR: Failed to write file %s: %v\n", filename, err)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("SUCCESS: Saved %s\n", filename)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func downloadZipAndExtractHTML() {
|
||||||
|
if !downloadFile(ZipURL, ZipFilename, 30*time.Second) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("Extracting %s from %s...\n", HTMLFilename, ZipFilename)
|
||||||
|
|
||||||
|
r, err := zip.OpenReader(ZipFilename)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("ERROR: Failed to open zip file: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer r.Close()
|
||||||
|
|
||||||
|
for _, f := range r.File {
|
||||||
|
if f.Name == HTMLFilename {
|
||||||
|
rc, err := f.Open()
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("ERROR: Failed to open file in zip: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer rc.Close()
|
||||||
|
|
||||||
|
content, err := io.ReadAll(rc)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("ERROR: Failed to read file from zip: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
err = os.WriteFile(HTMLFilename, content, 0644)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("ERROR: Failed to write extracted file: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("SUCCESS: Extracted %s\n", HTMLFilename)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("ERROR: %s not found in zip archive\n", HTMLFilename)
|
||||||
|
}
|
||||||
|
|
||||||
|
func downloadXLSX() {
|
||||||
|
downloadFile(XlsxURL, XlsxFilename, 30*time.Second)
|
||||||
|
}
|
||||||
|
|
||||||
|
func quoteCSVField(field string) string {
|
||||||
|
escaped := strings.ReplaceAll(field, `"`, `""`)
|
||||||
|
return `"` + escaped + `"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeCSVRow(w io.Writer, fields []string) error {
|
||||||
|
quotedFields := make([]string, len(fields))
|
||||||
|
for i, field := range fields {
|
||||||
|
quotedFields[i] = quoteCSVField(field)
|
||||||
|
}
|
||||||
|
_, err := w.Write([]byte(strings.Join(quotedFields, ",") + "\n"))
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateCSV() {
|
||||||
|
log.Printf("Generating %s from %s...\n", CSVFilename, HTMLFilename)
|
||||||
|
|
||||||
|
f, err := os.Open(HTMLFilename)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("ERROR: %s not found. Cannot generate CSV.\n", HTMLFilename)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
doc, err := goquery.NewDocumentFromReader(f)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("ERROR: Failed to parse HTML: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
tableBody := doc.Find("table.waffle tbody")
|
||||||
|
if tableBody.Length() == 0 {
|
||||||
|
log.Println("ERROR: Could not find the table body in HTML. Cannot generate CSV.")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rows := tableBody.Find("tr")
|
||||||
|
var data [][]string
|
||||||
|
starringSection := true
|
||||||
|
existingArtists := make(map[string]bool)
|
||||||
|
|
||||||
|
rows.Each(func(i int, row *goquery.Selection) {
|
||||||
|
if i < 3 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
cells := row.Find("td")
|
||||||
|
if cells.Length() < 4 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
artistNameRaw := cells.Eq(0).Text()
|
||||||
|
artistNameRaw = strings.TrimSpace(artistNameRaw)
|
||||||
|
|
||||||
|
artistURL, _ := cells.Eq(0).Find("a").Attr("href")
|
||||||
|
|
||||||
|
if artistNameRaw == "" || artistURL == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.Contains(artistNameRaw, "AI Models") {
|
||||||
|
starringSection = false
|
||||||
|
}
|
||||||
|
|
||||||
|
artistNameClean := cleanArtistName(artistNameRaw)
|
||||||
|
if ExcludeNames[artistNameClean] || strings.Contains(artistNameRaw, "🚩") {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
credit := strings.TrimSpace(cells.Eq(1).Text())
|
||||||
|
linksWork := strings.TrimSpace(cells.Eq(3).Text())
|
||||||
|
updated := strings.TrimSpace(cells.Eq(2).Text())
|
||||||
|
best := forceStarFlag(starringSection)
|
||||||
|
|
||||||
|
data = append(data, []string{
|
||||||
|
artistNameClean,
|
||||||
|
artistURL,
|
||||||
|
credit,
|
||||||
|
linksWork,
|
||||||
|
updated,
|
||||||
|
best,
|
||||||
|
})
|
||||||
|
existingArtists[artistNameClean] = true
|
||||||
|
})
|
||||||
|
|
||||||
|
for _, manualRow := range ManualCSVRows {
|
||||||
|
if len(manualRow) >= 6 {
|
||||||
|
artistName := manualRow[0]
|
||||||
|
if !existingArtists[artistName] {
|
||||||
|
data = append(data, manualRow)
|
||||||
|
existingArtists[artistName] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Slice(data, func(i, j int) bool {
|
||||||
|
bestI := data[i][5]
|
||||||
|
bestJ := data[j][5]
|
||||||
|
nameI := data[i][0]
|
||||||
|
nameJ := data[j][0]
|
||||||
|
|
||||||
|
if bestI != bestJ {
|
||||||
|
return bestI > bestJ
|
||||||
|
}
|
||||||
|
return strings.ToLower(nameI) < strings.ToLower(nameJ)
|
||||||
|
})
|
||||||
|
|
||||||
|
csvFile, err := os.Create(CSVFilename)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("ERROR: Failed to create CSV file %s: %v\n", CSVFilename, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer csvFile.Close()
|
||||||
|
|
||||||
|
header := []string{"Artist Name", "URL", "Credit", "Links Work", "Updated", "Best"}
|
||||||
|
if err := writeCSVRow(csvFile, header); err != nil {
|
||||||
|
log.Printf("ERROR: Failed to write CSV header: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, record := range data {
|
||||||
|
if err := writeCSVRow(csvFile, record); err != nil {
|
||||||
|
log.Printf("ERROR: Failed to write CSV row: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("SUCCESS: Generated %s with %d rows.\n", CSVFilename, len(data))
|
||||||
|
}
|
||||||
|
|
||||||
|
func readCSVToDict(filename string) ArtistData {
|
||||||
|
data := make(ArtistData)
|
||||||
|
|
||||||
|
f, err := os.Open(filename)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("WARNING: CSV file not found: %s\n", filename)
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
reader := csv.NewReader(f)
|
||||||
|
records, err := reader.ReadAll()
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("ERROR: Error reading CSV file %s: %v\n", filename, err)
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(records) == 0 {
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
|
headers := records[0]
|
||||||
|
for _, record := range records[1:] {
|
||||||
|
if len(record) < len(headers) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
row := make(map[string]string)
|
||||||
|
for i, header := range headers {
|
||||||
|
row[header] = record[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
if artistName, ok := row["Artist Name"]; ok && artistName != "" {
|
||||||
|
data[artistName] = row
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
|
func detectChanges(oldData, newData ArtistData) []string {
|
||||||
|
var changes []string
|
||||||
|
|
||||||
|
oldKeys := make(map[string]bool)
|
||||||
|
newKeys := make(map[string]bool)
|
||||||
|
|
||||||
|
for k := range oldData {
|
||||||
|
oldKeys[k] = true
|
||||||
|
}
|
||||||
|
for k := range newData {
|
||||||
|
newKeys[k] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
var removed []string
|
||||||
|
for k := range oldKeys {
|
||||||
|
if !newKeys[k] {
|
||||||
|
removed = append(removed, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.Strings(removed)
|
||||||
|
|
||||||
|
var added []string
|
||||||
|
for k := range newKeys {
|
||||||
|
if !oldKeys[k] {
|
||||||
|
added = append(added, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.Strings(added)
|
||||||
|
|
||||||
|
var common []string
|
||||||
|
for k := range oldKeys {
|
||||||
|
if newKeys[k] {
|
||||||
|
common = append(common, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.Strings(common)
|
||||||
|
|
||||||
|
for _, artist := range removed {
|
||||||
|
changes = append(changes, "REMOVED: **"+artist+"**")
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, artist := range added {
|
||||||
|
changes = append(changes, "ADDED: **"+artist+"**")
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, artist := range common {
|
||||||
|
oldRow := oldData[artist]
|
||||||
|
newRow := newData[artist]
|
||||||
|
|
||||||
|
if oldRow["URL"] != newRow["URL"] {
|
||||||
|
changes = append(changes, "LINK CHANGED: **"+artist+"**")
|
||||||
|
}
|
||||||
|
if oldRow["Credit"] != newRow["Credit"] {
|
||||||
|
changes = append(changes, "CREDIT CHANGED: **"+artist+"**")
|
||||||
|
}
|
||||||
|
if oldRow["Links Work"] != newRow["Links Work"] {
|
||||||
|
changes = append(changes, "LINKS WORK STATUS CHANGED: **"+artist+"**")
|
||||||
|
}
|
||||||
|
if oldRow["Updated"] != newRow["Updated"] {
|
||||||
|
changes = append(changes, "UPDATED DATE CHANGED: **"+artist+"**")
|
||||||
|
}
|
||||||
|
if oldRow["Best"] != newRow["Best"] {
|
||||||
|
changes = append(changes, "BEST FLAG CHANGED: **"+artist+"**")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return changes
|
||||||
|
}
|
||||||
|
|
||||||
|
func sendDiscordMessage(content string) {
|
||||||
|
webhookURL := os.Getenv("DISCORD_WEBHOOK_URL")
|
||||||
|
if webhookURL == "" {
|
||||||
|
log.Println("WARNING: Discord webhook URL not set. Skipping notification.")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(content) > 2000 {
|
||||||
|
content = content[:1990] + "\n... (truncated)"
|
||||||
|
}
|
||||||
|
|
||||||
|
message := DiscordMessage{Content: content}
|
||||||
|
jsonData, err := json.Marshal(message)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("WARNING: Failed to marshal Discord message: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := http.Post(webhookURL, "application/json", bytes.NewBuffer(jsonData))
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("WARNING: Exception sending Discord notification: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
||||||
|
log.Println("SUCCESS: Discord notification sent successfully.")
|
||||||
|
} else {
|
||||||
|
log.Printf("WARNING: Discord notification failed with status: %d\n", resp.StatusCode)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeInfo(htmlHash, csvHash, xlsxHash string) {
|
||||||
|
os.MkdirAll("info", 0755)
|
||||||
|
nowISO := time.Now().UTC().Format(time.RFC3339)
|
||||||
|
|
||||||
|
var info StatusInfo
|
||||||
|
|
||||||
|
data, err := os.ReadFile(InfoPath)
|
||||||
|
if err == nil {
|
||||||
|
json.Unmarshal(data, &info)
|
||||||
|
}
|
||||||
|
|
||||||
|
if info.Files == nil {
|
||||||
|
info.Files = make(map[string]FileInfo)
|
||||||
|
}
|
||||||
|
|
||||||
|
info.LastUpdated = nowISO
|
||||||
|
info.Files[HTMLFilename] = FileInfo{Hash: htmlHash}
|
||||||
|
info.Files[CSVFilename] = FileInfo{Hash: csvHash}
|
||||||
|
info.Files[XlsxFilename] = FileInfo{Hash: xlsxHash}
|
||||||
|
|
||||||
|
jsonData, err := json.MarshalIndent(info, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("WARNING: Failed to marshal status info: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
os.WriteFile(InfoPath, jsonData, 0644)
|
||||||
|
}
|
||||||
|
|
||||||
|
func runDevTests() {
|
||||||
|
log.Println("=== DEVELOPMENT MODE - Running Tests ===")
|
||||||
|
|
||||||
|
log.Println("\nTesting Discord Webhook...")
|
||||||
|
testMessage := fmt.Sprintf("**Development Mode Test**\nTimestamp: %s\nWebhook is working correctly!", time.Now().Format(time.RFC3339))
|
||||||
|
sendDiscordMessage(testMessage)
|
||||||
|
|
||||||
|
log.Println("\nDevelopment tests completed!")
|
||||||
|
log.Println("=========================================\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
func updateLoop() {
|
||||||
|
for {
|
||||||
|
log.Println("--- Starting update cycle ---")
|
||||||
|
|
||||||
|
downloadZipAndExtractHTML()
|
||||||
|
downloadXLSX()
|
||||||
|
generateCSV()
|
||||||
|
|
||||||
|
files := []string{HTMLFilename, CSVFilename, XlsxFilename}
|
||||||
|
allExist := true
|
||||||
|
for _, f := range files {
|
||||||
|
if _, err := os.Stat(f); os.IsNotExist(err) {
|
||||||
|
allExist = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !allExist {
|
||||||
|
log.Println("WARNING: One or more files are missing after download/parse. Skipping this cycle.")
|
||||||
|
time.Sleep(UpdateIntervalSeconds * time.Second)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
htmlHash, _ := hashFile(HTMLFilename)
|
||||||
|
csvHash, _ := hashFile(CSVFilename)
|
||||||
|
xlsxHash, _ := hashFile(XlsxFilename)
|
||||||
|
currentCSVData := readCSVToDict(CSVFilename)
|
||||||
|
|
||||||
|
if lastHTMLHash == "" {
|
||||||
|
log.Println("INFO: First run: storing initial file hashes.")
|
||||||
|
} else if htmlHash != lastHTMLHash {
|
||||||
|
log.Println("ALERT: Artists.html has changed! Checking for data differences.")
|
||||||
|
changes := detectChanges(lastCSVData, currentCSVData)
|
||||||
|
if len(changes) > 0 {
|
||||||
|
message := "**Tracker Update Detected:**\n" + strings.Join(changes, "\n")
|
||||||
|
sendDiscordMessage(message)
|
||||||
|
} else {
|
||||||
|
log.Println("INFO: HTML hash changed, but no data differences found.")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log.Println("INFO: Artists.html is unchanged.")
|
||||||
|
}
|
||||||
|
|
||||||
|
writeInfo(htmlHash, csvHash, xlsxHash)
|
||||||
|
lastHTMLHash = htmlHash
|
||||||
|
lastCSVData = currentCSVData
|
||||||
|
|
||||||
|
log.Println("--- Update cycle finished ---")
|
||||||
|
log.Printf("Sleeping for %d seconds...\n", UpdateIntervalSeconds)
|
||||||
|
time.Sleep(UpdateIntervalSeconds * time.Second)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func getStatusData() (*StatusInfo, error) {
|
||||||
|
data, err := os.ReadFile(InfoPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var status StatusInfo
|
||||||
|
err = json.Unmarshal(data, &status)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &status, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
log.SetFlags(log.LstdFlags | log.Lshortfile)
|
||||||
|
|
||||||
|
if DEV_MODE {
|
||||||
|
runDevTests()
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Println("Starting background update goroutine...")
|
||||||
|
go updateLoop()
|
||||||
|
|
||||||
|
app := fiber.New()
|
||||||
|
app.Use(cors.New())
|
||||||
|
|
||||||
|
app.Get("/", func(c *fiber.Ctx) error {
|
||||||
|
return c.SendFile("templates/index.html")
|
||||||
|
})
|
||||||
|
|
||||||
|
app.Get("/artists.html", func(c *fiber.Ctx) error {
|
||||||
|
return c.SendFile(HTMLFilename)
|
||||||
|
})
|
||||||
|
|
||||||
|
app.Get("/artists.csv", func(c *fiber.Ctx) error {
|
||||||
|
return c.SendFile(CSVFilename)
|
||||||
|
})
|
||||||
|
|
||||||
|
app.Get("/artists.xlsx", func(c *fiber.Ctx) error {
|
||||||
|
return c.SendFile(XlsxFilename)
|
||||||
|
})
|
||||||
|
|
||||||
|
app.Static("/_next", "templates/_next")
|
||||||
|
|
||||||
|
app.Get("/info", func(c *fiber.Ctx) error {
|
||||||
|
data, err := getStatusData()
|
||||||
|
if err != nil {
|
||||||
|
return c.Status(404).JSON(fiber.Map{"error": "Info not available"})
|
||||||
|
}
|
||||||
|
return c.JSON(data)
|
||||||
|
})
|
||||||
|
|
||||||
|
app.Get("/info/html", func(c *fiber.Ctx) error {
|
||||||
|
data, err := getStatusData()
|
||||||
|
if err != nil {
|
||||||
|
c.Set("Content-Type", "text/html")
|
||||||
|
return c.Status(404).SendString("<p>Status info not available.</p>")
|
||||||
|
}
|
||||||
|
|
||||||
|
htmlInfo := data.Files[HTMLFilename]
|
||||||
|
csvInfo := data.Files[CSVFilename]
|
||||||
|
xlsxInfo := data.Files[XlsxFilename]
|
||||||
|
|
||||||
|
html := fmt.Sprintf(`
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>File Info</title>
|
||||||
|
<style>body { font-family: sans-serif; } li { margin-bottom: 1em; }</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Latest File Info</h1>
|
||||||
|
<p><strong>Last Updated:</strong> %s</p>
|
||||||
|
<ul>
|
||||||
|
<li><strong>%s</strong><br>
|
||||||
|
Hash: %s
|
||||||
|
</li>
|
||||||
|
<li><strong>%s</strong><br>
|
||||||
|
Hash: %s
|
||||||
|
</li>
|
||||||
|
<li><strong>%s</strong><br>
|
||||||
|
Hash: %s
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
`, data.LastUpdated,
|
||||||
|
HTMLFilename, htmlInfo.Hash,
|
||||||
|
CSVFilename, csvInfo.Hash,
|
||||||
|
XlsxFilename, xlsxInfo.Hash)
|
||||||
|
|
||||||
|
c.Set("Content-Type", "text/html")
|
||||||
|
return c.SendString(html)
|
||||||
|
})
|
||||||
|
|
||||||
|
app.Use(func(c *fiber.Ctx) error {
|
||||||
|
return c.Status(404).SendFile("templates/404.html")
|
||||||
|
})
|
||||||
|
|
||||||
|
log.Println("Starting Fiber server on :5000...")
|
||||||
|
log.Fatal(app.Listen(":5000"))
|
||||||
|
}
|
||||||
115
main.py
115
main.py
|
|
@ -1,115 +0,0 @@
|
||||||
# main.py
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import threading
|
|
||||||
|
|
||||||
from flask import Flask, jsonify, send_file, send_from_directory
|
|
||||||
from flask_cors import CORS
|
|
||||||
|
|
||||||
from config import CSV_FILENAME, HTML_FILENAME, XLSX_FILENAME
|
|
||||||
from update_loop import update_loop
|
|
||||||
|
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
|
||||||
)
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
app = Flask(__name__)
|
|
||||||
CORS(app)
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/")
|
|
||||||
def serve_index():
|
|
||||||
return send_file("templates/index.html")
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/artists.html")
|
|
||||||
def serve_artists_html():
|
|
||||||
return send_file(HTML_FILENAME)
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/artists.csv")
|
|
||||||
def serve_artists_csv():
|
|
||||||
return send_file(CSV_FILENAME)
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/artists.xlsx")
|
|
||||||
def serve_artists_xlsx():
|
|
||||||
return send_file(XLSX_FILENAME)
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/_next/<path:filename>")
|
|
||||||
def serve_next_static(filename):
|
|
||||||
return send_from_directory("templates/_next", filename)
|
|
||||||
|
|
||||||
|
|
||||||
def get_status_data():
    """Load info/status.json and return the parsed dict.

    Returns None when the file is missing, unreadable, or not valid JSON
    (errors are logged rather than raised).
    """
    status_path = os.path.join("info", "status.json")
    if not os.path.exists(status_path):
        return None
    try:
        with open(status_path, "r") as handle:
            return json.load(handle)
    except (IOError, json.JSONDecodeError) as exc:
        logger.error(f"Failed to read or parse status.json: {exc}")
        return None
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/info")
|
|
||||||
def info_json():
|
|
||||||
data = get_status_data()
|
|
||||||
if data:
|
|
||||||
return jsonify(data)
|
|
||||||
return jsonify({"error": "Info not available"}), 404
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/info/html")
|
|
||||||
def info_html():
|
|
||||||
data = get_status_data()
|
|
||||||
if not data:
|
|
||||||
return "<p>Status info not available.</p>", 404
|
|
||||||
|
|
||||||
files_info = data.get("files", {})
|
|
||||||
html_info = files_info.get(HTML_FILENAME, {})
|
|
||||||
csv_info = files_info.get(CSV_FILENAME, {})
|
|
||||||
xlsx_info = files_info.get(XLSX_FILENAME, {})
|
|
||||||
|
|
||||||
return f"""
|
|
||||||
<!DOCTYPE html>
|
|
||||||
<html lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="UTF-8">
|
|
||||||
<title>File Info</title>
|
|
||||||
<style>body {{ font-family: sans-serif; }} li {{ margin-bottom: 1em; }}</style>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<h1>Latest File Info</h1>
|
|
||||||
<p><strong>Last Updated:</strong> {data.get('last_updated', 'N/A')}</p>
|
|
||||||
<ul>
|
|
||||||
<li><strong>{HTML_FILENAME}</strong><br>
|
|
||||||
Hash: {html_info.get('hash', 'N/A')}<br>
|
|
||||||
Archived: {html_info.get('last_archived', 'N/A')}
|
|
||||||
</li>
|
|
||||||
<li><strong>{CSV_FILENAME}</strong><br>
|
|
||||||
Hash: {csv_info.get('hash', 'N/A')}
|
|
||||||
</li>
|
|
||||||
<li><strong>{XLSX_FILENAME}</strong><br>
|
|
||||||
Hash: {xlsx_info.get('hash', 'N/A')}
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
@app.errorhandler(404)
def page_not_found(e):
    """Serve the custom 404 page for any unmatched route."""
    body = send_file("templates/404.html")
    return body, 404
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
logger.info("Starting background update thread...")
|
|
||||||
threading.Thread(target=update_loop, daemon=True).start()
|
|
||||||
logger.info("Starting Flask server...")
|
|
||||||
app.run(host="0.0.0.0", port=5000)
|
|
||||||
30
notify.py
30
notify.py
|
|
@ -1,30 +0,0 @@
|
||||||
# notify.py
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
from config import DISCORD_WEBHOOK_URL
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def send_discord_message(content: str):
    """POST *content* to the configured Discord webhook.

    Messages are truncated to fit Discord's 2000-character limit. A missing
    webhook URL makes this a no-op; network errors are logged, not raised.
    """
    if not DISCORD_WEBHOOK_URL:
        logger.warning("Discord webhook URL not set. Skipping notification.")
        return

    if len(content) > 2000:
        content = content[:1990] + "\n... (truncated)"

    payload = json.dumps({"content": content})
    try:
        response = requests.post(
            DISCORD_WEBHOOK_URL,
            headers={"Content-Type": "application/json"},
            data=payload,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        logger.error(f"⚠️ Exception sending Discord notification: {exc}")
    else:
        logger.info("✅ Discord notification sent successfully.")
|
|
||||||
70
parser.py
70
parser.py
|
|
@ -1,70 +0,0 @@
|
||||||
# parser.py
|
|
||||||
import csv
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
|
|
||||||
from config import CSV_FILENAME, HTML_FILENAME, exclude_names
|
|
||||||
from utils import clean_artist_name, force_star_flag
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def generate_csv():
    """Parse the sheet's HTML export into CSV_FILENAME.

    Reads HTML_FILENAME, walks the `table.waffle` rows (skipping the three
    header rows), filters out excluded/flagged names, and writes the kept
    rows with a fixed header. Failures are logged and the function returns
    without writing.
    """
    logger.info(f"📝 Generating {CSV_FILENAME} from {HTML_FILENAME}...")
    try:
        with open(HTML_FILENAME, "r", encoding="utf-8") as handle:
            soup = BeautifulSoup(handle, "html.parser")
    except FileNotFoundError:
        logger.error(f"❌ {HTML_FILENAME} not found. Cannot generate CSV.")
        return

    grid = soup.select_one("table.waffle tbody")
    if not grid:
        logger.error("❌ Could not find the table body in HTML. Cannot generate CSV.")
        return

    records = []
    # Rows before the "AI Models" marker belong to the starred section.
    in_star_section = True
    for tr in grid.select("tr")[3:]:
        cells = tr.find_all("td")
        if len(cells) < 4:
            continue

        raw_name = cells[0].get_text(strip=True)
        anchor = cells[0].find("a")
        artist_url = anchor.get("href") if anchor else ""
        if not raw_name or not artist_url:
            continue

        if "AI Models" in raw_name:
            in_star_section = False

        name = clean_artist_name(raw_name)
        if name in exclude_names or "🚩" in raw_name:
            continue

        records.append(
            [
                name,
                artist_url,
                cells[1].get_text(strip=True),
                cells[3].get_text(strip=True),
                cells[2].get_text(strip=True),
                force_star_flag(in_star_section),
            ]
        )

    try:
        with open(CSV_FILENAME, "w", newline="", encoding="utf-8") as out:
            writer = csv.writer(out, quoting=csv.QUOTE_ALL)
            writer.writerow(
                ["Artist Name", "URL", "Credit", "Links Work", "Updated", "Best"]
            )
            writer.writerows(records)
        logger.info(f"✅ Generated {CSV_FILENAME} with {len(records)} rows.")
    except IOError as e:
        logger.error(f"❌ Failed to write CSV file {CSV_FILENAME}: {e}")
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
Flask
|
|
||||||
requests
|
|
||||||
beautifulsoup4
|
|
||||||
lxml
|
|
||||||
flask-cors
|
|
||||||
waybackpy
|
|
||||||
|
|
@ -1,97 +0,0 @@
|
||||||
# update_loop.py
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
|
|
||||||
from archive import archive_all_urls
|
|
||||||
from config import CSV_FILENAME, HTML_FILENAME, XLSX_FILENAME
|
|
||||||
from diff import detect_changes, read_csv_to_dict
|
|
||||||
from downloader import download_xlsx, download_zip_and_extract_html
|
|
||||||
from notify import send_discord_message
|
|
||||||
from parser import generate_csv
|
|
||||||
from utils import hash_file
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
last_html_hash = None
|
|
||||||
last_csv_data = {}
|
|
||||||
INFO_PATH = os.path.join("info", "status.json")
|
|
||||||
UPDATE_INTERVAL_SECONDS = 600
|
|
||||||
|
|
||||||
|
|
||||||
def write_info(html_hash: str, csv_hash: str, xlsx_hash: str, is_archived: bool):
    """Persist the latest file hashes (and archive timestamp) to status.json.

    Args:
        html_hash: SHA-256 hex digest of the HTML export.
        csv_hash: SHA-256 hex digest of the generated CSV.
        xlsx_hash: SHA-256 hex digest of the XLSX export.
        is_archived: when True, also stamps the HTML entry's last_archived.
    """
    os.makedirs("info", exist_ok=True)
    now_iso = datetime.now(timezone.utc).isoformat()

    try:
        with open(INFO_PATH, "r") as f:
            info = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        info = {}

    # setdefault guards: the original indexed info["files"][HTML_FILENAME]
    # directly and raised KeyError on any valid JSON lacking those keys.
    files = info.setdefault("files", {})
    html_entry = files.setdefault(HTML_FILENAME, {})

    info["last_updated"] = now_iso
    html_entry["hash"] = html_hash
    if is_archived:
        html_entry["last_archived"] = now_iso

    files[CSV_FILENAME] = {"hash": csv_hash}
    files[XLSX_FILENAME] = {"hash": xlsx_hash}

    with open(INFO_PATH, "w") as f:
        json.dump(info, f, indent=2)
|
|
||||||
|
|
||||||
|
|
||||||
def update_loop():
    """Background worker loop: refresh exports, diff, notify, record state.

    Each cycle re-downloads the sheet exports, regenerates the CSV, compares
    the new snapshot against the previous one, sends a Discord message and
    archives URLs when data changed, then writes status.json and sleeps for
    UPDATE_INTERVAL_SECONDS. Never returns; exceptions are caught and logged
    so one bad cycle cannot kill the thread.
    """
    global last_html_hash, last_csv_data

    required = [HTML_FILENAME, CSV_FILENAME, XLSX_FILENAME]

    while True:
        logger.info("--- Starting update cycle ---")
        try:
            download_zip_and_extract_html()
            download_xlsx()
            generate_csv()

            if any(not os.path.exists(name) for name in required):
                logger.warning(
                    "One or more files are missing after download/parse. Skipping this cycle."
                )
                time.sleep(UPDATE_INTERVAL_SECONDS)
                continue

            html_hash = hash_file(HTML_FILENAME)
            csv_hash = hash_file(CSV_FILENAME)
            xlsx_hash = hash_file(XLSX_FILENAME)
            current_csv_data = read_csv_to_dict(CSV_FILENAME)

            archived_this_cycle = False
            if last_html_hash is None:
                logger.info("First run: storing initial file hashes.")
            elif html_hash != last_html_hash:
                logger.info("🔔 Artists.html has changed! Checking for data differences.")
                changes = detect_changes(last_csv_data, current_csv_data)
                if changes:
                    send_discord_message(
                        "**Tracker Update Detected:**\n" + "\n".join(changes)
                    )
                    archive_all_urls()
                    archived_this_cycle = True
                else:
                    logger.info("ℹ️ HTML hash changed, but no data differences found.")
            else:
                logger.info("ℹ️ Artists.html is unchanged.")

            write_info(html_hash, csv_hash, xlsx_hash, is_archived=archived_this_cycle)
            last_html_hash = html_hash
            last_csv_data = current_csv_data
            logger.info("--- Update cycle finished ---")

        except Exception as e:
            logger.critical(
                f"An unexpected error occurred in the update loop: {e}", exc_info=True
            )

        logger.info(f"Sleeping for {UPDATE_INTERVAL_SECONDS} seconds...")
        time.sleep(UPDATE_INTERVAL_SECONDS)
|
|
||||||
22
utils.py
22
utils.py
|
|
@ -1,22 +0,0 @@
|
||||||
# utils.py
|
|
||||||
import hashlib
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
def clean_artist_name(text: str) -> str:
    """Strip decorative badge emoji (star, robot, masks, variation selector)
    from *text* and trim surrounding whitespace."""
    without_badges = re.sub(r"[⭐🤖🎭\u2B50\uFE0F]", "", text)
    return without_badges.strip()
|
|
||||||
|
|
||||||
|
|
||||||
def force_star_flag(starred: bool = True) -> str:
    """Map a boolean "starred" state onto the spreadsheet's Yes/No vocabulary."""
    if starred:
        return "Yes"
    return "No"
|
|
||||||
|
|
||||||
|
|
||||||
def hash_file(filename: str, block_size: int = 65536) -> str:
    """Return the hex SHA-256 digest of *filename*, streamed in block_size
    chunks so large files never load fully into memory.

    Returns the sentinel string "file_not_found" when the file is absent.
    """
    digest = hashlib.sha256()
    try:
        with open(filename, "rb") as handle:
            while chunk := handle.read(block_size):
                digest.update(chunk)
    except FileNotFoundError:
        return "file_not_found"
    return digest.hexdigest()
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue