This commit is contained in:
Eduard Prigoana 2025-07-20 18:23:49 +03:00
parent fb499a72c4
commit 4b708ba090
13 changed files with 272 additions and 141 deletions

394
main.py
View file

@@ -1,168 +1,288 @@
import csv
import hashlib
import json
import os
import random
import re
import threading
import time
import zipfile

import requests
from bs4 import BeautifulSoup
from flask import Flask, render_template, send_file, send_from_directory
from flask_cors import CORS

app = Flask(__name__, template_folder="templates")
CORS(app)  # enable CORS for all routes
# Constants
ZIP_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=zip"
XLSX_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=xlsx"
ZIP_FILENAME = "Trackerhub.zip"
HTML_FILENAME = "Artists.html"
CSV_FILENAME = "artists.csv"
XLSX_FILENAME = "artists.xlsx"

# Sheet tabs / rows that are not real artists and must be skipped.
# Names are compared AFTER emoji stripping (see clean_artist_name).
exclude_names = {
    "AI Models",
    "Lawson",
    "BPM Tracker",
    "Worst Comps & Edits",
    "Yedits",
    "Allegations",
    "Rap Disses Timeline",
    "Underground Artists",
}
# Compiled once at import time; the original recompiled this pattern on every
# call. Matches a single emoji-range character plus any surrounding whitespace.
_EMOJI_RE = re.compile(
    r'\s*['
    '\U0001F600-\U0001F64F'  # emoticons
    '\U0001F300-\U0001F5FF'  # misc symbols & pictographs
    '\U0001F680-\U0001F6FF'  # transport & map symbols
    '\U0001F1E0-\U0001F1FF'  # regional-indicator (flag) letters
    '\u2702-\u27B0'          # dingbats
    '\u24C2-\U0001F251'      # enclosed characters (broad range)
    r']\s*',
    flags=re.UNICODE,
)


def remove_emojis(text):
    """Return *text* with emoji characters (and whitespace adjacent to each
    emoji) removed, then stripped of leading/trailing whitespace."""
    return _EMOJI_RE.sub('', text).strip()


USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36"
# URLs to archive on changes — update these to your actual hosted domain.
# (The stale `def fetch_and_process():` header left over from the previous
# revision is removed; these are module-level constants.)
BASE_URL = "http://localhost:5000"  # Change this to your public domain when deployed
ARCHIVE_URLS = [
    f"{BASE_URL}/",
    f"{BASE_URL}/index.html/",
    f"{BASE_URL}/artists.html",
    f"{BASE_URL}/artists.csv",
    f"{BASE_URL}/artists.xlsx",
]
# Discord webhook for change notifications; None disables notifications.
DISCORD_WEBHOOK_URL = os.getenv("DISCORD_WEBHOOK_URL")
def clean_artist_name(text):
    """Drop decorative star/robot/mask emoji and variation selectors from a
    raw cell value, then trim surrounding whitespace."""
    for decoration in ("\u2B50", "\U0001F916", "\U0001F3AD", "\uFE0F"):
        text = text.replace(decoration, "")
    return text.strip()
def force_star_flag(starred=True):
    """Map a truthy/falsy starred state to the CSV's "Yes"/"No" flag."""
    if starred:
        return "Yes"
    return "No"
def download_zip_and_extract_html():
    """Download the spreadsheet as a ZIP and extract Artists.html from it.

    Writes ZIP_FILENAME and HTML_FILENAME to the working directory.
    Raises requests.HTTPError on a bad response and KeyError if the ZIP
    lacks the expected HTML member.
    """
    print("🔄 Downloading ZIP...")
    # timeout added: without it a stalled connection hangs the update loop forever
    r = requests.get(ZIP_URL, timeout=60)
    r.raise_for_status()
    with open(ZIP_FILENAME, "wb") as f:
        f.write(r.content)
    print(f"✅ Saved ZIP as {ZIP_FILENAME}")
    with zipfile.ZipFile(ZIP_FILENAME, "r") as z:
        with z.open(HTML_FILENAME) as html_file:
            html_content = html_file.read()
    with open(HTML_FILENAME, "wb") as f:
        f.write(html_content)
    print(f"✅ Extracted {HTML_FILENAME}")
def download_xlsx():
    """Download the spreadsheet's XLSX export to XLSX_FILENAME.

    Raises requests.HTTPError on a bad response.
    """
    print("🔄 Downloading XLSX...")
    # timeout added: without it a stalled connection hangs the update loop forever
    r = requests.get(XLSX_URL, timeout=60)
    r.raise_for_status()
    with open(XLSX_FILENAME, "wb") as f:
        f.write(r.content)
    print(f"✅ Saved XLSX as {XLSX_FILENAME}")
def generate_csv():
    """Parse the extracted Artists.html into artists.csv.

    Walks the Google-Sheets "waffle" table, skips non-artist rows, and writes
    one CSV row per artist: name, URL, credit, link status, update date and a
    "Best" flag (Yes for rows above the "AI Models" marker row, No after it).
    """
    print("📝 Generating CSV...")
    with open(HTML_FILENAME, "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f, "html.parser")
    rows = soup.select("table.waffle tbody tr")[3:]  # skip headers and Discord
    data = []
    # Rows encountered before the "AI Models" marker count as starred ("Best").
    starring = True
    for row in rows:
        cells = row.find_all("td")
        if len(cells) < 4:
            # Spacer/short row, not artist data — skip.
            continue
        link_tag = cells[0].find("a")
        artist_name_raw = link_tag.get_text(strip=True) if link_tag else cells[0].get_text(strip=True)
        artist_url = link_tag["href"] if link_tag else ""
        if not artist_url:
            # Rows without a link are not usable artist entries.
            continue
        if "AI Models" in artist_name_raw:
            # Everything from this marker row onward is no longer "Best".
            starring = False
        artist_name_clean = clean_artist_name(artist_name_raw)
        if artist_name_clean in exclude_names:
            # Known non-artist sheet tabs (the marker row itself lands here too).
            continue
        if "🚩" in artist_name_raw:
            # Flagged entries are excluded entirely.
            continue
        best = force_star_flag(starring)
        credit = cells[1].get_text(strip=True)
        updated = cells[2].get_text(strip=True)
        links_work = cells[3].get_text(strip=True)
        data.append([artist_name_clean, artist_url, credit, links_work, updated, best])
    with open(CSV_FILENAME, "w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Artist Name", "URL", "Credit", "Links Work", "Updated", "Best"])
        writer.writerows(data)
    print(f"✅ CSV saved as {CSV_FILENAME}")
def hash_file(filename):
    """Return the SHA-256 hex digest of *filename*'s contents."""
    digest = hashlib.sha256()
    with open(filename, "rb") as fh:
        for chunk in iter(lambda: fh.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
def archive_url(url):
    """Ask the Wayback Machine ("Save Page Now") to snapshot *url*.

    Best-effort: logs success/failure and swallows any exception so one bad
    URL never kills the caller's loop. (The old fetch_and_process body that
    was interleaved into this function is removed — its logic lives in
    download_zip_and_extract_html / download_xlsx / generate_csv now.)
    """
    print(f"🌐 Archiving {url} ...")
    headers = {"User-Agent": USER_AGENT}
    try:
        resp = requests.get(f"https://web.archive.org/save/{url}", headers=headers, timeout=30)
        if resp.status_code == 200:
            print(f"✅ Archived {url}")
        else:
            print(f"⚠️ Failed to archive {url}, status code {resp.status_code}")
    except Exception as e:
        print(f"⚠️ Exception archiving {url}: {e}")
def archive_all_urls():
    """Archive every URL in ARCHIVE_URLS, pausing ~10s (±3s jitter) before
    each request to avoid hammering the archive service."""
    for target in ARCHIVE_URLS:
        pause = 10 + random.uniform(-3, 3)
        time.sleep(pause)
        archive_url(target)
def read_csv_to_dict(filename):
    """Load a header-row CSV into a dict keyed by the "Artist Name" column.

    Each value is the full row dict; a duplicated artist name keeps the last
    row seen, matching plain dict-assignment semantics.
    """
    with open(filename, newline='', encoding='utf-8') as fh:
        return {row["Artist Name"]: row for row in csv.DictReader(fh)}
def detect_changes(old_data, new_data):
    """Compare two CSV snapshots (dicts keyed by artist name).

    Returns a list of human-readable change descriptions (Discord markdown):
    removed/added artists, then per-field changes for artists in both.
    Keys are visited in sorted order so output is deterministic — the
    original iterated raw sets, producing a different message order per run.
    """
    changes = []
    old_keys = set(old_data)
    new_keys = set(new_data)

    for artist in sorted(old_keys - new_keys):
        changes.append(f"❌ Removed: **{artist}**")
    for artist in sorted(new_keys - old_keys):
        changes.append(f" Added: **{artist}**")

    # Field-level comparison, driven by a table instead of repeated if-blocks.
    field_messages = (
        ("URL", "🔗 Link changed for **{}**"),
        ("Credit", "✏️ Credit changed for **{}**"),
        ("Links Work", "🔄 Links Work status changed for **{}**"),
        ("Updated", "🕒 Updated date changed for **{}**"),
        ("Best", "⭐ Best flag changed for **{}**"),
    )
    for artist in sorted(old_keys & new_keys):
        old_row = old_data[artist]
        new_row = new_data[artist]
        for field, template in field_messages:
            if old_row[field] != new_row[field]:
                changes.append(template.format(artist))
    return changes
def send_discord_message(content):
    """POST *content* as a Discord webhook message.

    No-ops with a warning when DISCORD_WEBHOOK_URL is unset; logs the outcome
    and swallows network errors so callers never crash on notification failure.
    """
    if not DISCORD_WEBHOOK_URL:
        print("⚠️ Discord webhook URL not set in env")
        return
    payload = json.dumps({"content": content})
    try:
        resp = requests.post(
            DISCORD_WEBHOOK_URL,
            headers={"Content-Type": "application/json"},
            data=payload,
            timeout=10,
        )
        if resp.status_code in (200, 204):
            print("✅ Discord notification sent")
        else:
            print(f"⚠️ Failed to send Discord notification, status code {resp.status_code}")
    except Exception as e:
        print(f"⚠️ Exception sending Discord notification: {e}")
def update_loop():
    """Background worker: every 10 minutes refresh the source files, then on a
    CSV content change send a Discord diff summary and archive the public URLs.

    Reconstructed from the diff residue: the old background_updater and the
    old serve_csv/serve_xlsx routes (which referenced the removed CSV_FILE /
    XLSX_FILE constants) are dropped.
    """
    last_csv_hash = None
    last_csv_data = {}
    while True:
        try:
            download_zip_and_extract_html()
            download_xlsx()
            generate_csv()

            current_hash = hash_file(CSV_FILENAME)
            current_data = read_csv_to_dict(CSV_FILENAME)

            if last_csv_hash is None:
                # First pass: just record the baseline, nothing to compare.
                print(" Initial CSV hash stored.")
            elif current_hash != last_csv_hash:
                print("🔔 CSV has changed! Archiving URLs...")
                changes = detect_changes(last_csv_data, current_data)
                if changes:
                    message = "**CSV Update Detected:**\n" + "\n".join(changes)
                    send_discord_message(message)
                else:
                    # Hash moved but no field-level diff (e.g. row reorder).
                    print(" No detectable content changes found.")
                archive_all_urls()
            else:
                print(" CSV unchanged. No archiving needed.")

            last_csv_hash = current_hash
            last_csv_data = current_data
        except Exception as e:
            # Keep the loop alive on any failure; retry next cycle.
            print(f"⚠️ Error updating files: {e}")
        time.sleep(600)  # 10 minutes


@app.route("/")
@app.route("/index")
@app.route("/index.html")
def index():
    """Serve the bundled index page from the templates folder."""
    return render_template("index.html")


@app.route('/favicon.png')
def serve_favicon():
    """Serve the favicon stored alongside the templates."""
    return send_from_directory(app.template_folder, 'favicon.png')
# Data-file routes (new-version only; the old catch_all and serve_index that
# referenced the removed CSV_FILE constant / inline HTML page are dropped).

@app.route("/artists.html")
def serve_artists_html():
    """Serve the extracted Google-Sheets HTML export."""
    return send_file(HTML_FILENAME, mimetype="text/html")


@app.route("/artists.csv")
def serve_artists_csv():
    """Serve the generated CSV."""
    return send_file(CSV_FILENAME, mimetype="text/csv")


@app.route('/_next/<path:filename>')
def serve_next(filename):
    """Serve Next.js static assets bundled under templates/_next."""
    return send_from_directory(os.path.join(app.template_folder, '_next'), filename)


@app.route("/artists.xlsx")
def serve_artists_xlsx():
    """Serve the downloaded XLSX export."""
    return send_file(XLSX_FILENAME, mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
if __name__ == "__main__":
    # Run the refresh loop in the background so the web server starts
    # immediately. (The stale background_updater thread start from the old
    # revision is removed — that function no longer exists.)
    threading.Thread(target=update_loop, daemon=True).start()
    try:
        # Prime the data files once at startup so routes can serve right away;
        # a failure here is non-fatal, the loop will retry.
        download_zip_and_extract_html()
        download_xlsx()
        generate_csv()
    except Exception as e:
        print(f"⚠️ Initial update failed: {e}")
    app.run(host="0.0.0.0", port=5000)

View file

@@ -1,4 +1,4 @@
Flask
flask-cors
requests
beautifulsoup4
lxml

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[492],{3632:(e,t,r)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"default",{enumerable:!0,get:function(){return o}});let l=r(5155),n=r(6395);function o(){return(0,l.jsx)(n.HTTPAccessErrorFallback,{status:404,message:"This page could not be found."})}("function"==typeof t.default||"object"==typeof t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)},3868:(e,t,r)=>{(window.__NEXT_P=window.__NEXT_P||[]).push(["/_not-found/page",function(){return r(3632)}])},6395:(e,t,r)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"HTTPAccessErrorFallback",{enumerable:!0,get:function(){return o}}),r(8229);let l=r(5155);r(2115);let n={error:{fontFamily:'system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji"',height:"100vh",textAlign:"center",display:"flex",flexDirection:"column",alignItems:"center",justifyContent:"center"},desc:{display:"inline-block"},h1:{display:"inline-block",margin:"0 20px 0 0",padding:"0 23px 0 0",fontSize:24,fontWeight:500,verticalAlign:"top",lineHeight:"49px"},h2:{fontSize:14,fontWeight:400,lineHeight:"49px",margin:0}};function o(e){let{status:t,message:r}=e;return(0,l.jsxs)(l.Fragment,{children:[(0,l.jsx)("title",{children:t+": "+r}),(0,l.jsx)("div",{style:n.error,children:(0,l.jsxs)("div",{children:[(0,l.jsx)("style",{dangerouslySetInnerHTML:{__html:"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}),(0,l.jsx)("h1",{className:"next-error-h1",style:n.h1,children:t}),(0,l.jsx)("div",{style:n.desc,children:(0,l.jsx)("h2",{style:n.h2,children:r})})]})})]})}("function"==typeof t.default||"object"==typeof 
t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)}},e=>{var t=t=>e(e.s=t);e.O(0,[441,684,358],()=>t(3868)),_N_E=e.O()}]);

View file

@@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[177],{2705:(e,t,r)=>{Promise.resolve().then(r.bind(r,7780)),Promise.resolve().then(r.t.bind(r,9840,23)),Promise.resolve().then(r.t.bind(r,9324,23))},7780:(e,t,r)=>{"use strict";r.d(t,{ThemeProvider:()=>b});var n=r(5155),s=r(2115),a=(e,t,r,n,s,a,l,o)=>{let c=document.documentElement,i=["light","dark"];function m(t){var r;(Array.isArray(e)?e:[e]).forEach(e=>{let r="class"===e,n=r&&a?s.map(e=>a[e]||e):s;r?(c.classList.remove(...n),c.classList.add(a&&a[t]?a[t]:t)):c.setAttribute(e,t)}),r=t,o&&i.includes(r)&&(c.style.colorScheme=r)}if(n)m(n);else try{let e=localStorage.getItem(t)||r,n=l&&"system"===e?window.matchMedia("(prefers-color-scheme: dark)").matches?"dark":"light":e;m(n)}catch(e){}},l=["light","dark"],o="(prefers-color-scheme: dark)",c=s.createContext(void 0),i=e=>s.useContext(c)?s.createElement(s.Fragment,null,e.children):s.createElement(d,{...e}),m=["light","dark"],d=e=>{let{forcedTheme:t,disableTransitionOnChange:r=!1,enableSystem:n=!0,enableColorScheme:a=!0,storageKey:i="theme",themes:d=m,defaultTheme:b=n?"system":"light",attribute:p="data-theme",value:v,children:g,nonce:E,scriptProps:S}=e,[k,w]=s.useState(()=>h(i,b)),[C,T]=s.useState(()=>"system"===k?f():k),_=v?Object.values(v):d,L=s.useCallback(e=>{let t=e;if(!t)return;"system"===e&&n&&(t=f());let s=v?v[t]:t,o=r?y(E):null,c=document.documentElement,i=e=>{"class"===e?(c.classList.remove(..._),s&&c.classList.add(s)):e.startsWith("data-")&&(s?c.setAttribute(e,s):c.removeAttribute(e))};if(Array.isArray(p)?p.forEach(i):i(p),a){let e=l.includes(b)?b:null,r=l.includes(t)?t:e;c.style.colorScheme=r}null==o||o()},[E]),A=s.useCallback(e=>{let t="function"==typeof e?e(k):e;w(t);try{localStorage.setItem(i,t)}catch(e){}},[k]),P=s.useCallback(e=>{T(f(e)),"system"===k&&n&&!t&&L("system")},[k,t]);s.useEffect(()=>{let e=window.matchMedia(o);return e.addListener(P),P(e),()=>e.removeListener(P)},[P]),s.useEffect(()=>{let 
e=e=>{e.key===i&&(e.newValue?w(e.newValue):A(b))};return window.addEventListener("storage",e),()=>window.removeEventListener("storage",e)},[A]),s.useEffect(()=>{L(null!=t?t:k)},[t,k]);let N=s.useMemo(()=>({theme:k,setTheme:A,forcedTheme:t,resolvedTheme:"system"===k?C:k,themes:n?[...d,"system"]:d,systemTheme:n?C:void 0}),[k,A,t,C,n,d]);return s.createElement(c.Provider,{value:N},s.createElement(u,{forcedTheme:t,storageKey:i,attribute:p,enableSystem:n,enableColorScheme:a,defaultTheme:b,value:v,themes:d,nonce:E,scriptProps:S}),g)},u=s.memo(e=>{let{forcedTheme:t,storageKey:r,attribute:n,enableSystem:l,enableColorScheme:o,defaultTheme:c,value:i,themes:m,nonce:d,scriptProps:u}=e,h=JSON.stringify([n,r,c,t,m,i,l,o]).slice(1,-1);return s.createElement("script",{...u,suppressHydrationWarning:!0,nonce:"",dangerouslySetInnerHTML:{__html:"(".concat(a.toString(),")(").concat(h,")")}})}),h=(e,t)=>{let r;try{r=localStorage.getItem(e)||void 0}catch(e){}return r||t},y=e=>{let t=document.createElement("style");return e&&t.setAttribute("nonce",e),t.appendChild(document.createTextNode("*,*::before,*::after{-webkit-transition:none!important;-moz-transition:none!important;-o-transition:none!important;-ms-transition:none!important;transition:none!important}")),document.head.appendChild(t),()=>{window.getComputedStyle(document.body),setTimeout(()=>{document.head.removeChild(t)},1)}},f=e=>(e||(e=window.matchMedia(o)),e.matches?"dark":"light");function b(e){let{children:t,...r}=e;return(0,n.jsx)(i,{...r,children:t})}},9324:()=>{},9840:e=>{e.exports={style:{fontFamily:"'Inter', 'Inter Fallback'",fontStyle:"normal"},className:"__className_e8ce0c"}}},e=>{var t=t=>e(e.s=t);e.O(0,[385,441,684,358],()=>t(2705)),_N_E=e.O()}]);

View file

@@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[974],{4441:()=>{}},_=>{var e=e=>_(_.s=e);_.O(0,[441,684,358],()=>e(4441)),_N_E=_.O()}]);

View file

@@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[358],{9941:(e,s,n)=>{Promise.resolve().then(n.t.bind(n,894,23)),Promise.resolve().then(n.t.bind(n,4970,23)),Promise.resolve().then(n.t.bind(n,6614,23)),Promise.resolve().then(n.t.bind(n,6975,23)),Promise.resolve().then(n.t.bind(n,7555,23)),Promise.resolve().then(n.t.bind(n,4911,23)),Promise.resolve().then(n.t.bind(n,9665,23)),Promise.resolve().then(n.t.bind(n,1295,23))}},e=>{var s=s=>e(e.s=s);e.O(0,[441,684],()=>(s(5415),s(9941))),_N_E=e.O()}]);

File diff suppressed because one or more lines are too long

View file

@@ -0,0 +1 @@
self.__BUILD_MANIFEST=function(e,r,t){return{__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},__routerFilterStatic:{numItems:2,errorRate:1e-4,numBits:39,numHashes:14,bitArray:[0,1,1,0,r,e,e,r,r,e,e,r,e,e,e,r,r,e,e,e,e,r,e,r,r,r,r,e,e,e,r,e,r,e,r,e,e,e,r]},__routerFilterDynamic:{numItems:r,errorRate:1e-4,numBits:r,numHashes:null,bitArray:[]},"/_error":["static/chunks/pages/_error-71d2b6a7b832d02a.js"],sortedPages:["/_app","/_error"]}}(1,0,1e-4),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

View file

@@ -0,0 +1 @@
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()

File diff suppressed because one or more lines are too long