This commit is contained in:
Eduard Prigoana 2025-07-20 18:23:49 +03:00
parent fb499a72c4
commit 4b708ba090
13 changed files with 272 additions and 141 deletions

390
main.py
View file

@@ -1,168 +1,288 @@
import os
import threading
import time
import requests import requests
import zipfile import zipfile
import threading
import time
import random
import hashlib
from bs4 import BeautifulSoup
import csv import csv
import re import re
from bs4 import BeautifulSoup from flask import Flask, send_file
from flask import Flask, send_file, render_template, send_from_directory import os
from flask_cors import CORS import json
app = Flask(__name__, template_folder="templates") app = Flask(__name__)
CORS(app) # ✅ ENABLE CORS FOR ALL ROUTES
# Constants
ZIP_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=zip" ZIP_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=zip"
XLSX_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=xlsx" XLSX_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=xlsx"
ZIP_FILE = "Trackerhub Reup.zip"
XLSX_FILE = "artists.xlsx"
EXTRACT_FOLDER = "sheet"
HTML_FILE = os.path.join(EXTRACT_FOLDER, "Artists.html")
CSV_FILE = "artists.csv"
# Artist names to exclude (no emojis, trimmed) ZIP_FILENAME = "Trackerhub.zip"
EXCLUDE_ARTISTS = { HTML_FILENAME = "Artists.html"
"🤖 AI Models", CSV_FILENAME = "artists.csv"
"🤖 Lawson", XLSX_FILENAME = "artists.xlsx"
"Comps & Edits",
exclude_names = {
"AI Models",
"Lawson",
"BPM Tracker",
"Worst Comps & Edits", "Worst Comps & Edits",
"Yedits",
"Allegations", "Allegations",
"Rap Disses Timeline", "Rap Disses Timeline",
"Underground Artists" "Underground Artists",
} }
def remove_emojis(text): USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36"
emoji_pattern = re.compile(
r'\s*['
'\U0001F600-\U0001F64F'
'\U0001F300-\U0001F5FF'
'\U0001F680-\U0001F6FF'
'\U0001F1E0-\U0001F1FF'
'\u2702-\u27B0'
'\u24C2-\U0001F251'
']\s*',
flags=re.UNICODE
)
cleaned_text = emoji_pattern.sub('', text)
return cleaned_text.strip()
def fetch_and_process(): # URLs to archive on changes — update these to your actual hosted domain
try: BASE_URL = "http://localhost:5000" # Change this to your public domain when deployed
print("[*] Downloading ZIP...")
r = requests.get(ZIP_URL)
with open(ZIP_FILE, "wb") as f:
f.write(r.content)
print("[*] Extracting ZIP...") ARCHIVE_URLS = [
with zipfile.ZipFile(ZIP_FILE, 'r') as zip_ref: f"{BASE_URL}/",
zip_ref.extractall(EXTRACT_FOLDER) f"{BASE_URL}/index.html/",
f"{BASE_URL}/artists.html",
print("[*] Downloading XLSX...") f"{BASE_URL}/artists.csv",
r = requests.get(XLSX_URL) f"{BASE_URL}/artists.xlsx",
with open(XLSX_FILE, "wb") as f:
f.write(r.content)
print("[*] Parsing HTML...")
with open(HTML_FILE, "r", encoding="utf-8") as f:
soup = BeautifulSoup(f, "html.parser")
table = soup.find("table", class_="waffle")
if not table:
print("[!] Table not found.")
return
rows = table.find_all("tr")[4:]
data = []
for row in rows:
cols = row.find_all("td")
if len(cols) < 4:
continue
artist_cell = cols[0]
a_tag = artist_cell.find("a")
artist_name_raw = a_tag.text.strip() if a_tag else artist_cell.text.strip()
artist_name_clean = remove_emojis(artist_name_raw.replace('"', '')).strip()
if artist_name_clean in EXCLUDE_ARTISTS:
continue
artist_url = a_tag['href'] if a_tag and a_tag.has_attr('href') else ""
credits = cols[1].get_text(strip=True)
updated = cols[2].get_text(strip=True)
links_work = cols[3].get_text(strip=True)
cleaned_row = [
artist_name_clean,
remove_emojis(artist_url.replace('"', '')),
remove_emojis(credits.replace('"', '')),
remove_emojis(updated.replace('"', '')),
remove_emojis(links_work.replace('"', ''))
] ]
if all(cell for cell in cleaned_row): DISCORD_WEBHOOK_URL = os.getenv("DISCORD_WEBHOOK_URL")
data.append(cleaned_row)
data.sort(key=lambda row: row[0].lower()) def clean_artist_name(text):
return re.sub(r'[⭐🤖🎭\u2B50\uFE0F]', '', text).strip()
print(f"[*] Writing {len(data)} rows to CSV...") def force_star_flag(starred=True):
with open(CSV_FILE, "w", newline="", encoding="utf-8") as f: return "Yes" if starred else "No"
writer = csv.writer(f)
writer.writerow(["artist name", "URL", "credits", "updated", "links work"]) def download_zip_and_extract_html():
print("🔄 Downloading ZIP...")
r = requests.get(ZIP_URL)
r.raise_for_status()
with open(ZIP_FILENAME, "wb") as f:
f.write(r.content)
print(f"✅ Saved ZIP as {ZIP_FILENAME}")
with zipfile.ZipFile(ZIP_FILENAME, "r") as z:
with z.open(HTML_FILENAME) as html_file:
html_content = html_file.read()
with open(HTML_FILENAME, "wb") as f:
f.write(html_content)
print(f"✅ Extracted {HTML_FILENAME}")
def download_xlsx():
    """Download the spreadsheet as XLSX and save it to XLSX_FILENAME.

    Raises:
        requests.HTTPError: on a non-2xx response (via raise_for_status).
    """
    print("🔄 Downloading XLSX...")
    # timeout matches archive_url(); without one, a stalled server would
    # hang the background update loop forever.
    r = requests.get(XLSX_URL, timeout=30)
    r.raise_for_status()
    with open(XLSX_FILENAME, "wb") as f:
        f.write(r.content)
    print(f"✅ Saved XLSX as {XLSX_FILENAME}")
def generate_csv():
    """Parse the extracted Artists.html sheet table and write CSV_FILENAME.

    Rows encountered before the "AI Models" divider row get Best="Yes";
    everything after it gets "No". Rows without a hyperlink, rows whose
    cleaned name is in exclude_names, and rows flagged with 🚩 are skipped.
    """
    print("📝 Generating CSV...")
    with open(HTML_FILENAME, "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f, "html.parser")
    rows = soup.select("table.waffle tbody tr")[3:]  # skip headers and Discord
    data = []
    starring = True  # flips to False once the "AI Models" divider is seen
    for row in rows:
        cells = row.find_all("td")
        if len(cells) < 4:
            continue
        link_tag = cells[0].find("a")
        artist_name_raw = link_tag.get_text(strip=True) if link_tag else cells[0].get_text(strip=True)
        artist_url = link_tag["href"] if link_tag else ""
        if not artist_url:
            continue
        if "AI Models" in artist_name_raw:
            starring = False
        artist_name_clean = clean_artist_name(artist_name_raw)
        if artist_name_clean in exclude_names:
            continue
        if "🚩" in artist_name_raw:
            continue
        best = force_star_flag(starring)
        credit = cells[1].get_text(strip=True)
        updated = cells[2].get_text(strip=True)
        links_work = cells[3].get_text(strip=True)
        data.append([artist_name_clean, artist_url, credit, links_work, updated, best])
    with open(CSV_FILENAME, "w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Artist Name", "URL", "Credit", "Links Work", "Updated", "Best"])
        writer.writerows(data)
    print(f"✅ CSV saved as {CSV_FILENAME}")
def hash_file(filename):
    """Return the SHA-256 hex digest of the file at *filename*.

    Reads in fixed-size chunks so arbitrarily large files never have to
    fit in memory at once (the original read the whole file in one call).
    """
    hasher = hashlib.sha256()
    with open(filename, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
def archive_url(url):
    """Request a Wayback Machine snapshot of *url*, logging the outcome.

    Never raises: any network or HTTP-level failure is printed and swallowed
    so a single bad URL cannot stop a batch of archive requests.
    """
    print(f"🌐 Archiving {url} ...")
    try:
        response = requests.get(
            f"https://web.archive.org/save/{url}",
            headers={"User-Agent": USER_AGENT},
            timeout=30,
        )
        if response.status_code == 200:
            print(f"✅ Archived {url}")
        else:
            print(f"⚠️ Failed to archive {url}, status code {response.status_code}")
    except Exception as e:
        print(f"⚠️ Exception archiving {url}: {e}")
def archive_all_urls():
    """Snapshot every URL in ARCHIVE_URLS, pausing ~10s (±3s) between each."""
    for target in ARCHIVE_URLS:
        # Jittered delay: uniform over [7, 13] seconds — algebraically the
        # same draw as 10 + uniform(-3, 3).
        time.sleep(random.uniform(7.0, 13.0))
        archive_url(target)
def read_csv_to_dict(filename):
    """Load a CSV file into a dict keyed by the "Artist Name" column.

    Each value is the full row dict as produced by csv.DictReader; rows
    sharing an artist name keep only the last occurrence.
    """
    with open(filename, newline='', encoding='utf-8') as fh:
        return {record["Artist Name"]: record for record in csv.DictReader(fh)}
def detect_changes(old_data, new_data):
    """Compare two CSV snapshots (dicts keyed by artist name).

    Returns a list of human-readable change descriptions suitable for a
    Discord message: removals, additions, then per-field changes for
    artists present in both snapshots.

    Keys are iterated in sorted order so repeated runs over the same data
    produce the same message — bare set iteration order varies between
    processes under hash randomization, which made notifications unstable.
    """
    changes = []
    old_keys = set(old_data)
    new_keys = set(new_data)

    for artist in sorted(old_keys - new_keys):
        changes.append(f"❌ Removed: **{artist}**")
    for artist in sorted(new_keys - old_keys):
        changes.append(f" Added: **{artist}**")
    for artist in sorted(old_keys & new_keys):
        old_row = old_data[artist]
        new_row = new_data[artist]
        if old_row["URL"] != new_row["URL"]:
            changes.append(f"🔗 Link changed for **{artist}**")
        if old_row["Credit"] != new_row["Credit"]:
            changes.append(f"✏️ Credit changed for **{artist}**")
        if old_row["Links Work"] != new_row["Links Work"]:
            changes.append(f"🔄 Links Work status changed for **{artist}**")
        if old_row["Updated"] != new_row["Updated"]:
            changes.append(f"🕒 Updated date changed for **{artist}**")
        if old_row["Best"] != new_row["Best"]:
            changes.append(f"⭐ Best flag changed for **{artist}**")
    return changes
def send_discord_message(content):
    """POST *content* to the configured Discord webhook, if one is set.

    No-op (with a warning) when DISCORD_WEBHOOK_URL is unset. Never raises:
    network failures are logged so the caller's update loop keeps running.
    """
    if not DISCORD_WEBHOOK_URL:
        print("⚠️ Discord webhook URL not set in env")
        return
    try:
        # json= serializes the payload and sets the Content-Type header,
        # replacing the manual json.dumps + headers dict.
        resp = requests.post(DISCORD_WEBHOOK_URL, json={"content": content}, timeout=10)
        if resp.status_code in (200, 204):
            print("✅ Discord notification sent")
        else:
            print(f"⚠️ Failed to send Discord notification, status code {resp.status_code}")
    except Exception as e:
        print(f"⚠️ Exception sending Discord notification: {e}")
def update_loop():
    """Background worker: refresh the published data every 10 minutes.

    Each pass re-downloads the source ZIP/XLSX, regenerates the CSV, and —
    only when the CSV content hash actually changed — sends a Discord diff
    notification and asks the Wayback Machine to archive the public URLs.
    Any exception is logged and the loop continues on the next cycle.
    """
    last_csv_hash = None
    last_csv_data = {}
    while True:
        try:
            download_zip_and_extract_html()
            download_xlsx()
            generate_csv()

            current_hash = hash_file(CSV_FILENAME)
            current_data = read_csv_to_dict(CSV_FILENAME)

            if last_csv_hash is None:
                print(" Initial CSV hash stored.")
            elif current_hash != last_csv_hash:
                print("🔔 CSV has changed! Archiving URLs...")
                changes = detect_changes(last_csv_data, current_data)
                if changes:
                    message = "**CSV Update Detected:**\n" + "\n".join(changes)
                    send_discord_message(message)
                else:
                    print(" No detectable content changes found.")
                archive_all_urls()
            else:
                print(" CSV unchanged. No archiving needed.")

            last_csv_hash = current_hash
            last_csv_data = current_data
        except Exception as e:
            print(f"⚠️ Error updating files: {e}")
        time.sleep(600)  # 10 minutes
while True:
fetch_and_process()
time.sleep(600)
# Routes
@app.route("/")
@app.route("/index")
@app.route("/index.html")
def index():
return render_template("index.html")
@app.route('/favicon.png')
def serve_favicon():
return send_from_directory(app.template_folder, 'favicon.png')
@app.route("/artists.csv")
def serve_csv():
if os.path.exists(CSV_FILE):
return send_file(CSV_FILE, mimetype="text/csv", as_attachment=False)
return "CSV not ready yet.", 503
@app.route("/artists.xlsx")
def serve_xlsx():
if os.path.exists(XLSX_FILE):
return send_file(XLSX_FILE, mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", as_attachment=False)
return "XLSX not ready yet.", 503
@app.route("/artists.html") @app.route("/artists.html")
def serve_artists_html(): def serve_artists_html():
if os.path.exists(HTML_FILE): return send_file(HTML_FILENAME, mimetype="text/html")
return send_file(HTML_FILE, mimetype="text/html")
return "HTML file not found.", 404
@app.route("/<path:path>") @app.route("/artists.csv")
def catch_all(path): def serve_artists_csv():
if os.path.exists(CSV_FILE): return send_file(CSV_FILENAME, mimetype="text/csv")
return send_file(CSV_FILE, mimetype="text/csv", as_attachment=False)
return "CSV not ready yet.", 503
@app.route('/_next/<path:filename>') @app.route("/artists.xlsx")
def serve_next(filename): def serve_artists_xlsx():
return send_from_directory(os.path.join(app.template_folder, '_next'), filename) return send_file(XLSX_FILENAME, mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
@app.route("/")
@app.route("/index.html/")
def serve_index():
    """Serve a minimal landing page linking to the downloadable artifacts."""
    index_html = """
    <html>
    <head><title>Artists Data</title></head>
    <body>
        <h1>Artists Data</h1>
        <ul>
            <li><a href="/artists.html">Artists.html</a></li>
            <li><a href="/artists.csv">artists.csv</a></li>
            <li><a href="/artists.xlsx">artists.xlsx</a></li>
        </ul>
    </body>
    </html>
    """
    return index_html
if __name__ == "__main__":
    # NOTE(review): the daemon thread runs the same downloads as the initial
    # refresh below, so the first update may effectively happen twice —
    # confirm this overlap is intended.
    threading.Thread(target=update_loop, daemon=True).start()
    try:
        # One synchronous refresh so the routes have files to serve before
        # the first background cycle completes.
        download_zip_and_extract_html()
        download_xlsx()
        generate_csv()
    except Exception as e:
        print(f"⚠️ Initial update failed: {e}")
    app.run(host="0.0.0.0", port=5000)

View file

@ -1,4 +1,4 @@
-flask
+Flask
-flask-cors
 requests
 beautifulsoup4
+lxml

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[492],{3632:(e,t,r)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"default",{enumerable:!0,get:function(){return o}});let l=r(5155),n=r(6395);function o(){return(0,l.jsx)(n.HTTPAccessErrorFallback,{status:404,message:"This page could not be found."})}("function"==typeof t.default||"object"==typeof t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)},3868:(e,t,r)=>{(window.__NEXT_P=window.__NEXT_P||[]).push(["/_not-found/page",function(){return r(3632)}])},6395:(e,t,r)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"HTTPAccessErrorFallback",{enumerable:!0,get:function(){return o}}),r(8229);let l=r(5155);r(2115);let n={error:{fontFamily:'system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji"',height:"100vh",textAlign:"center",display:"flex",flexDirection:"column",alignItems:"center",justifyContent:"center"},desc:{display:"inline-block"},h1:{display:"inline-block",margin:"0 20px 0 0",padding:"0 23px 0 0",fontSize:24,fontWeight:500,verticalAlign:"top",lineHeight:"49px"},h2:{fontSize:14,fontWeight:400,lineHeight:"49px",margin:0}};function o(e){let{status:t,message:r}=e;return(0,l.jsxs)(l.Fragment,{children:[(0,l.jsx)("title",{children:t+": "+r}),(0,l.jsx)("div",{style:n.error,children:(0,l.jsxs)("div",{children:[(0,l.jsx)("style",{dangerouslySetInnerHTML:{__html:"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}),(0,l.jsx)("h1",{className:"next-error-h1",style:n.h1,children:t}),(0,l.jsx)("div",{style:n.desc,children:(0,l.jsx)("h2",{style:n.h2,children:r})})]})})]})}("function"==typeof t.default||"object"==typeof 
t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)}},e=>{var t=t=>e(e.s=t);e.O(0,[441,684,358],()=>t(3868)),_N_E=e.O()}]);

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[177],{2705:(e,t,r)=>{Promise.resolve().then(r.bind(r,7780)),Promise.resolve().then(r.t.bind(r,9840,23)),Promise.resolve().then(r.t.bind(r,9324,23))},7780:(e,t,r)=>{"use strict";r.d(t,{ThemeProvider:()=>b});var n=r(5155),s=r(2115),a=(e,t,r,n,s,a,l,o)=>{let c=document.documentElement,i=["light","dark"];function m(t){var r;(Array.isArray(e)?e:[e]).forEach(e=>{let r="class"===e,n=r&&a?s.map(e=>a[e]||e):s;r?(c.classList.remove(...n),c.classList.add(a&&a[t]?a[t]:t)):c.setAttribute(e,t)}),r=t,o&&i.includes(r)&&(c.style.colorScheme=r)}if(n)m(n);else try{let e=localStorage.getItem(t)||r,n=l&&"system"===e?window.matchMedia("(prefers-color-scheme: dark)").matches?"dark":"light":e;m(n)}catch(e){}},l=["light","dark"],o="(prefers-color-scheme: dark)",c=s.createContext(void 0),i=e=>s.useContext(c)?s.createElement(s.Fragment,null,e.children):s.createElement(d,{...e}),m=["light","dark"],d=e=>{let{forcedTheme:t,disableTransitionOnChange:r=!1,enableSystem:n=!0,enableColorScheme:a=!0,storageKey:i="theme",themes:d=m,defaultTheme:b=n?"system":"light",attribute:p="data-theme",value:v,children:g,nonce:E,scriptProps:S}=e,[k,w]=s.useState(()=>h(i,b)),[C,T]=s.useState(()=>"system"===k?f():k),_=v?Object.values(v):d,L=s.useCallback(e=>{let t=e;if(!t)return;"system"===e&&n&&(t=f());let s=v?v[t]:t,o=r?y(E):null,c=document.documentElement,i=e=>{"class"===e?(c.classList.remove(..._),s&&c.classList.add(s)):e.startsWith("data-")&&(s?c.setAttribute(e,s):c.removeAttribute(e))};if(Array.isArray(p)?p.forEach(i):i(p),a){let e=l.includes(b)?b:null,r=l.includes(t)?t:e;c.style.colorScheme=r}null==o||o()},[E]),A=s.useCallback(e=>{let t="function"==typeof e?e(k):e;w(t);try{localStorage.setItem(i,t)}catch(e){}},[k]),P=s.useCallback(e=>{T(f(e)),"system"===k&&n&&!t&&L("system")},[k,t]);s.useEffect(()=>{let e=window.matchMedia(o);return e.addListener(P),P(e),()=>e.removeListener(P)},[P]),s.useEffect(()=>{let 
e=e=>{e.key===i&&(e.newValue?w(e.newValue):A(b))};return window.addEventListener("storage",e),()=>window.removeEventListener("storage",e)},[A]),s.useEffect(()=>{L(null!=t?t:k)},[t,k]);let N=s.useMemo(()=>({theme:k,setTheme:A,forcedTheme:t,resolvedTheme:"system"===k?C:k,themes:n?[...d,"system"]:d,systemTheme:n?C:void 0}),[k,A,t,C,n,d]);return s.createElement(c.Provider,{value:N},s.createElement(u,{forcedTheme:t,storageKey:i,attribute:p,enableSystem:n,enableColorScheme:a,defaultTheme:b,value:v,themes:d,nonce:E,scriptProps:S}),g)},u=s.memo(e=>{let{forcedTheme:t,storageKey:r,attribute:n,enableSystem:l,enableColorScheme:o,defaultTheme:c,value:i,themes:m,nonce:d,scriptProps:u}=e,h=JSON.stringify([n,r,c,t,m,i,l,o]).slice(1,-1);return s.createElement("script",{...u,suppressHydrationWarning:!0,nonce:"",dangerouslySetInnerHTML:{__html:"(".concat(a.toString(),")(").concat(h,")")}})}),h=(e,t)=>{let r;try{r=localStorage.getItem(e)||void 0}catch(e){}return r||t},y=e=>{let t=document.createElement("style");return e&&t.setAttribute("nonce",e),t.appendChild(document.createTextNode("*,*::before,*::after{-webkit-transition:none!important;-moz-transition:none!important;-o-transition:none!important;-ms-transition:none!important;transition:none!important}")),document.head.appendChild(t),()=>{window.getComputedStyle(document.body),setTimeout(()=>{document.head.removeChild(t)},1)}},f=e=>(e||(e=window.matchMedia(o)),e.matches?"dark":"light");function b(e){let{children:t,...r}=e;return(0,n.jsx)(i,{...r,children:t})}},9324:()=>{},9840:e=>{e.exports={style:{fontFamily:"'Inter', 'Inter Fallback'",fontStyle:"normal"},className:"__className_e8ce0c"}}},e=>{var t=t=>e(e.s=t);e.O(0,[385,441,684,358],()=>t(2705)),_N_E=e.O()}]);

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[974],{4441:()=>{}},_=>{var e=e=>_(_.s=e);_.O(0,[441,684,358],()=>e(4441)),_N_E=_.O()}]);

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[358],{9941:(e,s,n)=>{Promise.resolve().then(n.t.bind(n,894,23)),Promise.resolve().then(n.t.bind(n,4970,23)),Promise.resolve().then(n.t.bind(n,6614,23)),Promise.resolve().then(n.t.bind(n,6975,23)),Promise.resolve().then(n.t.bind(n,7555,23)),Promise.resolve().then(n.t.bind(n,4911,23)),Promise.resolve().then(n.t.bind(n,9665,23)),Promise.resolve().then(n.t.bind(n,1295,23))}},e=>{var s=s=>e(e.s=s);e.O(0,[441,684],()=>(s(5415),s(9941))),_N_E=e.O()}]);

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
self.__BUILD_MANIFEST=function(e,r,t){return{__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},__routerFilterStatic:{numItems:2,errorRate:1e-4,numBits:39,numHashes:14,bitArray:[0,1,1,0,r,e,e,r,r,e,e,r,e,e,e,r,r,e,e,e,e,r,e,r,r,r,r,e,e,e,r,e,r,e,r,e,e,e,r]},__routerFilterDynamic:{numItems:r,errorRate:1e-4,numBits:r,numHashes:null,bitArray:[]},"/_error":["static/chunks/pages/_error-71d2b6a7b832d02a.js"],sortedPages:["/_app","/_error"]}}(1,0,1e-4),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

View file

@ -0,0 +1 @@
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()

File diff suppressed because one or more lines are too long