This commit is contained in:
Eduard Prigoana 2025-07-20 18:23:49 +03:00
parent fb499a72c4
commit 4b708ba090
13 changed files with 272 additions and 141 deletions

360
main.py
View file

@@ -1,168 +1,288 @@
import os
import threading
import time
import requests
import zipfile
import threading
import time
import random
import hashlib
from bs4 import BeautifulSoup
import csv
import re
from bs4 import BeautifulSoup
from flask import Flask, send_file, render_template, send_from_directory
from flask_cors import CORS
from flask import Flask, send_file
import os
import json
app = Flask(__name__, template_folder="templates")
CORS(app) # ✅ ENABLE CORS FOR ALL ROUTES
app = Flask(__name__)
# Constants
ZIP_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=zip"
XLSX_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=xlsx"
ZIP_FILE = "Trackerhub Reup.zip"
XLSX_FILE = "artists.xlsx"
EXTRACT_FOLDER = "sheet"
HTML_FILE = os.path.join(EXTRACT_FOLDER, "Artists.html")
CSV_FILE = "artists.csv"
# Artist names to exclude (no emojis, trimmed)
EXCLUDE_ARTISTS = {
"🤖 AI Models",
"🤖 Lawson",
"Comps & Edits",
ZIP_FILENAME = "Trackerhub.zip"
HTML_FILENAME = "Artists.html"
CSV_FILENAME = "artists.csv"
XLSX_FILENAME = "artists.xlsx"
exclude_names = {
"AI Models",
"Lawson",
"BPM Tracker",
"Worst Comps & Edits",
"Yedits",
"Allegations",
"Rap Disses Timeline",
"Underground Artists"
"Underground Artists",
}
def remove_emojis(text):
    """Strip common emoji characters, plus any whitespace hugging them, from *text*.

    The character class covers the main emoticon/symbol/transport/flag
    blocks; anything matched is removed together with surrounding spaces,
    and the final result is trimmed.
    """
    pattern = (
        r'\s*['
        '\U0001F600-\U0001F64F'
        '\U0001F300-\U0001F5FF'
        '\U0001F680-\U0001F6FF'
        '\U0001F1E0-\U0001F1FF'
        '\u2702-\u27B0'
        '\u24C2-\U0001F251'
        ']\s*'
    )
    return re.sub(pattern, '', text, flags=re.UNICODE).strip()
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36"
def fetch_and_process():
try:
print("[*] Downloading ZIP...")
# URLs to archive on changes — update these to your actual hosted domain
BASE_URL = "http://localhost:5000" # Change this to your public domain when deployed
ARCHIVE_URLS = [
f"{BASE_URL}/",
f"{BASE_URL}/index.html/",
f"{BASE_URL}/artists.html",
f"{BASE_URL}/artists.csv",
f"{BASE_URL}/artists.xlsx",
]
DISCORD_WEBHOOK_URL = os.getenv("DISCORD_WEBHOOK_URL")
def clean_artist_name(text):
    """Drop the star/robot/mask decorations and variation selectors, then trim."""
    without_marks = re.sub(r'[⭐🤖🎭\u2B50\uFE0F]', '', text)
    return without_marks.strip()
def force_star_flag(starred=True):
    """Map the truthiness of *starred* onto the CSV's "Yes"/"No" flag."""
    labels = {True: "Yes", False: "No"}
    return labels[bool(starred)]
def download_zip_and_extract_html():
print("🔄 Downloading ZIP...")
r = requests.get(ZIP_URL)
with open(ZIP_FILE, "wb") as f:
r.raise_for_status()
with open(ZIP_FILENAME, "wb") as f:
f.write(r.content)
print(f"✅ Saved ZIP as {ZIP_FILENAME}")
print("[*] Extracting ZIP...")
with zipfile.ZipFile(ZIP_FILE, 'r') as zip_ref:
zip_ref.extractall(EXTRACT_FOLDER)
with zipfile.ZipFile(ZIP_FILENAME, "r") as z:
with z.open(HTML_FILENAME) as html_file:
html_content = html_file.read()
with open(HTML_FILENAME, "wb") as f:
f.write(html_content)
print(f"✅ Extracted {HTML_FILENAME}")
print("[*] Downloading XLSX...")
def download_xlsx():
print("🔄 Downloading XLSX...")
r = requests.get(XLSX_URL)
with open(XLSX_FILE, "wb") as f:
r.raise_for_status()
with open(XLSX_FILENAME, "wb") as f:
f.write(r.content)
print(f"✅ Saved XLSX as {XLSX_FILENAME}")
print("[*] Parsing HTML...")
with open(HTML_FILE, "r", encoding="utf-8") as f:
def generate_csv():
print("📝 Generating CSV...")
with open(HTML_FILENAME, "r", encoding="utf-8") as f:
soup = BeautifulSoup(f, "html.parser")
table = soup.find("table", class_="waffle")
if not table:
print("[!] Table not found.")
return
rows = table.find_all("tr")[4:]
rows = soup.select("table.waffle tbody tr")[3:] # skip headers and Discord
data = []
starring = True
for row in rows:
cols = row.find_all("td")
if len(cols) < 4:
cells = row.find_all("td")
if len(cells) < 4:
continue
artist_cell = cols[0]
a_tag = artist_cell.find("a")
artist_name_raw = a_tag.text.strip() if a_tag else artist_cell.text.strip()
artist_name_clean = remove_emojis(artist_name_raw.replace('"', '')).strip()
if artist_name_clean in EXCLUDE_ARTISTS:
link_tag = cells[0].find("a")
artist_name_raw = link_tag.get_text(strip=True) if link_tag else cells[0].get_text(strip=True)
artist_url = link_tag["href"] if link_tag else ""
if not artist_url:
continue
artist_url = a_tag['href'] if a_tag and a_tag.has_attr('href') else ""
credits = cols[1].get_text(strip=True)
updated = cols[2].get_text(strip=True)
links_work = cols[3].get_text(strip=True)
if "AI Models" in artist_name_raw:
starring = False
cleaned_row = [
artist_name_clean,
remove_emojis(artist_url.replace('"', '')),
remove_emojis(credits.replace('"', '')),
remove_emojis(updated.replace('"', '')),
remove_emojis(links_work.replace('"', ''))
]
artist_name_clean = clean_artist_name(artist_name_raw)
if artist_name_clean in exclude_names:
continue
if all(cell for cell in cleaned_row):
data.append(cleaned_row)
if "🚩" in artist_name_raw:
continue
data.sort(key=lambda row: row[0].lower())
best = force_star_flag(starring)
credit = cells[1].get_text(strip=True)
updated = cells[2].get_text(strip=True)
links_work = cells[3].get_text(strip=True)
print(f"[*] Writing {len(data)} rows to CSV...")
with open(CSV_FILE, "w", newline="", encoding="utf-8") as f:
writer = csv.writer(f)
writer.writerow(["artist name", "URL", "credits", "updated", "links work"])
data.append([artist_name_clean, artist_url, credit, links_work, updated, best])
with open(CSV_FILENAME, "w", newline='', encoding="utf-8") as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["Artist Name", "URL", "Credit", "Links Work", "Updated", "Best"])
writer.writerows(data)
print("[✓] Done! CSV and XLSX updated.")
print(f"✅ CSV saved as {CSV_FILENAME}")
def hash_file(filename):
    """Return the SHA-256 hex digest of *filename*'s contents.

    Reads the file in fixed-size chunks instead of one f.read() so that
    large downloads (the ZIP/XLSX exports) never have to fit in memory
    at once; the digest is identical either way.
    """
    hasher = hashlib.sha256()
    with open(filename, "rb") as f:
        # iter(..., b"") yields 64 KiB chunks until EOF.
        for chunk in iter(lambda: f.read(65536), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
def archive_url(url):
    """Ask the Wayback Machine to snapshot *url*.

    Best effort: every failure (non-200 status or any exception) is
    reported on stdout and swallowed — this never raises.
    """
    print(f"🌐 Archiving {url} ...")
    try:
        response = requests.get(
            f"https://web.archive.org/save/{url}",
            headers={"User-Agent": USER_AGENT},
            timeout=30,
        )
    except Exception as e:
        print(f"⚠️ Exception archiving {url}: {e}")
        return
    if response.status_code == 200:
        print(f"✅ Archived {url}")
    else:
        print(f"⚠️ Failed to archive {url}, status code {response.status_code}")
def archive_all_urls():
    """Archive every URL in ARCHIVE_URLS, sleeping ~10s (±3s jitter) before each request."""
    for target in ARCHIVE_URLS:
        # Jittered pause before *every* request keeps us polite to web.archive.org.
        time.sleep(10 + random.uniform(-3, 3))
        archive_url(target)
def read_csv_to_dict(filename):
    """Load *filename* (a CSV with headers) into a dict keyed by "Artist Name".

    Each value is the full row as a dict of column -> cell. If two rows
    share an artist name, the later row wins.
    """
    rows = {}
    with open(filename, newline='', encoding='utf-8') as handle:
        for record in csv.DictReader(handle):
            rows[record["Artist Name"]] = record
    return rows
def detect_changes(old_data, new_data):
    """Compare two CSV snapshots (dicts keyed by artist name, see read_csv_to_dict).

    Returns a list of human-readable change strings (Discord-flavored
    markdown). Keys are iterated in sorted order so the report is
    deterministic — the original iterated raw sets, which made the
    Discord message order vary run to run for identical diffs.
    """
    changes = []
    old_keys = set(old_data.keys())
    new_keys = set(new_data.keys())

    for artist in sorted(old_keys - new_keys):
        changes.append(f"❌ Removed: **{artist}**")
    for artist in sorted(new_keys - old_keys):
        changes.append(f" Added: **{artist}**")

    # Field comparisons table-driven instead of five copy-pasted ifs;
    # the message text for each field is unchanged.
    tracked_fields = [
        ("URL", "🔗 Link changed for **{}**"),
        ("Credit", "✏️ Credit changed for **{}**"),
        ("Links Work", "🔄 Links Work status changed for **{}**"),
        ("Updated", "🕒 Updated date changed for **{}**"),
        ("Best", "⭐ Best flag changed for **{}**"),
    ]
    for artist in sorted(old_keys & new_keys):
        old_row = old_data[artist]
        new_row = new_data[artist]
        for field, template in tracked_fields:
            if old_row[field] != new_row[field]:
                changes.append(template.format(artist))
    return changes
def send_discord_message(content):
    """POST *content* to the configured Discord webhook.

    No-op (with a warning) when DISCORD_WEBHOOK_URL is unset; any
    network failure is printed and swallowed — this never raises.
    """
    if not DISCORD_WEBHOOK_URL:
        print("⚠️ Discord webhook URL not set in env")
        return
    payload = json.dumps({"content": content})
    try:
        resp = requests.post(
            DISCORD_WEBHOOK_URL,
            headers={"Content-Type": "application/json"},
            data=payload,
            timeout=10,
        )
    except Exception as e:
        print(f"⚠️ Exception sending Discord notification: {e}")
        return
    # Discord returns 204 (no content) on success; accept 200 too.
    if resp.status_code in (200, 204):
        print("✅ Discord notification sent")
    else:
        print(f"⚠️ Failed to send Discord notification, status code {resp.status_code}")
def update_loop():
last_csv_hash = None
last_csv_data = {}
while True:
try:
download_zip_and_extract_html()
download_xlsx()
generate_csv()
current_hash = hash_file(CSV_FILENAME)
current_data = read_csv_to_dict(CSV_FILENAME)
if last_csv_hash is None:
print(" Initial CSV hash stored.")
elif current_hash != last_csv_hash:
print("🔔 CSV has changed! Archiving URLs...")
changes = detect_changes(last_csv_data, current_data)
if changes:
message = "**CSV Update Detected:**\n" + "\n".join(changes)
send_discord_message(message)
else:
print(" No detectable content changes found.")
archive_all_urls()
else:
print(" CSV unchanged. No archiving needed.")
last_csv_hash = current_hash
last_csv_data = current_data
except Exception as e:
print(f"[!] Error: {e}")
print(f"⚠️ Error updating files: {e}")
def background_updater():
while True:
fetch_and_process()
time.sleep(600)
# Routes
@app.route("/")
@app.route("/index")
@app.route("/index.html")
def index():
return render_template("index.html")
@app.route('/favicon.png')
def serve_favicon():
return send_from_directory(app.template_folder, 'favicon.png')
@app.route("/artists.csv")
def serve_csv():
if os.path.exists(CSV_FILE):
return send_file(CSV_FILE, mimetype="text/csv", as_attachment=False)
return "CSV not ready yet.", 503
@app.route("/artists.xlsx")
def serve_xlsx():
if os.path.exists(XLSX_FILE):
return send_file(XLSX_FILE, mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", as_attachment=False)
return "XLSX not ready yet.", 503
time.sleep(600) # 10 minutes
@app.route("/artists.html")
def serve_artists_html():
if os.path.exists(HTML_FILE):
return send_file(HTML_FILE, mimetype="text/html")
return "HTML file not found.", 404
return send_file(HTML_FILENAME, mimetype="text/html")
@app.route("/<path:path>")
def catch_all(path):
if os.path.exists(CSV_FILE):
return send_file(CSV_FILE, mimetype="text/csv", as_attachment=False)
return "CSV not ready yet.", 503
@app.route("/artists.csv")
def serve_artists_csv():
return send_file(CSV_FILENAME, mimetype="text/csv")
@app.route('/_next/<path:filename>')
def serve_next(filename):
return send_from_directory(os.path.join(app.template_folder, '_next'), filename)
@app.route("/artists.xlsx")
def serve_artists_xlsx():
return send_file(XLSX_FILENAME, mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
@app.route("/")
@app.route("/index.html/")
def serve_index():
# Simple index page linking to your files
return """
<html>
<head><title>Artists Data</title></head>
<body>
<h1>Artists Data</h1>
<ul>
<li><a href="/artists.html">Artists.html</a></li>
<li><a href="/artists.csv">artists.csv</a></li>
<li><a href="/artists.xlsx">artists.xlsx</a></li>
</ul>
</body>
</html>
"""
if __name__ == "__main__":
thread = threading.Thread(target=background_updater, daemon=True)
thread.start()
threading.Thread(target=update_loop, daemon=True).start()
try:
download_zip_and_extract_html()
download_xlsx()
generate_csv()
except Exception as e:
print(f"⚠️ Initial update failed: {e}")
app.run(host="0.0.0.0", port=5000)

View file

@@ -1,4 +1,4 @@
flask
flask-cors
Flask
requests
beautifulsoup4
lxml

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[492],{3632:(e,t,r)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"default",{enumerable:!0,get:function(){return o}});let l=r(5155),n=r(6395);function o(){return(0,l.jsx)(n.HTTPAccessErrorFallback,{status:404,message:"This page could not be found."})}("function"==typeof t.default||"object"==typeof t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)},3868:(e,t,r)=>{(window.__NEXT_P=window.__NEXT_P||[]).push(["/_not-found/page",function(){return r(3632)}])},6395:(e,t,r)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"HTTPAccessErrorFallback",{enumerable:!0,get:function(){return o}}),r(8229);let l=r(5155);r(2115);let n={error:{fontFamily:'system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji"',height:"100vh",textAlign:"center",display:"flex",flexDirection:"column",alignItems:"center",justifyContent:"center"},desc:{display:"inline-block"},h1:{display:"inline-block",margin:"0 20px 0 0",padding:"0 23px 0 0",fontSize:24,fontWeight:500,verticalAlign:"top",lineHeight:"49px"},h2:{fontSize:14,fontWeight:400,lineHeight:"49px",margin:0}};function o(e){let{status:t,message:r}=e;return(0,l.jsxs)(l.Fragment,{children:[(0,l.jsx)("title",{children:t+": "+r}),(0,l.jsx)("div",{style:n.error,children:(0,l.jsxs)("div",{children:[(0,l.jsx)("style",{dangerouslySetInnerHTML:{__html:"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}),(0,l.jsx)("h1",{className:"next-error-h1",style:n.h1,children:t}),(0,l.jsx)("div",{style:n.desc,children:(0,l.jsx)("h2",{style:n.h2,children:r})})]})})]})}("function"==typeof t.default||"object"==typeof 
t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)}},e=>{var t=t=>e(e.s=t);e.O(0,[441,684,358],()=>t(3868)),_N_E=e.O()}]);

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[177],{2705:(e,t,r)=>{Promise.resolve().then(r.bind(r,7780)),Promise.resolve().then(r.t.bind(r,9840,23)),Promise.resolve().then(r.t.bind(r,9324,23))},7780:(e,t,r)=>{"use strict";r.d(t,{ThemeProvider:()=>b});var n=r(5155),s=r(2115),a=(e,t,r,n,s,a,l,o)=>{let c=document.documentElement,i=["light","dark"];function m(t){var r;(Array.isArray(e)?e:[e]).forEach(e=>{let r="class"===e,n=r&&a?s.map(e=>a[e]||e):s;r?(c.classList.remove(...n),c.classList.add(a&&a[t]?a[t]:t)):c.setAttribute(e,t)}),r=t,o&&i.includes(r)&&(c.style.colorScheme=r)}if(n)m(n);else try{let e=localStorage.getItem(t)||r,n=l&&"system"===e?window.matchMedia("(prefers-color-scheme: dark)").matches?"dark":"light":e;m(n)}catch(e){}},l=["light","dark"],o="(prefers-color-scheme: dark)",c=s.createContext(void 0),i=e=>s.useContext(c)?s.createElement(s.Fragment,null,e.children):s.createElement(d,{...e}),m=["light","dark"],d=e=>{let{forcedTheme:t,disableTransitionOnChange:r=!1,enableSystem:n=!0,enableColorScheme:a=!0,storageKey:i="theme",themes:d=m,defaultTheme:b=n?"system":"light",attribute:p="data-theme",value:v,children:g,nonce:E,scriptProps:S}=e,[k,w]=s.useState(()=>h(i,b)),[C,T]=s.useState(()=>"system"===k?f():k),_=v?Object.values(v):d,L=s.useCallback(e=>{let t=e;if(!t)return;"system"===e&&n&&(t=f());let s=v?v[t]:t,o=r?y(E):null,c=document.documentElement,i=e=>{"class"===e?(c.classList.remove(..._),s&&c.classList.add(s)):e.startsWith("data-")&&(s?c.setAttribute(e,s):c.removeAttribute(e))};if(Array.isArray(p)?p.forEach(i):i(p),a){let e=l.includes(b)?b:null,r=l.includes(t)?t:e;c.style.colorScheme=r}null==o||o()},[E]),A=s.useCallback(e=>{let t="function"==typeof e?e(k):e;w(t);try{localStorage.setItem(i,t)}catch(e){}},[k]),P=s.useCallback(e=>{T(f(e)),"system"===k&&n&&!t&&L("system")},[k,t]);s.useEffect(()=>{let e=window.matchMedia(o);return e.addListener(P),P(e),()=>e.removeListener(P)},[P]),s.useEffect(()=>{let 
e=e=>{e.key===i&&(e.newValue?w(e.newValue):A(b))};return window.addEventListener("storage",e),()=>window.removeEventListener("storage",e)},[A]),s.useEffect(()=>{L(null!=t?t:k)},[t,k]);let N=s.useMemo(()=>({theme:k,setTheme:A,forcedTheme:t,resolvedTheme:"system"===k?C:k,themes:n?[...d,"system"]:d,systemTheme:n?C:void 0}),[k,A,t,C,n,d]);return s.createElement(c.Provider,{value:N},s.createElement(u,{forcedTheme:t,storageKey:i,attribute:p,enableSystem:n,enableColorScheme:a,defaultTheme:b,value:v,themes:d,nonce:E,scriptProps:S}),g)},u=s.memo(e=>{let{forcedTheme:t,storageKey:r,attribute:n,enableSystem:l,enableColorScheme:o,defaultTheme:c,value:i,themes:m,nonce:d,scriptProps:u}=e,h=JSON.stringify([n,r,c,t,m,i,l,o]).slice(1,-1);return s.createElement("script",{...u,suppressHydrationWarning:!0,nonce:"",dangerouslySetInnerHTML:{__html:"(".concat(a.toString(),")(").concat(h,")")}})}),h=(e,t)=>{let r;try{r=localStorage.getItem(e)||void 0}catch(e){}return r||t},y=e=>{let t=document.createElement("style");return e&&t.setAttribute("nonce",e),t.appendChild(document.createTextNode("*,*::before,*::after{-webkit-transition:none!important;-moz-transition:none!important;-o-transition:none!important;-ms-transition:none!important;transition:none!important}")),document.head.appendChild(t),()=>{window.getComputedStyle(document.body),setTimeout(()=>{document.head.removeChild(t)},1)}},f=e=>(e||(e=window.matchMedia(o)),e.matches?"dark":"light");function b(e){let{children:t,...r}=e;return(0,n.jsx)(i,{...r,children:t})}},9324:()=>{},9840:e=>{e.exports={style:{fontFamily:"'Inter', 'Inter Fallback'",fontStyle:"normal"},className:"__className_e8ce0c"}}},e=>{var t=t=>e(e.s=t);e.O(0,[385,441,684,358],()=>t(2705)),_N_E=e.O()}]);

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[974],{4441:()=>{}},_=>{var e=e=>_(_.s=e);_.O(0,[441,684,358],()=>e(4441)),_N_E=_.O()}]);

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[358],{9941:(e,s,n)=>{Promise.resolve().then(n.t.bind(n,894,23)),Promise.resolve().then(n.t.bind(n,4970,23)),Promise.resolve().then(n.t.bind(n,6614,23)),Promise.resolve().then(n.t.bind(n,6975,23)),Promise.resolve().then(n.t.bind(n,7555,23)),Promise.resolve().then(n.t.bind(n,4911,23)),Promise.resolve().then(n.t.bind(n,9665,23)),Promise.resolve().then(n.t.bind(n,1295,23))}},e=>{var s=s=>e(e.s=s);e.O(0,[441,684],()=>(s(5415),s(9941))),_N_E=e.O()}]);

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
self.__BUILD_MANIFEST=function(e,r,t){return{__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},__routerFilterStatic:{numItems:2,errorRate:1e-4,numBits:39,numHashes:14,bitArray:[0,1,1,0,r,e,e,r,r,e,e,r,e,e,e,r,r,e,e,e,e,r,e,r,r,r,r,e,e,e,r,e,r,e,r,e,e,e,r]},__routerFilterDynamic:{numItems:r,errorRate:1e-4,numBits:r,numHashes:null,bitArray:[]},"/_error":["static/chunks/pages/_error-71d2b6a7b832d02a.js"],sortedPages:["/_app","/_error"]}}(1,0,1e-4),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

View file

@ -0,0 +1 @@
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()

File diff suppressed because one or more lines are too long