From 90a797b888082b256200781656bb365cc50066f5 Mon Sep 17 00:00:00 2001 From: Eduard Prigoana Date: Sat, 1 Nov 2025 19:10:43 +0200 Subject: [PATCH] go rewrite --- archive.py | 36 - config.py | 43 -- diff.py | 57 -- downloader.py | 42 -- .../package-lock.json | 9 + go.mod | 24 + go.sum | 63 ++ info/status.json | 9 +- main.go | 660 ++++++++++++++++++ main.py | 115 --- notify.py | 30 - parser.py | 70 -- requirements.txt | 6 - update_loop.py | 97 --- utils.py | 22 - 15 files changed, 760 insertions(+), 523 deletions(-) delete mode 100644 archive.py delete mode 100644 config.py delete mode 100644 diff.py delete mode 100644 downloader.py create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go delete mode 100644 main.py delete mode 100644 notify.py delete mode 100644 parser.py delete mode 100644 requirements.txt delete mode 100644 update_loop.py delete mode 100644 utils.py diff --git a/archive.py b/archive.py deleted file mode 100644 index 5035d0f..0000000 --- a/archive.py +++ /dev/null @@ -1,36 +0,0 @@ -# archive.py -import logging -import random -import time -from typing import List - -from waybackpy import WaybackMachineSaveAPI - -from config import ARCHIVE_URLS, USER_AGENT - -logger = logging.getLogger(__name__) - - -def archive_url(url: str): - logger.info(f"๐ŸŒ Archiving {url} ...") - try: - save_api = WaybackMachineSaveAPI(url, user_agent=USER_AGENT) - save_api.save() - logger.info(f"โœ… Archived {url}") - except Exception as e: - logger.error(f"โš ๏ธ Exception archiving {url}: {e}", exc_info=True) - - -def archive_all_urls(): - logger.info("--- Starting archival process for all URLs ---") - for url in ARCHIVE_URLS: - delay = 10 + random.uniform(-3, 3) - logger.info(f"Waiting {delay:.2f} seconds before next archive...") - time.sleep(delay) - archive_url(url) - logger.info("--- Archival process finished ---") - - -def test_archive(): - test_url = "https://httpbin.org/anything/foo/bar" - archive_url(test_url) \ No newline at end of file diff --git a/config.py b/config.py deleted file mode 100644 index 4470f72..0000000 --- a/config.py +++ /dev/null @@ -1,43 +0,0 @@ -import os - -SHEET_URL = "https://docs.google.com/spreadsheets/d/1Z8aANbxXbnUGoZPRvJfWL3gz6jrzPPrwVt3d0c1iJ_4" -ZIP_URL = SHEET_URL + "/export?format=zip" -XLSX_URL = SHEET_URL + "/export?format=xlsx" - - -ZIP_FILENAME = "Trackerhub.zip" -HTML_FILENAME = "Artists.html" -CSV_FILENAME = "artists.csv" -XLSX_FILENAME = "artists.xlsx" - -exclude_names = { - "๐ŸŽนWorst Comps & Edits" - "K4$H K4$$!n0", - "K4HKn0", - "AI Models", - "๐ŸŽน BPM & Key Tracker", - "๐ŸŽนComps & Edits" - "๐ŸŽน Worst Comps & Edits", - "๐ŸŽนWorst Comps & Edits", - "๐ŸŽน Yedits", - "๐ŸŽนComps & Edits", - "Allegations", - "Rap Disses Timeline", - "Underground Artists", - "๐ŸŽน Comps & Edits", - "๐ŸŽน Worst Comps & Edits" -} - -USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36" - -BASE_URL = "https://sheets.artistgrid.cx" - -ARCHIVE_URLS = [ - f"{BASE_URL}/", - f"{BASE_URL}/artists.html", - f"{BASE_URL}/artists.csv", - f"{BASE_URL}/artists.xlsx", - f"https://artistgrid.cx", -] - -DISCORD_WEBHOOK_URL = os.getenv("DISCORD_WEBHOOK_URL") diff --git a/diff.py b/diff.py deleted file mode 100644 index 2850e7d..0000000 --- a/diff.py +++ /dev/null @@ -1,57 +0,0 @@ -# diff.py -import csv -import logging -from typing import Dict, List - -logger = logging.getLogger(__name__) - - -def read_csv_to_dict(filename: str) -> Dict[str, Dict[str, str]]: - data = {} - try: - with open(filename, newline="", encoding="utf-8") as f: - reader = csv.DictReader(f) - for row in reader: - if "Artist Name" in row and row["Artist Name"]: - data[row["Artist Name"]] = row - except FileNotFoundError: - logger.warning(f"CSV file not found: {filename}") - except Exception as e: - logger.error(f"Error reading CSV file {filename}: {e}", exc_info=True) - return data - - -def detect_changes( - old_data: Dict[str, Dict[str, str]], new_data: Dict[str, Dict[str, str]] -) -> List[str]: - changes = [] - - old_keys = set(old_data.keys()) - new_keys = set(new_data.keys()) - - removed = sorted(list(old_keys - new_keys)) - added = sorted(list(new_keys - old_keys)) - common = sorted(list(old_keys & new_keys)) - - for artist in removed: - changes.append(f"โŒ Removed: **{artist}**") - - for artist in added: - changes.append(f"โž• Added: **{artist}**") - - for artist in common: - old_row = old_data[artist] - new_row = new_data[artist] - - if old_row.get("URL") != new_row.get("URL"): - changes.append(f"๐Ÿ”— Link changed for **{artist}**") - if old_row.get("Credit") != new_row.get("Credit"): - changes.append(f"โœ๏ธ Credit changed for **{artist}**") - if old_row.get("Links Work") != new_row.get("Links Work"): - changes.append(f"๐Ÿ”„ Links Work status changed for **{artist}**") - if old_row.get("Updated") != new_row.get("Updated"): - changes.append(f"๐Ÿ•’ Updated date changed for **{artist}**") - if old_row.get("Best") != new_row.get("Best"): - changes.append(f"โญ Best flag changed for **{artist}**") - - return changes \ No newline at end of file diff --git a/downloader.py b/downloader.py deleted file mode 100644 index 36da269..0000000 --- a/downloader.py +++ /dev/null @@ -1,42 +0,0 @@ -# downloader.py -import logging -import zipfile - -import requests - -from config import HTML_FILENAME, XLSX_FILENAME, XLSX_URL, ZIP_FILENAME, ZIP_URL - -logger = logging.getLogger(__name__) - - -def _download_file(url: str, filename: str, timeout: int = 30) -> bool: - logger.info(f"๐Ÿ”„ Downloading {filename}...") - try: - with requests.get(url, timeout=timeout) as r: - r.raise_for_status() - with open(filename, "wb") as f: - f.write(r.content) - logger.info(f"โœ… Saved {filename}") - return True - except requests.RequestException as e: - logger.error(f"โŒ Failed to download {filename}: {e}") - return False - - -def download_zip_and_extract_html(): - if not _download_file(ZIP_URL, ZIP_FILENAME): - return - - logger.info(f"๐Ÿ“ฆ Extracting {HTML_FILENAME} from {ZIP_FILENAME}...") - try: - with zipfile.ZipFile(ZIP_FILENAME, "r") as z: - html_content = z.read(HTML_FILENAME) - with open(HTML_FILENAME, "wb") as f: - f.write(html_content) - logger.info(f"โœ… Extracted {HTML_FILENAME}") - except (zipfile.BadZipFile, KeyError, FileNotFoundError) as e: - logger.error(f"โŒ Failed to extract {HTML_FILENAME}: {e}") - - -def download_xlsx(): - _download_file(XLSX_URL, XLSX_FILENAME) \ No newline at end of file diff --git a/frontend/artistgrid-sheets-frontend/package-lock.json b/frontend/artistgrid-sheets-frontend/package-lock.json index 4f4d27d..2baf4c1 100644 --- a/frontend/artistgrid-sheets-frontend/package-lock.json +++ b/frontend/artistgrid-sheets-frontend/package-lock.json @@ -1349,6 +1349,7 @@ "integrity": "sha512-EhBeSYX0Y6ye8pNebpKrwFJq7BoQ8J5SO6NlvNwwHjSj6adXJViPQrKlsyPw7hLBLvckEMO1yxeGdR82YBBlDg==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -1409,6 +1410,7 @@ "integrity": "sha512-jCNyAuXx8dr5KJMkecGmZ8KI61KBUhkCob+SD+C+I5+Y1FWI2Y3QmY4/cxMCC5WAsZqoEtEETVhUiUMIGCf6Bw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.40.0", "@typescript-eslint/types": "8.40.0", @@ -1926,6 +1928,7 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -2822,6 +2825,7 @@ "integrity": "sha512-TS9bTNIryDzStCpJN93aC5VRSW3uTx9sClUn4B87pwiCaJh220otoI0X8mJKr+VcPtniMdN8GKjlwgWGUv5ZKA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.12.1", @@ -2996,6 +3000,7 @@ "integrity": "sha512-whOE1HFo/qJDyX4SnXzP4N6zOWn79WhnCUY/iDR0mPfQZO8wcYE4JClzI2oZrhBnnMUCBCHZhO6VQyoBU95mZA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.9", @@ -5139,6 +5144,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz", "integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -5148,6 +5154,7 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.0.tgz", "integrity": "sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==", "license": "MIT", + "peer": true, "dependencies": { "scheduler": "^0.26.0" }, @@ -5858,6 +5865,7 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -6017,6 +6025,7 @@ "integrity": "sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..bbd8c66 --- /dev/null +++ b/go.mod @@ -0,0 +1,24 @@ +module artistgrid + +go 1.21 + +require ( + github.com/PuerkitoBio/goquery v1.8.1 + github.com/gofiber/fiber/v2 v2.52.0 +) + +require ( + github.com/andybalholm/brotli v1.0.5 // indirect + github.com/andybalholm/cascadia v1.3.1 // indirect + github.com/google/uuid v1.5.0 // indirect + github.com/klauspost/compress v1.17.0 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/rivo/uniseg v0.2.0 // indirect + github.com/valyala/bytebufferpool v1.0.0 // indirect + github.com/valyala/fasthttp v1.51.0 // indirect + github.com/valyala/tcplisten v1.0.0 // indirect + golang.org/x/net v0.17.0 // indirect + golang.org/x/sys v0.15.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..91f9d46 --- /dev/null +++ b/go.sum @@ -0,0 +1,63 @@ +github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM= +github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ= +github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= +github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= +github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= +github.com/gofiber/fiber/v2 v2.52.0 h1:S+qXi7y+/Pgvqq4DrSmREGiFwtB7Bu6+QFLuIHYw/UE= +github.com/gofiber/fiber/v2 v2.52.0/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= +github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= +github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/klauspost/compress v1.17.0 h1:Rnbp4K9EjcDuVuHtd0dgA4qNuv9yKDYKK1ulpJwgrqM= +github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= +github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA= +github.com/valyala/fasthttp v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g= +github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= +github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= +golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= +golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/info/status.json b/info/status.json index 6cb3f33..22a2e98 100644 --- a/info/status.json +++ b/info/status.json @@ -1,15 +1,14 @@ { - "last_updated": "2025-10-04T15:56:39.361555+00:00", + "last_updated": "2025-11-01T17:07:11Z", "files": { "Artists.html": { - "hash": "84e3e5797fabbe28261922dddcbb628fdb17135bd6978218d0c352f06e5fade9", - "last_archived": "2025-08-19T05:14:17.304886Z" + "hash": "cc693c4b529fbe15e175afb10b052fb27fc36ca1184e4e0f8a39c4f94920c435" }, "artists.csv": { - "hash": "77ecb4f1291bbb20a58c6693fe96ff00289e883549c72c2ba035bb611594d716" + "hash": "ac2941316ca0fab4807a0a22b9bc60154fff36383521acc93d188341d5f7263c" }, "artists.xlsx": { - "hash": "05813caf0e1f6fc991cd7e9334a3dc50faf2b5a7c878e17cf73282fc385ffbb1" + "hash": "6ccb6f8d9c9247b9d5fe3c2f0cc4a0a84eeb5cfa7c0d2d840abd43b129ea85b4" } } } \ No newline at end of file diff --git a/main.go b/main.go new file mode 100644 index 0000000..21ded29 --- /dev/null +++ b/main.go @@ -0,0 +1,660 @@ +package main + +import ( + "archive/zip" + "bytes" + "crypto/sha256" + "encoding/csv" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "log" + "net/http" + "os" + "regexp" + "sort" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" + "github.com/gofiber/fiber/v2" + "github.com/gofiber/fiber/v2/middleware/cors" +) + +const ( + SheetURL = "https://docs.google.com/spreadsheets/d/1Z8aANbxXbnUGoZPRvJfWL3gz6jrzPPrwVt3d0c1iJ_4" + ZipURL = SheetURL + "/export?format=zip" + XlsxURL = SheetURL + "/export?format=xlsx" + + ZipFilename = "Trackerhub.zip" + HTMLFilename = "Artists.html" + CSVFilename = "artists.csv" + XlsxFilename = "artists.xlsx" + + UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36" + BaseURL = "https://sheets.artistgrid.cx" + + UpdateIntervalSeconds = 600 + InfoPath = "info/status.json" + + DEV_MODE = false +) + +var ExcludeNames = map[string]bool{ + "๐ŸŽนWorst Comps & Edits": true, + "K4$H K4$$!n0": true, + "K4HKn0": true, + "AI Models": true, + "๐ŸŽน BPM & Key Tracker": true, + "๐ŸŽนComps & Edits": true, + "๐ŸŽน Worst Comps & Edits": true, + "๐ŸŽน Yedits": true, + "Allegations": true, + "Rap Disses Timeline": true, + "Underground Artists": true, +} + +var ManualCSVRows = [][]string{ + {"Kanye West", "https://docs.google.com/spreadsheets/d/1oGgQrlUxxoGNAiKa_98vhjxR96pxJ3OdvVHKqpvb29w/", "p4, @kiwieater, Maker, Bobby, SamV1sion, @comptonrapper, Rose, Dr Wolf, Oreo Eater, Arco, @Free The Robots, @Alek, @Commandtechno, Snoop Dogg, Awesomefied, @rocky, @flab, Shadow, Reuben๐Ÿ‡ฎ๐Ÿ‡ช, @razacosmica, @Marcemaire, Solidus Jack, Marin, garfiiieeelld", "Yes", "Yes", "Yes"}, +} + +var ( + lastHTMLHash string + lastCSVData ArtistData + emojiRegex = regexp.MustCompile(`[\p{So}\p{Sk}\x{FE0F}\x{FE0E}\x{200D}โญ๐Ÿค–๐ŸŽญ๏ธŽ]+`) +) + +type ArtistData map[string]map[string]string + +type FileInfo struct { + Hash string `json:"hash"` +} + +type StatusInfo struct { + LastUpdated string `json:"last_updated"` + Files map[string]FileInfo `json:"files"` +} + +type DiscordMessage struct { + Content string `json:"content"` +} + +func cleanArtistName(text string) string { + cleaned := emojiRegex.ReplaceAllString(text, "") + cleaned = strings.TrimSpace(cleaned) + cleaned = strings.TrimPrefix(cleaned, " ") + return cleaned +} + +func forceStarFlag(starred bool) string { + if starred { + return "Yes" + } + return "No" +} + +func hashFile(filename string) (string, error) { + f, err := os.Open(filename) + if err != nil { + return "file_not_found", err + } + defer f.Close() + + hasher := sha256.New() + if _, err := io.Copy(hasher, f); err != nil { + return "", err + } + + return hex.EncodeToString(hasher.Sum(nil)), nil +} + +func downloadFile(url, filename string, timeout time.Duration) bool { + log.Printf("Downloading %s...\n", filename) + + client := &http.Client{Timeout: timeout} + resp, err := client.Get(url) + if err != nil { + log.Printf("ERROR: Failed to download %s: %v\n", filename, err) + return false + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + log.Printf("ERROR: Failed to download %s: status %d\n", filename, resp.StatusCode) + return false + } + + out, err := os.Create(filename) + if err != nil { + log.Printf("ERROR: Failed to create file %s: %v\n", filename, err) + return false + } + defer out.Close() + + _, err = io.Copy(out, resp.Body) + if err != nil { + log.Printf("ERROR: Failed to write file %s: %v\n", filename, err) + return false + } + + log.Printf("SUCCESS: Saved %s\n", filename) + return true +} + +func downloadZipAndExtractHTML() { + if !downloadFile(ZipURL, ZipFilename, 30*time.Second) { + return + } + + log.Printf("Extracting %s from %s...\n", HTMLFilename, ZipFilename) + + r, err := zip.OpenReader(ZipFilename) + if err != nil { + log.Printf("ERROR: Failed to open zip file: %v\n", err) + return + } + defer r.Close() + + for _, f := range r.File { + if f.Name == HTMLFilename { + rc, err := f.Open() + if err != nil { + log.Printf("ERROR: Failed to open file in zip: %v\n", err) + return + } + defer rc.Close() + + content, err := io.ReadAll(rc) + if err != nil { + log.Printf("ERROR: Failed to read file from zip: %v\n", err) + return + } + + err = os.WriteFile(HTMLFilename, content, 0644) + if err != nil { + log.Printf("ERROR: Failed to write extracted file: %v\n", err) + return + } + + log.Printf("SUCCESS: Extracted %s\n", HTMLFilename) + return + } + } + + log.Printf("ERROR: %s not found in zip archive\n", HTMLFilename) +} + +func downloadXLSX() { + downloadFile(XlsxURL, XlsxFilename, 30*time.Second) +} + +func quoteCSVField(field string) string { + escaped := strings.ReplaceAll(field, `"`, `""`) + return `"` + escaped + `"` +} + +func writeCSVRow(w io.Writer, fields []string) error { + quotedFields := make([]string, len(fields)) + for i, field := range fields { + quotedFields[i] = quoteCSVField(field) + } + _, err := w.Write([]byte(strings.Join(quotedFields, ",") + "\n")) + return err +} + +func generateCSV() { + log.Printf("Generating %s from %s...\n", CSVFilename, HTMLFilename) + + f, err := os.Open(HTMLFilename) + if err != nil { + log.Printf("ERROR: %s not found. Cannot generate CSV.\n", HTMLFilename) + return + } + defer f.Close() + + doc, err := goquery.NewDocumentFromReader(f) + if err != nil { + log.Printf("ERROR: Failed to parse HTML: %v\n", err) + return + } + + tableBody := doc.Find("table.waffle tbody") + if tableBody.Length() == 0 { + log.Println("ERROR: Could not find the table body in HTML. Cannot generate CSV.") + return + } + + rows := tableBody.Find("tr") + var data [][]string + starringSection := true + existingArtists := make(map[string]bool) + + rows.Each(func(i int, row *goquery.Selection) { + if i < 3 { + return + } + + cells := row.Find("td") + if cells.Length() < 4 { + return + } + + artistNameRaw := cells.Eq(0).Text() + artistNameRaw = strings.TrimSpace(artistNameRaw) + + artistURL, _ := cells.Eq(0).Find("a").Attr("href") + + if artistNameRaw == "" || artistURL == "" { + return + } + + if strings.Contains(artistNameRaw, "AI Models") { + starringSection = false + } + + artistNameClean := cleanArtistName(artistNameRaw) + if ExcludeNames[artistNameClean] || strings.Contains(artistNameRaw, "๐Ÿšฉ") { + return + } + + credit := strings.TrimSpace(cells.Eq(1).Text()) + linksWork := strings.TrimSpace(cells.Eq(3).Text()) + updated := strings.TrimSpace(cells.Eq(2).Text()) + best := forceStarFlag(starringSection) + + data = append(data, []string{ + artistNameClean, + artistURL, + credit, + linksWork, + updated, + best, + }) + existingArtists[artistNameClean] = true + }) + + for _, manualRow := range ManualCSVRows { + if len(manualRow) >= 6 { + artistName := manualRow[0] + if !existingArtists[artistName] { + data = append(data, manualRow) + existingArtists[artistName] = true + } + } + } + + sort.Slice(data, func(i, j int) bool { + bestI := data[i][5] + bestJ := data[j][5] + nameI := data[i][0] + nameJ := data[j][0] + + if bestI != bestJ { + return bestI > bestJ + } + return strings.ToLower(nameI) < strings.ToLower(nameJ) + }) + + csvFile, err := os.Create(CSVFilename) + if err != nil { + log.Printf("ERROR: Failed to create CSV file %s: %v\n", CSVFilename, err) + return + } + defer csvFile.Close() + + header := []string{"Artist Name", "URL", "Credit", "Links Work", "Updated", "Best"} + if err := writeCSVRow(csvFile, header); err != nil { + log.Printf("ERROR: Failed to write CSV header: %v\n", err) + return + } + + for _, record := range data { + if err := writeCSVRow(csvFile, record); err != nil { + log.Printf("ERROR: Failed to write CSV row: %v\n", err) + return + } + } + + log.Printf("SUCCESS: Generated %s with %d rows.\n", CSVFilename, len(data)) +} + +func readCSVToDict(filename string) ArtistData { + data := make(ArtistData) + + f, err := os.Open(filename) + if err != nil { + log.Printf("WARNING: CSV file not found: %s\n", filename) + return data + } + defer f.Close() + + reader := csv.NewReader(f) + records, err := reader.ReadAll() + if err != nil { + log.Printf("ERROR: Error reading CSV file %s: %v\n", filename, err) + return data + } + + if len(records) == 0 { + return data + } + + headers := records[0] + for _, record := range records[1:] { + if len(record) < len(headers) { + continue + } + + row := make(map[string]string) + for i, header := range headers { + row[header] = record[i] + } + + if artistName, ok := row["Artist Name"]; ok && artistName != "" { + data[artistName] = row + } + } + + return data +} + +func detectChanges(oldData, newData ArtistData) []string { + var changes []string + + oldKeys := make(map[string]bool) + newKeys := make(map[string]bool) + + for k := range oldData { + oldKeys[k] = true + } + for k := range newData { + newKeys[k] = true + } + + var removed []string + for k := range oldKeys { + if !newKeys[k] { + removed = append(removed, k) + } + } + sort.Strings(removed) + + var added []string + for k := range newKeys { + if !oldKeys[k] { + added = append(added, k) + } + } + sort.Strings(added) + + var common []string + for k := range oldKeys { + if newKeys[k] { + common = append(common, k) + } + } + sort.Strings(common) + + for _, artist := range removed { + changes = append(changes, "REMOVED: **"+artist+"**") + } + + for _, artist := range added { + changes = append(changes, "ADDED: **"+artist+"**") + } + + for _, artist := range common { + oldRow := oldData[artist] + newRow := newData[artist] + + if oldRow["URL"] != newRow["URL"] { + changes = append(changes, "LINK CHANGED: **"+artist+"**") + } + if oldRow["Credit"] != newRow["Credit"] { + changes = append(changes, "CREDIT CHANGED: **"+artist+"**") + } + if oldRow["Links Work"] != newRow["Links Work"] { + changes = append(changes, "LINKS WORK STATUS CHANGED: **"+artist+"**") + } + if oldRow["Updated"] != newRow["Updated"] { + changes = append(changes, "UPDATED DATE CHANGED: **"+artist+"**") + } + if oldRow["Best"] != newRow["Best"] { + changes = append(changes, "BEST FLAG CHANGED: **"+artist+"**") + } + } + + return changes +} + +func sendDiscordMessage(content string) { + webhookURL := os.Getenv("DISCORD_WEBHOOK_URL") + if webhookURL == "" { + log.Println("WARNING: Discord webhook URL not set. Skipping notification.") + return + } + + if len(content) > 2000 { + content = content[:1990] + "\n... (truncated)" + } + + message := DiscordMessage{Content: content} + jsonData, err := json.Marshal(message) + if err != nil { + log.Printf("WARNING: Failed to marshal Discord message: %v\n", err) + return + } + + resp, err := http.Post(webhookURL, "application/json", bytes.NewBuffer(jsonData)) + if err != nil { + log.Printf("WARNING: Exception sending Discord notification: %v\n", err) + return + } + defer resp.Body.Close() + + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + log.Println("SUCCESS: Discord notification sent successfully.") + } else { + log.Printf("WARNING: Discord notification failed with status: %d\n", resp.StatusCode) + } +} + +func writeInfo(htmlHash, csvHash, xlsxHash string) { + os.MkdirAll("info", 0755) + nowISO := time.Now().UTC().Format(time.RFC3339) + + var info StatusInfo + + data, err := os.ReadFile(InfoPath) + if err == nil { + json.Unmarshal(data, &info) + } + + if info.Files == nil { + info.Files = make(map[string]FileInfo) + } + + info.LastUpdated = nowISO + info.Files[HTMLFilename] = FileInfo{Hash: htmlHash} + info.Files[CSVFilename] = FileInfo{Hash: csvHash} + info.Files[XlsxFilename] = FileInfo{Hash: xlsxHash} + + jsonData, err := json.MarshalIndent(info, "", " ") + if err != nil { + log.Printf("WARNING: Failed to marshal status info: %v\n", err) + return + } + + os.WriteFile(InfoPath, jsonData, 0644) +} + +func runDevTests() { + log.Println("=== DEVELOPMENT MODE - Running Tests ===") + + log.Println("\nTesting Discord Webhook...") + testMessage := fmt.Sprintf("**Development Mode Test**\nTimestamp: %s\nWebhook is working correctly!", time.Now().Format(time.RFC3339)) + sendDiscordMessage(testMessage) + + log.Println("\nDevelopment tests completed!") + log.Println("=========================================\n") +} + +func updateLoop() { + for { + log.Println("--- Starting update cycle ---") + + downloadZipAndExtractHTML() + downloadXLSX() + generateCSV() + + files := []string{HTMLFilename, CSVFilename, XlsxFilename} + allExist := true + for _, f := range files { + if _, err := os.Stat(f); os.IsNotExist(err) { + allExist = false + break + } + } + + if !allExist { + log.Println("WARNING: One or more files are missing after download/parse. Skipping this cycle.") + time.Sleep(UpdateIntervalSeconds * time.Second) + continue + } + + htmlHash, _ := hashFile(HTMLFilename) + csvHash, _ := hashFile(CSVFilename) + xlsxHash, _ := hashFile(XlsxFilename) + currentCSVData := readCSVToDict(CSVFilename) + + if lastHTMLHash == "" { + log.Println("INFO: First run: storing initial file hashes.") + } else if htmlHash != lastHTMLHash { + log.Println("ALERT: Artists.html has changed! Checking for data differences.") + changes := detectChanges(lastCSVData, currentCSVData) + if len(changes) > 0 { + message := "**Tracker Update Detected:**\n" + strings.Join(changes, "\n") + sendDiscordMessage(message) + } else { + log.Println("INFO: HTML hash changed, but no data differences found.") + } + } else { + log.Println("INFO: Artists.html is unchanged.") + } + + writeInfo(htmlHash, csvHash, xlsxHash) + lastHTMLHash = htmlHash + lastCSVData = currentCSVData + + log.Println("--- Update cycle finished ---") + log.Printf("Sleeping for %d seconds...\n", UpdateIntervalSeconds) + time.Sleep(UpdateIntervalSeconds * time.Second) + } +} + +func getStatusData() (*StatusInfo, error) { + data, err := os.ReadFile(InfoPath) + if err != nil { + return nil, err + } + + var status StatusInfo + err = json.Unmarshal(data, &status) + if err != nil { + return nil, err + } + + return &status, nil +} + +func main() { + log.SetFlags(log.LstdFlags | log.Lshortfile) + + if DEV_MODE { + runDevTests() + } + + log.Println("Starting background update goroutine...") + go updateLoop() + + app := fiber.New() + app.Use(cors.New()) + + app.Get("/", func(c *fiber.Ctx) error { + return c.SendFile("templates/index.html") + }) + + app.Get("/artists.html", func(c *fiber.Ctx) error { + return c.SendFile(HTMLFilename) + }) + + app.Get("/artists.csv", func(c *fiber.Ctx) error { + return c.SendFile(CSVFilename) + }) + + app.Get("/artists.xlsx", func(c *fiber.Ctx) error { + return c.SendFile(XlsxFilename) + }) + + app.Static("/_next", "templates/_next") + + app.Get("/info", func(c *fiber.Ctx) error { + data, err := getStatusData() + if err != nil { + return c.Status(404).JSON(fiber.Map{"error": "Info not available"}) + } + return c.JSON(data) + }) + + app.Get("/info/html", func(c *fiber.Ctx) error { + data, err := getStatusData() + if err != nil { + c.Set("Content-Type", "text/html") + return c.Status(404).SendString("

Status info not available.

") + } + + htmlInfo := data.Files[HTMLFilename] + csvInfo := data.Files[CSVFilename] + xlsxInfo := data.Files[XlsxFilename] + + html := fmt.Sprintf(` + + + + + File Info + + + +

Latest File Info

+

Last Updated: %s

+ + + +`, data.LastUpdated, + HTMLFilename, htmlInfo.Hash, + CSVFilename, csvInfo.Hash, + XlsxFilename, xlsxInfo.Hash) + + c.Set("Content-Type", "text/html") + return c.SendString(html) + }) + + app.Use(func(c *fiber.Ctx) error { + return c.Status(404).SendFile("templates/404.html") + }) + + log.Println("Starting Fiber server on :5000...") + log.Fatal(app.Listen(":5000")) +} diff --git a/main.py b/main.py deleted file mode 100644 index 7a098ba..0000000 --- a/main.py +++ /dev/null @@ -1,115 +0,0 @@ -# main.py -import json -import logging -import os -import threading - -from flask import Flask, jsonify, send_file, send_from_directory -from flask_cors import CORS - -from config import CSV_FILENAME, HTML_FILENAME, XLSX_FILENAME -from update_loop import update_loop - -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) - -app = Flask(__name__) -CORS(app) - - -@app.route("/") -def serve_index(): - return send_file("templates/index.html") - - -@app.route("/artists.html") -def serve_artists_html(): - return send_file(HTML_FILENAME) - - -@app.route("/artists.csv") -def serve_artists_csv(): - return send_file(CSV_FILENAME) - - -@app.route("/artists.xlsx") -def serve_artists_xlsx(): - return send_file(XLSX_FILENAME) - - -@app.route("/_next/") -def serve_next_static(filename): - return send_from_directory("templates/_next", filename) - - -def get_status_data(): - info_path = os.path.join("info", "status.json") - if not os.path.exists(info_path): - return None - try: - with open(info_path, "r") as f: - return json.load(f) - except (IOError, json.JSONDecodeError) as e: - logger.error(f"Failed to read or parse status.json: {e}") - return None - - -@app.route("/info") -def info_json(): - data = get_status_data() - if data: - return jsonify(data) - return jsonify({"error": "Info not available"}), 404 - - -@app.route("/info/html") -def info_html(): - data = get_status_data() - if not data: - return "

Status info not available.

", 404 - - files_info = data.get("files", {}) - html_info = files_info.get(HTML_FILENAME, {}) - csv_info = files_info.get(CSV_FILENAME, {}) - xlsx_info = files_info.get(XLSX_FILENAME, {}) - - return f""" - - - - - File Info - - - -

Latest File Info

-

Last Updated: {data.get('last_updated', 'N/A')}

- - - - """ - - -@app.errorhandler(404) -def page_not_found(e): - return send_file("templates/404.html"), 404 - - -if __name__ == "__main__": - logger.info("Starting background update thread...") - threading.Thread(target=update_loop, daemon=True).start() - logger.info("Starting Flask server...") - app.run(host="0.0.0.0", port=5000) \ No newline at end of file diff --git a/notify.py b/notify.py deleted file mode 100644 index f6da4a6..0000000 --- a/notify.py +++ /dev/null @@ -1,30 +0,0 @@ -# notify.py -import json -import logging - -import requests - -from config import DISCORD_WEBHOOK_URL - -logger = logging.getLogger(__name__) - - -def send_discord_message(content: str): - if not DISCORD_WEBHOOK_URL: - logger.warning("Discord webhook URL not set. Skipping notification.") - return - - if len(content) > 2000: - content = content[:1990] + "\n... (truncated)" - - headers = {"Content-Type": "application/json"} - data = {"content": content} - - try: - response = requests.post( - DISCORD_WEBHOOK_URL, headers=headers, data=json.dumps(data), timeout=10 - ) - response.raise_for_status() - logger.info("โœ… Discord notification sent successfully.") - except requests.RequestException as e: - logger.error(f"โš ๏ธ Exception sending Discord notification: {e}") \ No newline at end of file diff --git a/parser.py b/parser.py deleted file mode 100644 index df05ab6..0000000 --- a/parser.py +++ /dev/null @@ -1,70 +0,0 @@ -# parser.py -import csv -import logging - -from bs4 import BeautifulSoup - -from config import CSV_FILENAME, HTML_FILENAME, exclude_names -from utils import clean_artist_name, force_star_flag - -logger = logging.getLogger(__name__) - - -def generate_csv(): - logger.info(f"๐Ÿ“ Generating {CSV_FILENAME} from {HTML_FILENAME}...") - try: - with open(HTML_FILENAME, "r", encoding="utf-8") as f: - soup = BeautifulSoup(f, "html.parser") - except FileNotFoundError: - logger.error(f"โŒ {HTML_FILENAME} not found. Cannot generate CSV.") - return - - table_body = soup.select_one("table.waffle tbody") - if not table_body: - logger.error("โŒ Could not find the table body in HTML. Cannot generate CSV.") - return - - rows = table_body.select("tr") - data = [] - starring_section = True - - for row in rows[3:]: - cells = row.find_all("td") - if len(cells) < 4: - continue - - artist_name_raw = cells[0].get_text(strip=True) - link_tag = cells[0].find("a") - artist_url = link_tag.get("href") if link_tag else "" - - if not artist_name_raw or not artist_url: - continue - - if "AI Models" in artist_name_raw: - starring_section = False - - artist_name_clean = clean_artist_name(artist_name_raw) - if artist_name_clean in exclude_names or "๐Ÿšฉ" in artist_name_raw: - continue - - data.append( - [ - artist_name_clean, - artist_url, - cells[1].get_text(strip=True), - cells[3].get_text(strip=True), - cells[2].get_text(strip=True), - force_star_flag(starring_section), - ] - ) - - try: - with open(CSV_FILENAME, "w", newline="", encoding="utf-8") as csvfile: - writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL) - writer.writerow( - ["Artist Name", "URL", "Credit", "Links Work", "Updated", "Best"] - ) - writer.writerows(data) - logger.info(f"โœ… Generated {CSV_FILENAME} with {len(data)} rows.") - except IOError as e: - logger.error(f"โŒ Failed to write CSV file {CSV_FILENAME}: {e}") \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 338beb1..0000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -Flask -requests -beautifulsoup4 -lxml -flask-cors -waybacKpy \ No newline at end of file diff --git a/update_loop.py b/update_loop.py deleted file mode 100644 index 1651294..0000000 --- a/update_loop.py +++ /dev/null @@ -1,97 +0,0 @@ -# update_loop.py -import json -import logging -import os -import time -from datetime import datetime, timezone - -from archive import archive_all_urls -from config import CSV_FILENAME, HTML_FILENAME, XLSX_FILENAME -from diff import detect_changes, read_csv_to_dict -from downloader import download_xlsx, download_zip_and_extract_html -from notify import send_discord_message -from parser import generate_csv -from utils import hash_file - -logger = logging.getLogger(__name__) - -last_html_hash = None -last_csv_data = {} -INFO_PATH = os.path.join("info", "status.json") -UPDATE_INTERVAL_SECONDS = 600 - - -def write_info(html_hash: str, csv_hash: str, xlsx_hash: str, is_archived: bool): - os.makedirs("info", exist_ok=True) - now_iso = datetime.now(timezone.utc).isoformat() - - try: - with open(INFO_PATH, "r") as f: - info = json.load(f) - except (FileNotFoundError, json.JSONDecodeError): - info = {"files": {HTML_FILENAME: {}}} - - info["last_updated"] = now_iso - info["files"][HTML_FILENAME]["hash"] = html_hash - if is_archived: - info["files"][HTML_FILENAME]["last_archived"] = now_iso - - info["files"][CSV_FILENAME] = {"hash": csv_hash} - info["files"][XLSX_FILENAME] = {"hash": xlsx_hash} - - with open(INFO_PATH, "w") as f: - json.dump(info, f, indent=2) - - -def update_loop(): - global last_html_hash, last_csv_data - - while True: - logger.info("--- Starting update cycle ---") - try: - download_zip_and_extract_html() - download_xlsx() - generate_csv() - - if not all( - os.path.exists(f) for f in [HTML_FILENAME, CSV_FILENAME, XLSX_FILENAME] - ): - logger.warning( - "One or more files are missing after download/parse. Skipping this cycle." - ) - time.sleep(UPDATE_INTERVAL_SECONDS) - continue - - html_hash = hash_file(HTML_FILENAME) - csv_hash = hash_file(CSV_FILENAME) - xlsx_hash = hash_file(XLSX_FILENAME) - current_csv_data = read_csv_to_dict(CSV_FILENAME) - - archived_this_cycle = False - if last_html_hash is None: - logger.info("First run: storing initial file hashes.") - elif html_hash != last_html_hash: - logger.info("๐Ÿ”” Artists.html has changed! Checking for data differences.") - changes = detect_changes(last_csv_data, current_csv_data) - if changes: - message = "**Tracker Update Detected:**\n" + "\n".join(changes) - send_discord_message(message) - archive_all_urls() - archived_this_cycle = True - else: - logger.info("โ„น๏ธ HTML hash changed, but no data differences found.") - else: - logger.info("โ„น๏ธ Artists.html is unchanged.") - - write_info(html_hash, csv_hash, xlsx_hash, is_archived=archived_this_cycle) - last_html_hash = html_hash - last_csv_data = current_csv_data - logger.info("--- Update cycle finished ---") - - except Exception as e: - logger.critical( - f"An unexpected error occurred in the update loop: {e}", exc_info=True - ) - - logger.info(f"Sleeping for {UPDATE_INTERVAL_SECONDS} seconds...") - time.sleep(UPDATE_INTERVAL_SECONDS) \ No newline at end of file diff --git a/utils.py b/utils.py deleted file mode 100644 index 3463f9f..0000000 --- a/utils.py +++ /dev/null @@ -1,22 +0,0 @@ -# utils.py -import hashlib -import re - - -def clean_artist_name(text: str) -> str: - return re.sub(r"[โญ๐Ÿค–๐ŸŽญ\u2B50\uFE0F]", "", text).strip() - - -def force_star_flag(starred: bool = True) -> str: - return "Yes" if starred else "No" - - -def hash_file(filename: str, block_size: int = 65536) -> str: - hasher = hashlib.sha256() - try: - with open(filename, "rb") as f: - for block in iter(lambda: f.read(block_size), b""): - hasher.update(block) - except FileNotFoundError: - return "file_not_found" - return hasher.hexdigest() \ No newline at end of file