From 95523697602f7ab0b28ff8b5bbffcedcce6786a4 Mon Sep 17 00:00:00 2001 From: Eduard Prigoana Date: Tue, 22 Jul 2025 06:16:02 +0300 Subject: [PATCH] modularize --- .gitignore | 1 + __pycache__/archive.cpython-313.pyc | Bin 0 -> 1475 bytes __pycache__/config.cpython-313.pyc | Bin 0 -> 1140 bytes __pycache__/diff.cpython-313.pyc | Bin 0 -> 2073 bytes __pycache__/downloader.cpython-313.pyc | Bin 0 -> 2197 bytes __pycache__/notify.cpython-313.pyc | Bin 0 -> 1246 bytes __pycache__/parser.cpython-313.pyc | Bin 0 -> 2438 bytes __pycache__/update_loop.cpython-313.pyc | Bin 0 -> 1781 bytes __pycache__/utils.cpython-313.pyc | Bin 0 -> 1127 bytes archive.py | 21 ++ config.py | 33 +++ diff.py | 42 ++++ downloader.py | 26 +++ main.py | 258 +----------------------- notify.py | 20 ++ parser.py | 47 +++++ update_loop.py | 47 +++++ utils.py | 14 ++ 18 files changed, 259 insertions(+), 250 deletions(-) create mode 100644 __pycache__/archive.cpython-313.pyc create mode 100644 __pycache__/config.cpython-313.pyc create mode 100644 __pycache__/diff.cpython-313.pyc create mode 100644 __pycache__/downloader.cpython-313.pyc create mode 100644 __pycache__/notify.cpython-313.pyc create mode 100644 __pycache__/parser.cpython-313.pyc create mode 100644 __pycache__/update_loop.cpython-313.pyc create mode 100644 __pycache__/utils.cpython-313.pyc create mode 100644 archive.py create mode 100644 config.py create mode 100644 diff.py create mode 100644 downloader.py create mode 100644 notify.py create mode 100644 parser.py create mode 100644 update_loop.py create mode 100644 utils.py diff --git a/.gitignore b/.gitignore index 92149bd..5ed4419 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ artists.csv Artists.html artists.xlsx Trackerhub.zip +.env diff --git a/__pycache__/archive.cpython-313.pyc b/__pycache__/archive.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ce85413869b9fe93c56971e9c834ddb29146dd9 GIT binary patch literal 1475 zcmZuxO=ufO6n^_Bt@UHcKcRLMI~#|#kyFbJL6z&aA+qY)O&XVIqfjbgqex>(Ag$Dy z-Lx^G_);*TU{ez6mOylLZ$A3i^wJzxIH}Dd2(*QwQ%!U!EybN#>&Jx*%zHEM&3y0e zd-HA6>vaJgBcFI-BFv z`T&w0pUNemm&Up%gj+TiE7{Hx-sCiH6p~^aEAD8ox_S&$k)d4V6Pzlkj<*GkPw;WM zd!oHkoztHHqREKQ#^j}3?%K!vv^KO2LLqT*tfcEEKfrC z|Gm?+`kD|GaxJ~gEyH`lX;|ih-UWju3kQ$B-cX`AlP}yZXgOs?R6?Q9iYq}7o`~j< zrmu|Tb-hBSCnrBgw?b+ABnp*rZjz*Lqshi71)QnKxeP7_=r)KZD)wu)SDNWUVudLs_ch?+!XD7B3Hcv&L zM9kD;&9}5B?z??^?m*2Q*d)8|vAU;k<68A>^@Q2~`ZLeD-#vZi;KWvDE3qB=ad0ns zy%xQ0UXGjn>NC%czv}M6CU6v8_L~0lTd}S1Hotvg`>fgf)}9=x$&r5!1nT~qjL;;% zi$o{jabJMR3D-=&@Ho;p;}w4LN_3r}2Reh%#3tN*x}b#XI0j(;5`C7G_Fa<2JM`)t z14=4h6~ZkdPEd!&B|GGM8M($F!F9=ybhoh*(6wH>4B!R@au5_38jLHAe-9528!XkJ z12bhga9YcjOP15n3d?0&vPAY+Er}Ens@P2n@|dy0lCr3nzC&1!ih}!@$=_~px>#gQ zCYaT&k4_S%VilhI0&Bv)zyJQtZx_E=tj=t4yC=qL{_!=bE_>Ep_gov{r}A)J9@vQe zA`iWAg8x*LUWA~G*%#rHObk(BIWlE!xsbDywqLcKn;q{5z>IN6Dar4a%2}g`F5xrO g#{P1I-KTBkxaTmk3nPEP`4^&#n`#b;+^~J~Ke_}-d;kCd literal 0 HcmV?d00001 diff --git a/__pycache__/config.cpython-313.pyc b/__pycache__/config.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b4935b56b2133b23bce15723f89d9ac8914eb915 GIT binary patch literal 1140 zcmcIjPjA~c9F-i$mTf6<;jKflwIzBO8lbc6xpszh>!3JpV)#!w)2_3^K%hvB%~}#w zQcheA7<3qRE(#R;4EqHA8Yy-Ov`c`Ub~BorFPr2j^JUlap!eQSIJ*?~e&JRp&bw+wSqlx0xDj>aKOEs*MpHO^))Ruc%N5Gl%6z zj%m0VHBuToam8y9E_|E}((n^a^^_6I&nSk0hN^>!#pWrc4q8M<2AgNLpSD2cFwb>V z8|+}l5CdIoA_H4!p6gjUqC-k1mJTBCd9F_=b3m1tHUnRRnvNajj$6dVhM~&kd;#3= zV@oHe479tTSjZP1fuK-%_y|lZ4>y2f+Xm{RqXu^5a%nSPs(|~Ay>6@d02p|Tz%J6p 
zWCK)3l$b~^7Q>EEJyt1>rn{ep@dt-&5KT2kggIN~LZRUEvMb7gGny{)kzGl?4rP&- z_@4{|TmB3a^K(0OSl#dJ9QJG5d;9y1!(OMkA^9=F{N(fc(@^@tK@%P8Cx)SI=o z(yIAVxcj9PtMJ)UNsC3xxoD~x6CE8|s)?AN?!j8;P}!}uyS}ikz}i)NrBmIjKU)as zCx*yD)(Oo7hx$8r8QyfiP~95z7@};T=q9!XJ4Bn9$a2_V7dv~CqrufxA85on#>2e* z$``&hi9RvVWBPf}AbdRR*Wfc-;d%Z~E_IQ9=_SuI%o8rw?|R~S_Kqi=uYVD-4{_l} z%#&uHaBDyDUiu<)&l6?|PRM#MUh`i2Jh>ha@hpD(ib-ejJO5XkWfIw#C(g3mXSaUC TZ}1r?|8cYOp3Bi&!B+kS6hB}a literal 0 HcmV?d00001 diff --git a/__pycache__/diff.cpython-313.pyc b/__pycache__/diff.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aeb9c534bc98f56d96c36c000da62d5d5170d047 GIT binary patch literal 2073 zcma)7Pi)&{6#r}|juSg+o9s{5ZmnAiTM8YFE>%0ND_J*M#F|4Q(@3kRve<5;HFmL` zP3r;Wz#)|oErY6^#314n4oDN?kQ+U6Oh~CBZ-J*!3HR z#nOa>#)5{2f~WB{OdIG8q!8De=K}B~$OJV;kw_u0?|VUU7l2zvKN84@W2sOA6++Vx zui{EZ1y{la0i)CK%y0;?h{qAaFXAxrNFWAKA9@X)aacuQl1U!*0@gkqr+Sf0gxxU{ zt+;cBJTw|6O^+hoUdkwv$w*3C6*J1h3LDjojBfC;R8|U8rb}Ix6q8Epw@lxaOxlP` zDN)kGZj+HSOOgU0lMqcpHfhZ|F)2;Gt>bWcJj}C`7v=ppQwkmFdgeyHkv#@+{`~Ya zFRXXG5oi0S?gi>kPESxj_@V(%x>+aCHtVZ^b_2bR>j=5Ehk)WZvX&8X_Rp*N4-4Xd zS-6tthj8`X$w^Dnum`V-M{vWvfJ>?{0vX3yZW#$eU;=P&hpBz@Me=g0alH~6S1+FFwm;@5JxeeHeq`6hoYquk`viz#J65_wtGz|N#6 zHA&1Exs%xTi?!d&YrL�j~J4wa(9~+Rfa{HsePYqpjn0BW2|D7Aw;uEQ={a5_7y| zvT>Jp6g*b9qFt9I-3jH}bslecc`3Ef0)qxb>5^f(Zc2A_lTIx|GZMqB$*4=>JkX|0 z20EC%^4PDd+lQ8cb!e?Tnw>z?ox&M2>9m^7stQnA-Olu2Lg-vpN5l)Ag}1O;m_2E@ z5@tY@3@L5QJFc_=P-37i!@m=HQ=mOR^=n&jeROmD)A7$HYHcs&rysTK9|w+A1AY0a zI(MYX_2#eE{YR^QKL7TU;E4iVXWG^RtAVZF(y%r!>jG}ATE@TH6Yz6Olb@V>)T)z;Txpkn`&2yxtLYO+>&%j!=Fst at@R*GdlW$s50UQ=7ftvZ?Y%_EviTcJVbnzc literal 0 HcmV?d00001 diff --git a/__pycache__/downloader.cpython-313.pyc b/__pycache__/downloader.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09d984d31a5a28b1cf0b6cec82089778e606a87d GIT binary patch literal 2197 zcmd^A-Af!-6uL*sjs&y4YPek*svtxU)KR-8FY+ zs|~aS+CmHkQ!sQ3()gG@^>2vr53o2(We^Ij5c*bXLGYzLcXqBruq1u&4BYd(_ndpa zIOp6$MUfGFU;O>~!p9s!zmX07SQi{#0q_u|P>PvAxK0_&z@D98$2rW6^OzqOurTh! 
zzHt$YDoSx}2>Vk!tkOIa5q>AC5)r``Z(f_qrjv0yK)}^&@sY&v*CU6Bu!76z@OyCdA<8h6nPLWAtfCAJ0wsCM_h=Ypd~`?elu#NaC3@=7 zK^KC~26~zaP?TZ^T}vz~F=;0G3gvhd^fq}ril|*kn)6dVV8@Gy^DH%@w^;8^<_y)7 zJWG*BAwJJTo@xdU*)*FxiNcf5v`|bffGn5hlIQ8UM`=Dv{3*XrG<*8~OMY(f)avP6 zoU-BNM)0gWpPP9ULVf6>>vA4$6j&8?A{G9`k3(E1DljjQ$iGx#;-wPPb%}^CQ8@>G z{PAf0OEp$rE)~nUyk44Dp$P^C25ffuR;3yEzixb`rgEQXdBRY0hH5jo(&#cqR!p3m zGl3aVY+(uOCDZm}?RG^oOvC2pH4~F|u$ypBH?(Y_jI)NBGb@J8mzT5>CiQO%%UCxx zTbwJGObrx>cl4!#UWAjI`qEVZSRi6N)^hoXfXQ&MnS#ySv84rbv6yva0}gam803OV zKKtzt-knVw8a8H!^NV_EHddaiEP@qdc2qaVDz|2h1xlBH3>%iaXrGsM%8B&5!^H^ zsrT{iJ*9E&`s(%dNlOWD>U$TuA4R{5?q2A<7l-SOT?b0@djIBGOX+zW+6yRaqpPD2 z;#MF;m~R}3NbdVfMuL1Vu`yJ=ydz(#@x4c{g4voBIzEy?K}`uCKT~>YO3$$)UXyx{ zjWG!Ci7yyy;I_`S#G>3*e{(D-Y(-lEKW#Y^>ldD0^aI{*kYne??VudHByIPIXwE&Li_gFGCniph+%m6K*zDH*0`yZ6CCIL7#U&hzo6D#S#Y4O!dF z*rka?l?xK;g-DAIf1m73J&nbn+>7LllMIF^{uWMKz>lSQ49s0*AOnvh*0Px4*iWzo*u=Q>HDN?UhI9}y zxrTMJAxbamIQlU6l`%4z&y_EY4v$Svj+YDhi3*N^TPg`gg0DFRyLVA>D|C2sWu*}+ zii}tce0sB5Lp7i_qyg+;gA@?hL@z3mc4oe#y?ST+x8WCXQE5l=@iDZ$mhDG{M4QWy zjgB+KkqS~Cy7|8t$aSn!zgW1Voh(b^N1KvmTw>ZF=5}T`_&zb&g-95b!u9a@rYQa zZ$?SqZcsPy^~A;#NZ5D^D(Dd%+`F|!50rLw6BW1}KloH+w0iu~v}dTL`L2jqJJuZ0 zxNrIa*D9_>wcz->;?eIb)Dz9d#kN!1v3^j~6((L)Bzw$u{77N+dO*1kQ+O`-BNC4) zTS3F)k=i0F3$kd}7K1J`86cQ=!%RG;h%m~%2vaWhH6hW!$Y80!;dn|zR>O8mIkyrt zU|?Q4Z~J4xbcxp~1us64*<;>p)Fq}aTdj*KHz?<3jk1pbQK0gNpyttYwf7ITcU|cW zGiRPDgKJ9W&umZFcOuN3oIk&o-uu&yQ0<$~y(CF_6t8FZ-MRVW&1XFW3x}SP-&4P( zLgUi%*f-&{wUni|$(ov7RlAqe?w{wDRee4ELAd9{O8VqKu;`k2GPQc)^3sLN;l!12 zPkAN%?SJcPR}*1@3-r6cc!U>tm>F4Ba!X3?^-D=aUh!UN^TAMRSVoVJ><7Ilr-lzn zi`mR@zqHsd!&>i%5-w+1NMxO$q7HaiRl647OFZwds0sUs?G;hw^M-2$GxQwm23PzA bco!((!}vKmu#67;i#}j8g~Yba)eVE?K$Hi z9$5J^{HQ>LQ7`e1`iQTp6V8aE5|Kvz#1Fi;6%je>iTO5}!5Bw_C$Ld)a+!ju~mqJcr}wHg=1S5jsnjgyu!hwYe$hEpastfWDlob6PlG?dAc z6i(WXL6RBM$fnp)?(hS_!++~G%palz=PESrf2hxZ^0B}7-M z!{=3av2#aZ08$qx!!?{sX_6M~ z;m(G4Z9WxaNqg+U@w%whF)z+?TFCP2e)p_2^YRMZsa}_QLC?I_!j_PTxM%9pu8*=V zCmLNjL#H7@wT9Zx@sANoG`k30*xjjs$B}9jp6+n~FpYnJ2?9p^15BOHPwsPlsIAkt zroWl~p=9j>Idb|J0h+@7ol)WMq8rF7nnrCQDIz4^VTdiEuDZ%~K2J+(bq^c;E-?Y=tH zSjMBGXW{lL8saA2@8h3E8Sd$riSBJgtaYN$1WrBtT%MW ztkwxDX3800#%ntyH&2C&1ZOB(^bd@v8^W+j0_4%~n;4~FZ zVB0CMoc+&7e`A+aZR1UxRt;N?MW}b4EJ64Vcm;B0U;t)cNz0wCQVp&Q$A(CNqq|>=U_tp=k&8vr;-C>V^rD} zmrR-Rkav3Vv(735U@(VTg;|IN^`)xeqGDkuL^3mICl{)51Gh6d6BCxfHVd2jA9RWb26VQ%wfy1XNGdlaA9`FzKAh!+vyUYe~SjP+j)kQV4fTUt1NW(7x3y= z(T3nJ%kB4sjtzhC-MNys{K{Q__sZpsP+RHT&0r-IFA9%&Bp-d~LxKA1z1Mon&GCxT zwZg4<%1ZaulMj^O)syR?$kpMWLXjfBULUy;SPGP;D)sF}?}k!WEWC59#J$u1eW39{ zq@@&Idi6%%Qs2!;C33JR-R~VNb*{C(u-f{<%7HRlU%3;#bNTC*wUG;}BNxgBF2g*M zDTeOHj=VQr%G`|}Tp>mIerO-Fr!2uA-V2dvoyHeu#&y&@Bi}ojd0_Q*7sXWL$|fhUcdGFYPc5;lj^RAu7&Ql{Pmu6 z0hl$ZWmRe^HQtl{jayjtKM`r1bvi#z=!r{l`96Z5y{_fu%p8)FkxBmX$ z>=t;$e*{l=^Z%?LkcPbczq;iikNA~W9*T%Dh4QoZQNrqp9DoV(E^lRW+3hl8HBv1d z3f(4W+S|3n%BY$ix6l@li1ffjr9aQ*(*+avlfMAVI)%-?4i9;b<9DqP|ts Q_iuD`OX%d{^(R@0O?h}7mo!gZC%RI815j5k{EnwfPV z94e&tfGQ+7q?cZrLl3FcLyqLsO8f&LgNnA$MvbMar&2kGsy(zbYcF=Hj^&y6{ocO$ zy{QKRUO?9FKc5s|IRJP?8Ee^^MExNlz5p4JVbTE6)0SpZHe^fLk)5Z!J}8SIUjtChtglQeNaWX=mD(@*}@VyV8MF2kIa+vlg5HYh1=12B>qDiF$sci5H_b zBO+<*s-lUKuql@XQI!O^p(8P`3q`%87=cUmhJrVujNxB}Dnz0Vg*<*l{5b@PFQElp z6C^pW8yynVA@R=_MRgTo;=_q3Uz8s~K~xl>f)pwXVMuaGGR|vA5=szbaTOA|rzqm0 zP>>avZ}!GKNaKK={OcDCZPWny^ zIF`FDyVjY#%>E7$s`lI!OP6iSwd8B7*V$IXRm-~7r#Ea}E!!+ub7Yw{cP?!CEcaVf z>l_9dPs?ZF2!J7w>#_7MAQD<-+p2MA*$yyu`4v1EBrsRm8Fn$=I15Yx+_VpZ0u%Kv zZu&%|%UH)T0=3E@iTlfEFMi+OWD(TO+{jg2K!KxRYYNq+*IwE z-N?hTE^Dg6>T(Gh4y-^}jyeo~zJd_>xJJlJ&Wz$}j<(@y&aJ^fOl#ulGUCa^N)|(e 
zS7xM=tghVA@|6+^g;(Ze{ch#K3NAuOIP*K9plDh-R(@>w+IU2<7(g3_WW7j__Kv-Ut)W{7VJL?j+}8<4&SdWZz#6&+fT?>JFa{B=$f2?*5DW->+A1E*%Xl?aiHq zMxR|h2~E~Qy@%nswuxfb{>W%pAS9nKJ1GB;J<$4zy9}yohbgobTmG_J)P_Z2i@Le+FnJC} b&cU^F(D#xHZ3RxbuB||w9cFsaFtPp@sgtdd literal 0 HcmV?d00001 diff --git a/__pycache__/utils.cpython-313.pyc b/__pycache__/utils.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5df640f62f5b2e207307b19eff8d31968ab68fd GIT binary patch literal 1127 zcmah{&1(}u6rb7MY?7{NX~{v`rj4bF5viq+LPag9wF*KGS#yxahD~<1S;-eOyI3#M zV_U)WQt{RzUOW`~FZds%q|h=H1wDBSMyPnvH~E6bL*KG*-<$Wbzj^Q12n6~Ni23(* z?j=U(yKp)pwT;G2Fm{lRbo>O-))!N(OT&mtx<{9X(F;svT>)QN#S!&~D2!*JUspU} z$?!Jfpmxx5H<0N3rF(+V1pG;tk|-)FKoX+5N1|=dj-7ZZ~NQz-H-3;yKnbX z?#x1TI^`}soL+j)A%#nnaGALo?qLqK%Mr{KhpahFfDH575ALLrj8K+}nFYI;S}bMV z0x3Ey^~iP}yO|Wr5dxm;*m*Wpe#Hl~d14gP26b%aq>Dy@P%%gWn2m#4N2gk#GP^Zf z9hy3j4i;3a0f4Q3`&XFx(4+_J|D13G)vMVlD6{3DjkGV6<#&~Y8`zEj%-A$%C)^9D5}BP zIUpjl3FJ2z7jLSwhaz6?ZYc?XqwT#)s3;|+gcq9iNVif>;)FonBS|^hW>8pg4xUcW zFoFsJyhl16C(%`Ot2;mwx>!;ejiWF;7ggSq$B~62QXCkQiKyJ0GgvNfXSm98#!PgU zE2T0ia)lDZaX4To@lj;xvXDq&7U;XcdG6Tt|sidsTBwz!;eEv5k8Fi2}pei2QJ zWI6@`F#%?P0_p^Um6ffPS|GH(bm|Xm+AOOGC%n$e}h;(?V4xbmoOqDElw)hl$aBn23$apGW&*bKVGk8u3uEJsK2B z5V=X3CN3&i)1*lYw(oACWV(5Bp9-GCY!MTbt;-nWuW0C}g7E_WrDA-&p?dIGv+^H1 CAlTRd literal 0 HcmV?d00001 diff --git a/archive.py b/archive.py new file mode 100644 index 0000000..f63ff4c --- /dev/null +++ b/archive.py @@ -0,0 +1,21 @@ +import requests, time, random + +from config import ARCHIVE_URLS, USER_AGENT + +def archive_url(url): + print(f"🌐 Archiving {url} ...") + headers = {"User-Agent": USER_AGENT} + try: + resp = requests.get(f"https://web.archive.org/save/{url}", headers=headers, timeout=30) + if resp.status_code == 200: + print(f"✅ Archived {url}") + else: + print(f"âš ī¸ Failed to archive {url}, status code {resp.status_code}") + except Exception as e: + print(f"âš ī¸ Exception archiving {url}: {e}") + +def archive_all_urls(): + for url in ARCHIVE_URLS: + delay = 10 + random.uniform(-3, 3) + time.sleep(delay) + archive_url(url) diff --git a/config.py b/config.py new file mode 100644 index 0000000..9e8dbb5 --- /dev/null +++ b/config.py @@ -0,0 +1,33 @@ +import os + +ZIP_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=zip" +XLSX_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=xlsx" + +ZIP_FILENAME = "Trackerhub.zip" +HTML_FILENAME = "Artists.html" +CSV_FILENAME = "artists.csv" +XLSX_FILENAME = "artists.xlsx" + +exclude_names = { + "AI Models", + "Lawson", + "BPM Tracker", + "Worst Comps & Edits", + "Allegations", + "Rap Disses Timeline", + "Underground Artists", +} + +USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36" + +BASE_URL = "http://localhost:5000" + +ARCHIVE_URLS = [ + f"{BASE_URL}/", + f"{BASE_URL}/index.html/", + f"{BASE_URL}/artists.html", + f"{BASE_URL}/artists.csv", + f"{BASE_URL}/artists.xlsx", +] + +DISCORD_WEBHOOK_URL = os.getenv("DISCORD_WEBHOOK_URL") diff --git a/diff.py b/diff.py new file mode 100644 index 0000000..ceb2937 --- /dev/null +++ b/diff.py @@ -0,0 +1,42 @@ +import csv + +def read_csv_to_dict(filename): + d = {} + with open(filename, newline='', encoding='utf-8') as f: + reader = csv.DictReader(f) + for row in reader: + d[row["Artist Name"]] = row + return d + +def detect_changes(old_data, new_data): + changes = [] + + old_keys = set(old_data.keys()) + new_keys = 
set(new_data.keys())
+
+    removed = old_keys - new_keys
+    added = new_keys - old_keys
+    common = old_keys & new_keys
+
+    for artist in removed:
+        changes.append(f"❌ Removed: **{artist}**")
+
+    for artist in added:
+        changes.append(f"➕ Added: **{artist}**")
+
+    for artist in common:
+        old_row = old_data[artist]
+        new_row = new_data[artist]
+
+        if old_row["URL"] != new_row["URL"]:
+            changes.append(f"🔗 Link changed for **{artist}**")
+        if old_row["Credit"] != new_row["Credit"]:
+            changes.append(f"✍️ Credit changed for **{artist}**")
+        if old_row["Links Work"] != new_row["Links Work"]:
+            changes.append(f"🔄 Links Work status changed for **{artist}**")
+        if old_row["Updated"] != new_row["Updated"]:
+            changes.append(f"🕒 Updated date changed for **{artist}**")
+        if old_row["Best"] != new_row["Best"]:
+            changes.append(f"⭐ Best flag changed for **{artist}**")
+
+    return changes
diff --git a/downloader.py b/downloader.py
new file mode 100644
index 0000000..d6c69fd
--- /dev/null
+++ b/downloader.py
@@ -0,0 +1,26 @@
+import requests, zipfile
+
+from config import ZIP_URL, ZIP_FILENAME, HTML_FILENAME, XLSX_URL, XLSX_FILENAME
+
+def download_zip_and_extract_html():
+    print("🔄 Downloading ZIP...")
+    r = requests.get(ZIP_URL)
+    r.raise_for_status()
+    with open(ZIP_FILENAME, "wb") as f:
+        f.write(r.content)
+    print(f"✅ Saved ZIP as {ZIP_FILENAME}")
+
+    with zipfile.ZipFile(ZIP_FILENAME, "r") as z:
+        with z.open(HTML_FILENAME) as html_file:
+            html_content = html_file.read()
+    with open(HTML_FILENAME, "wb") as f:
+        f.write(html_content)
+    print(f"✅ Extracted {HTML_FILENAME}")
+
+def download_xlsx():
+    print("🔄 Downloading XLSX...")
+    r = requests.get(XLSX_URL)
+    r.raise_for_status()
+    with open(XLSX_FILENAME, "wb") as f:
+        f.write(r.content)
+    print(f"✅ Saved XLSX as {XLSX_FILENAME}")
diff --git a/main.py b/main.py
index 73d9d3c..0d10b71 100644
--- a/main.py
+++ b/main.py
@@ -1,254 +1,13 @@
-import requests
-import zipfile
-import threading
-import time
-import random
-import hashlib
-from bs4 import BeautifulSoup
-import csv
-import re
-from flask import Flask, send_file, send_from_directory, abort
-import os
-import json
+from flask import Flask, send_file, send_from_directory
 from flask_cors import CORS
+import threading
+
+from config import HTML_FILENAME, CSV_FILENAME, XLSX_FILENAME
+from update_loop import update_loop
 
 app = Flask(__name__)
 CORS(app)
 
-ZIP_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=zip"
-XLSX_URL = "https://docs.google.com/spreadsheets/d/1S6WwM05O277npQbaiNk-jZlXK3TdooSyWtqaWUvAI78/export?format=xlsx"
-
-ZIP_FILENAME = "Trackerhub.zip"
-HTML_FILENAME = "Artists.html"
-CSV_FILENAME = "artists.csv"
-XLSX_FILENAME = "artists.xlsx"
-
-exclude_names = {
-    "AI Models",
-    "Lawson",
-    "BPM Tracker",
-    "Worst Comps & Edits",
-    "Allegations",
-    "Rap Disses Timeline",
-    "Underground Artists",
-}
-
-USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36"
-
-# URLs to archive on changes — update these to your actual hosted domain
-BASE_URL = "http://localhost:5000" # Change this to your public domain when deployed
-
-ARCHIVE_URLS = [
-    f"{BASE_URL}/",
-    f"{BASE_URL}/index.html/",
-    f"{BASE_URL}/artists.html",
-    f"{BASE_URL}/artists.csv",
-    f"{BASE_URL}/artists.xlsx",
-]
-
-DISCORD_WEBHOOK_URL = os.getenv("DISCORD_WEBHOOK_URL")
-
-def clean_artist_name(text):
-    return re.sub(r'[⭐🤖🎭\u2B50\uFE0F]', '', text).strip()
-
-def force_star_flag(starred=True):
-    return "Yes" if starred else "No"
-
-def download_zip_and_extract_html():
-    print("🔄 Downloading ZIP...")
-    r = requests.get(ZIP_URL)
-    r.raise_for_status()
-    with open(ZIP_FILENAME, "wb") as f:
-        f.write(r.content)
-    print(f"✅ Saved ZIP as {ZIP_FILENAME}")
-
-    with zipfile.ZipFile(ZIP_FILENAME, "r") as z:
-        with z.open(HTML_FILENAME) as html_file:
-            html_content = html_file.read()
-    with open(HTML_FILENAME, "wb") as f:
-        f.write(html_content)
-    print(f"✅ Extracted {HTML_FILENAME}")
-
-def download_xlsx():
-    print("🔄 Downloading XLSX...")
-    r = requests.get(XLSX_URL)
-    r.raise_for_status()
-    with open(XLSX_FILENAME, "wb") as f:
-        f.write(r.content)
-    print(f"✅ Saved XLSX as {XLSX_FILENAME}")
-
-def generate_csv():
-    print("📝 Generating CSV...")
-    with open(HTML_FILENAME, "r", encoding="utf-8") as f:
-        soup = BeautifulSoup(f, "html.parser")
-
-    rows = soup.select("table.waffle tbody tr")[3:] # skip headers and Discord
-
-    data = []
-    starring = True
-
-    for row in rows:
-        cells = row.find_all("td")
-        if len(cells) < 4:
-            continue
-
-        link_tag = cells[0].find("a")
-        artist_name_raw = link_tag.get_text(strip=True) if link_tag else cells[0].get_text(strip=True)
-        artist_url = link_tag["href"] if link_tag else ""
-        if not artist_url:
-            continue
-
-        if "AI Models" in artist_name_raw:
-            starring = False
-
-        artist_name_clean = clean_artist_name(artist_name_raw)
-        if artist_name_clean in exclude_names:
-            continue
-
-        if "🚩" in artist_name_raw:
-            continue
-
-        best = force_star_flag(starring)
-        credit = cells[1].get_text(strip=True)
-        updated = cells[2].get_text(strip=True)
-        links_work = cells[3].get_text(strip=True)
-
-        data.append([artist_name_clean, artist_url, credit, links_work, updated, best])
-
-    with open(CSV_FILENAME, "w", newline='', encoding="utf-8") as csvfile:
-        writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
-        writer.writerow(["Artist Name", "URL", "Credit", "Links Work", "Updated", "Best"])
-        writer.writerows(data)
-
-
-    print(f"✅ CSV saved as {CSV_FILENAME}")
-
-def hash_file(filename):
-    hasher = hashlib.sha256()
-    with open(filename, "rb") as f:
-        buf = f.read()
-    hasher.update(buf)
-    return hasher.hexdigest()
-
-def archive_url(url):
-    print(f"🌐 Archiving {url} ...")
-    headers = {"User-Agent": USER_AGENT}
-    try:
-        resp = requests.get(f"https://web.archive.org/save/{url}", headers=headers, timeout=30)
-        if resp.status_code == 200:
-            print(f"✅ Archived {url}")
-        else:
-            print(f"⚠️ Failed to archive {url}, status code {resp.status_code}")
-    except Exception as e:
-        print(f"⚠️ Exception archiving {url}: {e}")
-
-def archive_all_urls():
-    for url in ARCHIVE_URLS:
-        delay = 10 + random.uniform(-3, 3)
-        time.sleep(delay)
-        archive_url(url)
-
-def read_csv_to_dict(filename):
-    """Read CSV into dict with artist_name as key, storing relevant fields."""
-    d = {}
-    with open(filename, newline='', encoding='utf-8') as f:
-        reader = csv.DictReader(f)
-        for row in reader:
-            d[row["Artist Name"]] = row
-    return d
-
-def detect_changes(old_data, new_data):
-    """
-    Compare old and new data dictionaries.
-    Returns a list of strings describing changes.
-    """
-    changes = []
-
-    old_keys = set(old_data.keys())
-    new_keys = set(new_data.keys())
-
-    removed = old_keys - new_keys
-    added = new_keys - old_keys
-    common = old_keys & new_keys
-
-    for artist in removed:
-        changes.append(f"❌ Removed: **{artist}**")
-
-    for artist in added:
-        changes.append(f"➕ Added: **{artist}**")
-
-    for artist in common:
-        old_row = old_data[artist]
-        new_row = new_data[artist]
-        # Check if URL changed
-        if old_row["URL"] != new_row["URL"]:
-            changes.append(f"🔗 Link changed for **{artist}**")
-        # Check other fields if needed (Credit, Updated, etc.)
-        if old_row["Credit"] != new_row["Credit"]:
-            changes.append(f"✍️ Credit changed for **{artist}**")
-        if old_row["Links Work"] != new_row["Links Work"]:
-            changes.append(f"🔄 Links Work status changed for **{artist}**")
-        if old_row["Updated"] != new_row["Updated"]:
-            changes.append(f"🕒 Updated date changed for **{artist}**")
-        if old_row["Best"] != new_row["Best"]:
-            changes.append(f"⭐ Best flag changed for **{artist}**")
-
-    return changes
-
-def send_discord_message(content):
-    if not DISCORD_WEBHOOK_URL:
-        print("⚠️ Discord webhook URL not set in env")
-        return
-
-    headers = {"Content-Type": "application/json"}
-    data = {"content": content}
-
-    try:
-        resp = requests.post(DISCORD_WEBHOOK_URL, headers=headers, data=json.dumps(data), timeout=10)
-        if resp.status_code in (200, 204):
-            print("✅ Discord notification sent")
-        else:
-            print(f"⚠️ Failed to send Discord notification, status code {resp.status_code}")
-    except Exception as e:
-        print(f"⚠️ Exception sending Discord notification: {e}")
-
-def update_loop():
-    last_csv_hash = None
-    last_csv_data = {}
-
-    while True:
-        try:
-            download_zip_and_extract_html()
-            download_xlsx()
-            generate_csv()
-
-            current_hash = hash_file(CSV_FILENAME)
-            current_data = read_csv_to_dict(CSV_FILENAME)
-
-            if last_csv_hash is None:
-                print("ℹ️ Initial CSV hash stored.")
-            elif current_hash != last_csv_hash:
-                print("🔔 CSV has changed! Archiving URLs...")
-
-                changes = detect_changes(last_csv_data, current_data)
-                if changes:
-                    message = "**CSV Update Detected:**\n" + "\n".join(changes)
-                    send_discord_message(message)
-                else:
-                    print("ℹ️ No detectable content changes found.")
-
-                archive_all_urls()
-            else:
-                print("ℹ️ CSV unchanged. No archiving needed.")
-
-            last_csv_hash = current_hash
-            last_csv_data = current_data
-
-        except Exception as e:
-            print(f"⚠️ Error updating files: {e}")
-
-        time.sleep(600) # 10 minutes
-
 @app.route("/artists.html")
 def serve_artists_html():
     return send_file(HTML_FILENAME, mimetype="text/html")
@@ -261,26 +20,25 @@ def serve_artists_csv():
     return send_file(CSV_FILENAME, mimetype="text/csv")
 
 @app.route("/artists.xlsx")
 def serve_artists_xlsx():
     return send_file(XLSX_FILENAME, mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
 
-# Serve index.html at "/", "/index", and "/index.html"
 @app.route("/")
 @app.route("/index")
 @app.route("/index.html")
 def serve_index():
     return send_file("templates/index.html", mimetype="text/html")
 
-# Serve static files from templates/_next/ as /_next/...
@app.route("/_next/") def serve_next_static(filename): return send_from_directory("templates/_next", filename) -# Custom 404 error page @app.errorhandler(404) def page_not_found(e): return send_file("templates/404.html", mimetype="text/html"), 404 - if __name__ == "__main__": threading.Thread(target=update_loop, daemon=True).start() + from downloader import download_zip_and_extract_html, download_xlsx + from parser import generate_csv + try: download_zip_and_extract_html() download_xlsx() diff --git a/notify.py b/notify.py new file mode 100644 index 0000000..550fcc9 --- /dev/null +++ b/notify.py @@ -0,0 +1,20 @@ +import os, json, requests + +from config import DISCORD_WEBHOOK_URL + +def send_discord_message(content): + if not DISCORD_WEBHOOK_URL: + print("âš ī¸ Discord webhook URL not set in env") + return + + headers = {"Content-Type": "application/json"} + data = {"content": content} + + try: + resp = requests.post(DISCORD_WEBHOOK_URL, headers=headers, data=json.dumps(data), timeout=10) + if resp.status_code in (200, 204): + print("✅ Discord notification sent") + else: + print(f"âš ī¸ Failed to send Discord notification, status code {resp.status_code}") + except Exception as e: + print(f"âš ī¸ Exception sending Discord notification: {e}") diff --git a/parser.py b/parser.py new file mode 100644 index 0000000..c848eb5 --- /dev/null +++ b/parser.py @@ -0,0 +1,47 @@ +from bs4 import BeautifulSoup +import csv + +from config import HTML_FILENAME, CSV_FILENAME, exclude_names +from utils import clean_artist_name, force_star_flag + +def generate_csv(): + print("📝 Generating CSV...") + with open(HTML_FILENAME, "r", encoding="utf-8") as f: + soup = BeautifulSoup(f, "html.parser") + + rows = soup.select("table.waffle tbody tr")[3:] + + data = [] + starring = True + + for row in rows: + cells = row.find_all("td") + if len(cells) < 4: + continue + + link_tag = cells[0].find("a") + artist_name_raw = link_tag.get_text(strip=True) if link_tag else cells[0].get_text(strip=True) + artist_url = link_tag["href"] if link_tag else "" + if not artist_url: + continue + + if "AI Models" in artist_name_raw: + starring = False + + artist_name_clean = clean_artist_name(artist_name_raw) + if artist_name_clean in exclude_names or "🚩" in artist_name_raw: + continue + + best = force_star_flag(starring) + credit = cells[1].get_text(strip=True) + updated = cells[2].get_text(strip=True) + links_work = cells[3].get_text(strip=True) + + data.append([artist_name_clean, artist_url, credit, links_work, updated, best]) + + with open(CSV_FILENAME, "w", newline='', encoding="utf-8") as csvfile: + writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL) + writer.writerow(["Artist Name", "URL", "Credit", "Links Work", "Updated", "Best"]) + writer.writerows(data) + + print(f"✅ CSV saved as {CSV_FILENAME}") diff --git a/update_loop.py b/update_loop.py new file mode 100644 index 0000000..a2cfe2f --- /dev/null +++ b/update_loop.py @@ -0,0 +1,47 @@ +import time + +from downloader import download_zip_and_extract_html, download_xlsx +from parser import generate_csv +from diff import read_csv_to_dict, detect_changes +from archive import archive_all_urls +from notify import send_discord_message +from utils import hash_file + +last_csv_hash = None +last_csv_data = {} + +def update_loop(): + global last_csv_hash, last_csv_data + + while True: + try: + download_zip_and_extract_html() + download_xlsx() + generate_csv() + + current_hash = hash_file("artists.csv") + current_data = read_csv_to_dict("artists.csv") + + if last_csv_hash is None: + 
print("â„šī¸ Initial CSV hash stored.") + elif current_hash != last_csv_hash: + print("🔔 CSV has changed! Archiving URLs...") + + changes = detect_changes(last_csv_data, current_data) + if changes: + message = "**CSV Update Detected:**\n" + "\n".join(changes) + send_discord_message(message) + else: + print("â„šī¸ No detectable content changes found.") + + archive_all_urls() + else: + print("â„šī¸ CSV unchanged. No archiving needed.") + + last_csv_hash = current_hash + last_csv_data = current_data + + except Exception as e: + print(f"âš ī¸ Error updating files: {e}") + + time.sleep(600) diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..8294f2f --- /dev/null +++ b/utils.py @@ -0,0 +1,14 @@ +import re, hashlib + +def clean_artist_name(text): + return re.sub(r'[⭐🤖🎭\u2B50\uFE0F]', '', text).strip() + +def force_star_flag(starred=True): + return "Yes" if starred else "No" + +def hash_file(filename): + hasher = hashlib.sha256() + with open(filename, "rb") as f: + buf = f.read() + hasher.update(buf) + return hasher.hexdigest()