Make deezify not pick up garbage albums

This commit is contained in:
Soph :3 2025-12-31 16:22:38 +02:00
parent a2cc4c7793
commit 6ca31b2916

View file

@ -1,14 +1,12 @@
import fs from "fs";
import path from "path";
import crypto from "crypto"
import crypto from "crypto";
// Directory holding cached API responses (one `<hash>.json` file per URL).
const CACHE_DIR = "./deezer_cache";
// Create the cache directory up front; `recursive: true` makes this a no-op
// when the directory already exists.
fs.mkdirSync(CACHE_DIR, { recursive: true });

// Artists to process. Each entry is either an artist name (string, looked up
// through the Deezer artist search) or a numeric Deezer artist ID used as-is.
const artists = [];

// Extra albums to include regardless of artist. Each entry is a string (such
// as an album URL); the numeric album ID is extracted from the end of it.
const album_links = [];
const BAD_KEYWORDS = [
" remix",
"remix ",
@ -22,7 +20,8 @@ const BAD_KEYWORDS = [
"anniversary",
"(remixes)",
"(remix)",
"(acustic)"
"(acustic)",
"mixes",
];
function normalize(str) {
@ -31,8 +30,9 @@ function normalize(str) {
/**
 * Tell whether a release title contains any blocked keyword.
 *
 * The title is passed through `normalize` first, then checked for a
 * substring match against every entry of `BAD_KEYWORDS`.
 *
 * @param {string} title - Release title to inspect.
 * @returns {boolean} `true` when at least one blocked keyword occurs in the
 *   normalized title, `false` otherwise.
 */
function hasBadKeyword(title) {
  const t = normalize(title);
  return BAD_KEYWORDS.some((k) => t.includes(k));
}
function cachePath(url) {
const hash = crypto.createHash("sha1").update(url).digest("hex");
return path.join(CACHE_DIR, `${hash}.json`);
@ -53,13 +53,9 @@ async function cachedFetch(url) {
return json;
}
/**
 * Resolve an artist name to an ID via the Deezer artist-search endpoint.
 *
 * @param {string} name - Artist name to search for; it is URI-encoded before
 *   being placed in the query string.
 * @returns {Promise<*>} The `id` field of the first search hit, or `null`
 *   when the response has no hits (or no `data` array at all).
 */
async function getArtistId(name) {
  const q = encodeURIComponent(name);
  const data = await cachedFetch(`https://api.deezer.com/search/artist?q=${q}`);
  // First hit wins; any missing level in the response collapses to null.
  return data?.data?.[0]?.id ?? null;
}
@ -75,63 +71,84 @@ async function getAllAlbums(artistId) {
return out;
}
/**
 * Fetch a single album record from the Deezer album endpoint, going through
 * `cachedFetch`.
 *
 * @param {number|string} albumId - Deezer album ID, interpolated into the URL.
 * @returns {Promise<*>} Whatever `cachedFetch` resolves to for that URL.
 */
async function getAlbum(albumId) {
  const album = await cachedFetch(`https://api.deezer.com/album/${albumId}`);
  return album;
}
/**
 * Title-based selection: keep one release per title, preferring explicit.
 *
 * Releases whose title contains a blocked keyword are dropped. The rest are
 * grouped by their normalized title with any parenthesised parts removed.
 * Within a group the first release is kept, unless a later one is explicit
 * while the kept one is not.
 *
 * @param {Array<{title: string, explicit_lyrics?: boolean}>} albums -
 *   Release summaries to choose from.
 * @returns {Array<object>} One release per distinct title key, in the order
 *   each key was first seen.
 */
function selectBestAlbumVersions(albums) {
  const byTitle = new Map();

  for (const a of albums) {
    if (hasBadKeyword(a.title)) continue;

    // "Album (Deluxe)" and "Album" share the key "album".
    const key = normalize(a.title.replace(/\s*\(.*?\)\s*/g, ""));

    if (!byTitle.has(key)) {
      byTitle.set(key, a);
      continue;
    }

    const existing = byTitle.get(key);

    // Prefer explicit
    if (!existing.explicit_lyrics && a.explicit_lyrics) {
      byTitle.set(key, a);
    }
  }

  return [...byTitle.values()];
}

/**
 * Track-based selection: keep only releases that add at least one new track.
 *
 * Releases with a blocked keyword in the title, or with a `record_type` other
 * than "album", "single" or "ep", are ignored. Albums are examined first and
 * singles/EPs afterwards, so a single whose tracks already appear on an album
 * is skipped. Each candidate's full record is fetched with `getAlbum`; a
 * release is kept only if at least one of its normalized track titles has not
 * been seen on an earlier kept release. A release with no track list is
 * therefore always skipped.
 *
 * @param {Array<{id: number|string, title: string, record_type: string}>} albums -
 *   Release summaries to choose from.
 * @returns {Promise<Array<object>>} Full album records (as resolved by
 *   `getAlbum`) for the kept releases, albums before singles/EPs.
 */
async function selectUniqueReleases(albums) {
  const seenTracks = new Set();
  const finalReleases = [];

  const albumsOnly = albums.filter(
    (a) => a.record_type === "album" && !hasBadKeyword(a.title),
  );
  const singlesEPs = albums.filter(
    (a) =>
      (a.record_type === "single" || a.record_type === "ep") &&
      !hasBadKeyword(a.title),
  );

  // Processed strictly one at a time and in this order: whether a release is
  // kept depends on which tracks earlier releases already contributed.
  for (const release of [...albumsOnly, ...singlesEPs]) {
    const fullRelease = await getAlbum(release.id);
    const tracks = (fullRelease.tracks?.data ?? []).map((t) =>
      normalize(t.title),
    );

    // `every` is true for an empty list, so track-less releases are dropped too.
    if (tracks.every((t) => seenTracks.has(t))) continue;

    tracks.forEach((t) => seenTracks.add(t));
    finalReleases.push(fullRelease);
  }

  return finalReleases;
}
(async () => {
const links = [];
const displays = []
const album_links_raw = await Promise.all(album_links.map(async z => await getAlbum(z.replace(/[^\d]*(\d*)$/gm, "$1"))));
const displays = [];
const album_links_raw = await Promise.all(
album_links.map(
async (z) => await getAlbum(z.replace(/[^\d]*(\d*)$/gm, "$1")),
),
);
for (const artist of artists) {
console.log(`\n=== ${artist} ===`);
const artistId = typeof artist == "number" ? artist : await getArtistId(artist);
const artistId =
typeof artist === "number" ? artist : await getArtistId(artist);
if (!artistId) {
console.log("Artist not found");
continue;
}
const albums = (await getAllAlbums(artistId));
const albums = await getAllAlbums(artistId);
const cleanAlbums = await selectUniqueReleases(albums);
const cleanAlbums = selectBestAlbumVersions(albums);
let artistLines = [];
let extraCount = 0;
for (const a of cleanAlbums) {
console.log(
`${a.title} | ${a.explicit_lyrics ? "EXPLICIT" : "CLEAN"} | https://www.deezer.com/en/album/${a.id}`
`${a.title} | ${a.explicit_lyrics ? "EXPLICIT" : "CLEAN"} | https://www.deezer.com/en/album/${a.id}`,
);
links.push(`https://www.deezer.com/en/album/${a.id}`);
if (artistLines.length < 100) {
artistLines.push(
`+ ${typeof artist == "number" ? `No name (${artist})` : artist} - ${a.title} ${a.explicit_lyrics ? "[E]" : ""}`
`+ ${typeof artist === "number" ? `No name (${artist})` : artist} - ${a.title} ${a.explicit_lyrics ? "[E]" : ""}`,
);
} else {
extraCount++;
@ -144,16 +161,24 @@ function selectBestAlbumVersions(albums) {
displays.push(...artistLines);
}
album_links_raw.forEach(z => {
album_links_raw.forEach((z) => {
links.push(`https://www.deezer.com/en/album/${z.id}`);
displays.push(
`+ ${z.artist.name} - ${z.title} ${z.explicit_lyrics ? "[E]" : ""}`
`+ ${z.artist.name} - ${z.title} ${z.explicit_lyrics ? "[E]" : ""}`,
);
})
fs.writeFileSync("orpheus_links.txt", links.join(" "))
fs.writeFileSync("discord_send.txt", displays.join(" \n").match(/.{1,2000}$/gms).map(z => z.trim()).join("\n\n"+'-'.repeat(15)+"\n\n"))
});
})().catch(err => {
fs.writeFileSync("orpheus_links.txt", links.join(" "));
fs.writeFileSync(
"discord_send.txt",
displays
.join(" \n")
.match(/.{1,2000}$/gms)
.map((z) => z.trim())
.join("\n\n" + "-".repeat(15) + "\n\n"),
);
})().catch((err) => {
console.error(err);
process.exit(1);
});