nuke/cmd/robots2policy/batch/batch_process.go
fucksophie 896858e027
Some checks failed
Docker image builds / build (push) Waiting to run
Asset Build Verification / asset_verification (push) Has been cancelled
Docs deploy / build (push) Has been cancelled
Go Mod Tidy Check / go_mod_tidy_check (push) Has been cancelled
Go / go_tests (push) Has been cancelled
Package builds (unstable) / package_builds (push) Has been cancelled
Smoke tests / smoke-test (default-config-macro) (push) Has been cancelled
Smoke tests / smoke-test (docker-registry) (push) Has been cancelled
Smoke tests / smoke-test (double_slash) (push) Has been cancelled
Smoke tests / smoke-test (forced-language) (push) Has been cancelled
Smoke tests / smoke-test (git-clone) (push) Has been cancelled
Smoke tests / smoke-test (git-push) (push) Has been cancelled
Smoke tests / smoke-test (healthcheck) (push) Has been cancelled
Smoke tests / smoke-test (i18n) (push) Has been cancelled
Smoke tests / smoke-test (log-file) (push) Has been cancelled
Smoke tests / smoke-test (nginx) (push) Has been cancelled
Smoke tests / smoke-test (palemoon/amd64) (push) Has been cancelled
Smoke tests / smoke-test (robots_txt) (push) Has been cancelled
Check Spelling / Check Spelling (push) Has been cancelled
SSH CI / ssh (aarch64-16k) (push) Has been cancelled
SSH CI / ssh (aarch64-4k) (push) Has been cancelled
SSH CI / ssh (ppc64le) (push) Has been cancelled
SSH CI / ssh (riscv64) (push) Has been cancelled
zizmor / zizmor latest via PyPI (push) Has been cancelled
jane remover
2026-02-07 13:08:47 +02:00

78 lines
1.8 KiB
Go

/*
Batch process robots.txt files from archives like https://github.com/nrjones8/robots-dot-txt-archive-bot/tree/master/data/cleaned
into nuke CEL policies. Usage: go run batch_process.go <directory with robots.txt files>
*/
package main
import (
"fmt"
"io/fs"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
)
// policyNameFor derives a policy name from a file's location under root.
//
// The path of the file relative to root has its separators replaced with "-",
// a trailing "-robots.txt" removed, and every remaining "." replaced with "-".
// For example, "sub/example.com-robots.txt" becomes "sub-example-com".
// An error is returned when path cannot be made relative to root.
func policyNameFor(root, path string) (string, error) {
	rel, err := filepath.Rel(root, path)
	if err != nil {
		return "", fmt.Errorf("relative path of %q under %q: %w", path, root, err)
	}
	// Normalise to forward slashes first so that platforms whose separator
	// is not "/" get their directory components flattened as well.
	name := strings.ReplaceAll(filepath.ToSlash(rel), "/", "-")
	name = strings.TrimSuffix(name, "-robots.txt")
	return strings.ReplaceAll(name, ".", "-"), nil
}

// main walks the directory named by the first command-line argument and
// converts every non-directory entry into a YAML policy file inside
// "generated_policies" (relative to the current working directory).
//
// Each file is converted by running `go run main.go` with -input, -output,
// -name and -format flags; "main.go" is passed as a relative path. A file
// whose conversion fails is reported as a warning, together with whatever the
// command printed, and is skipped so the remaining files are still processed.
// The program exits with status 1 when no directory argument is given, and
// aborts if the output directory cannot be created or the walk itself fails.
func main() {
	if len(os.Args) < 2 {
		fmt.Println("Usage: go run batch_process.go <cleaned_directory>")
		fmt.Println("Example: go run batch_process.go ./cleaned")
		os.Exit(1)
	}

	cleanedDir := os.Args[1]
	outputDir := "generated_policies"

	// Create output directory
	if err := os.MkdirAll(outputDir, 0755); err != nil {
		log.Fatalf("Failed to create output directory: %v", err)
	}

	count := 0
	err := filepath.WalkDir(cleanedDir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		// Skip directories
		if d.IsDir() {
			return nil
		}

		// Generate policy name from file path
		policyName, err := policyNameFor(cleanedDir, path)
		if err != nil {
			fmt.Printf("Warning: Failed to process %s: %v\n", path, err)
			return nil // Continue processing other files
		}
		outputFile := filepath.Join(outputDir, policyName+".yaml")

		cmd := exec.Command("go", "run", "main.go",
			"-input", path,
			"-output", outputFile,
			"-name", policyName,
			"-format", "yaml")
		// Capture the command's output so a failure can be diagnosed; it is
		// only shown when the command did not succeed.
		if out, err := cmd.CombinedOutput(); err != nil {
			fmt.Printf("Warning: Failed to process %s: %v\n", path, err)
			if msg := strings.TrimSpace(string(out)); msg != "" {
				fmt.Println(msg)
			}
			return nil // Continue processing other files
		}

		count++
		if count%100 == 0 {
			fmt.Printf("Processed %d files...\n", count)
		} else if count%10 == 0 {
			fmt.Print(".")
		}
		return nil
	})
	if err != nil {
		log.Fatalf("Error walking directory: %v", err)
	}

	fmt.Printf("Successfully processed %d robots.txt files\n", count)
	fmt.Printf("Generated policies saved to: %s/\n", outputDir)
}