Some checks failed
Docker image builds / build (push) Waiting to run
Asset Build Verification / asset_verification (push) Has been cancelled
Docs deploy / build (push) Has been cancelled
Go Mod Tidy Check / go_mod_tidy_check (push) Has been cancelled
Go / go_tests (push) Has been cancelled
Package builds (unstable) / package_builds (push) Has been cancelled
Smoke tests / smoke-test (default-config-macro) (push) Has been cancelled
Smoke tests / smoke-test (docker-registry) (push) Has been cancelled
Smoke tests / smoke-test (double_slash) (push) Has been cancelled
Smoke tests / smoke-test (forced-language) (push) Has been cancelled
Smoke tests / smoke-test (git-clone) (push) Has been cancelled
Smoke tests / smoke-test (git-push) (push) Has been cancelled
Smoke tests / smoke-test (healthcheck) (push) Has been cancelled
Smoke tests / smoke-test (i18n) (push) Has been cancelled
Smoke tests / smoke-test (log-file) (push) Has been cancelled
Smoke tests / smoke-test (nginx) (push) Has been cancelled
Smoke tests / smoke-test (palemoon/amd64) (push) Has been cancelled
Smoke tests / smoke-test (robots_txt) (push) Has been cancelled
Check Spelling / Check Spelling (push) Has been cancelled
SSH CI / ssh (aarch64-16k) (push) Has been cancelled
SSH CI / ssh (aarch64-4k) (push) Has been cancelled
SSH CI / ssh (ppc64le) (push) Has been cancelled
SSH CI / ssh (riscv64) (push) Has been cancelled
zizmor / zizmor latest via PyPI (push) Has been cancelled
78 lines
1.8 KiB
Go
78 lines
1.8 KiB
Go
/*
Batch process robots.txt files from archives like https://github.com/nrjones8/robots-dot-txt-archive-bot/tree/master/data/cleaned
into nuke CEL policies.

Usage: go run batch_process.go <directory with robots.txt files>
*/
|
|
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"io/fs"
|
|
"log"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
)
|
|
|
|
// policyNameFromRelPath derives a policy name from the path of a robots.txt
// file given relative to the input directory.
//
// Path separators become "-", a trailing "-robots.txt" is dropped, and every
// remaining "." becomes "-", so "example.com/robots.txt" yields "example-com".
// The path goes through filepath.ToSlash first so that platforms whose
// separator is not "/" still get their directories flattened into the name.
func policyNameFromRelPath(relPath string) string {
	name := strings.ReplaceAll(filepath.ToSlash(relPath), "/", "-")
	name = strings.TrimSuffix(name, "-robots.txt")
	return strings.ReplaceAll(name, ".", "-")
}

// main walks the directory named by the first command-line argument and, for
// every non-directory entry, runs `go run main.go` to convert it into a YAML
// policy written under ./generated_policies.
//
// A file that fails to convert is reported as a warning and skipped so that
// one bad input does not abort the whole batch. The process exits with status
// 1 when the directory argument is missing, and terminates via log.Fatalf when
// the output directory cannot be created or the walk itself fails.
func main() {
	if len(os.Args) < 2 {
		// Usage goes to stderr: this is an error path with a non-zero exit.
		fmt.Fprintln(os.Stderr, "Usage: go run batch_process.go <cleaned_directory>")
		fmt.Fprintln(os.Stderr, "Example: go run batch_process.go ./cleaned")
		os.Exit(1)
	}

	cleanedDir := os.Args[1]
	outputDir := "generated_policies"

	// Create output directory
	if err := os.MkdirAll(outputDir, 0755); err != nil {
		log.Fatalf("Failed to create output directory: %v", err)
	}

	count, failed := 0, 0
	err := filepath.WalkDir(cleanedDir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}

		// Skip directories
		if d.IsDir() {
			return nil
		}

		// Generate policy name from file path
		relPath, err := filepath.Rel(cleanedDir, path)
		if err != nil {
			fmt.Printf("Warning: Failed to process %s: %v\n", path, err)
			failed++
			return nil // Continue processing other files
		}
		policyName := policyNameFromRelPath(relPath)
		outputFile := filepath.Join(outputDir, policyName+".yaml")

		// The converter is invoked as a subprocess in the current working
		// directory, so main.go must be present there.
		cmd := exec.Command("go", "run", "main.go",
			"-input", path,
			"-output", outputFile,
			"-name", policyName,
			"-format", "yaml")

		// Capture the child's output so a failure can be diagnosed; a bare
		// exit status says nothing about what went wrong.
		if out, err := cmd.CombinedOutput(); err != nil {
			fmt.Printf("Warning: Failed to process %s: %v\n", path, err)
			if msg := strings.TrimSpace(string(out)); msg != "" {
				fmt.Printf("  %s\n", msg)
			}
			failed++
			return nil // Continue processing other files
		}

		count++
		if count%100 == 0 {
			fmt.Printf("Processed %d files...\n", count)
		} else if count%10 == 0 {
			fmt.Print(".")
		}

		return nil
	})

	if err != nil {
		log.Fatalf("Error walking directory: %v", err)
	}

	// Terminate a line of progress dots so the summary starts on its own line.
	if count%100 >= 10 {
		fmt.Println()
	}

	fmt.Printf("Successfully processed %d robots.txt files\n", count)
	if failed > 0 {
		fmt.Printf("Failed to process %d files\n", failed)
	}
	fmt.Printf("Generated policies saved to: %s/\n", outputDir)
}
|