NUKE
Some checks failed
Docker image builds / build (push) Failing after 4m22s

This commit is contained in:
Soph :3 2026-02-07 14:27:38 +02:00
parent d2205b11a7
commit 02b9aebbe5
341 changed files with 1571 additions and 32574 deletions

View file

@ -1,6 +1,6 @@
/*
Batch process robots.txt files from archives like https://github.com/nrjones8/robots-dot-txt-archive-bot/tree/master/data/cleaned
into Anubis CEL policies. Usage: go run batch_process.go <directory with robots.txt files>
into Nuke CEL policies. Usage: go run batch_process.go <directory with robots.txt files>
*/
package main

View file

@ -12,7 +12,7 @@ import (
"regexp"
"strings"
"github.com/TecharoHQ/anubis/lib/config"
"git.sad.ovh/sophie/nuke/lib/config"
"sigs.k8s.io/yaml"
)
@ -36,7 +36,7 @@ type RobotsRule struct {
IsBlacklist bool // true if this is a specifically denied user agent
}
type AnubisRule struct {
type NukeRule struct {
Expression *config.ExpressionOrList `yaml:"expression,omitempty" json:"expression,omitempty"`
Challenge *config.ChallengeRules `yaml:"challenge,omitempty" json:"challenge,omitempty"`
Weight *config.Weight `yaml:"weight,omitempty" json:"weight,omitempty"`
@ -95,11 +95,11 @@ func main() {
log.Fatalf("failed to parse robots.txt: %v", err)
}
// Convert to Anubis rules
anubisRules := convertToAnubisRules(rules)
// Convert to Nuke rules
nukeRules := convertToNukeRules(rules)
// Check if any rules were generated
if len(anubisRules) == 0 {
if len(nukeRules) == 0 {
log.Fatal("no valid rules generated from robots.txt - file may be empty or contain no disallow directives")
}
@ -107,9 +107,9 @@ func main() {
var output []byte
switch strings.ToLower(*outputFormat) {
case "yaml":
output, err = yaml.Marshal(anubisRules)
output, err = yaml.Marshal(nukeRules)
case "json":
output, err = json.MarshalIndent(anubisRules, "", " ")
output, err = json.MarshalIndent(nukeRules, "", " ")
default:
log.Fatalf("unsupported output format: %s (use yaml or json)", *outputFormat)
}
@ -126,7 +126,7 @@ func main() {
if err != nil {
log.Fatalf("failed to write output file: %v", err)
}
fmt.Printf("Generated Anubis policy written to %s\n", *outputFile)
fmt.Printf("Generated Nuke policy written to %s\n", *outputFile)
}
}
@ -227,8 +227,8 @@ func parseIntSafe(s string) (int, error) {
return result, err
}
func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
var anubisRules []AnubisRule
func convertToNukeRules(robotsRules []RobotsRule) []NukeRule {
var nukeRules []NukeRule
ruleCounter := 0
// Process each robots rule individually
@ -238,7 +238,7 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
// Handle crawl delay
if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 {
ruleCounter++
rule := AnubisRule{
rule := NukeRule{
Name: fmt.Sprintf("%s-crawl-delay-%d", *policyName, ruleCounter),
Action: "WEIGH",
Weight: &config.Weight{Adjust: *crawlDelay},
@ -266,13 +266,13 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
Any: expressions,
}
}
anubisRules = append(anubisRules, rule)
nukeRules = append(nukeRules, rule)
}
// Handle blacklisted user agents
if robotsRule.IsBlacklist {
ruleCounter++
rule := AnubisRule{
rule := NukeRule{
Name: fmt.Sprintf("%s-blacklist-%d", *policyName, ruleCounter),
Action: *userAgentDeny,
}
@ -306,7 +306,7 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
Any: expressions,
}
}
anubisRules = append(anubisRules, rule)
nukeRules = append(nukeRules, rule)
}
// Handle specific disallow rules
@ -316,7 +316,7 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
}
ruleCounter++
rule := AnubisRule{
rule := NukeRule{
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
Action: *baseAction,
}
@ -338,7 +338,7 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
continue // Skip wildcard as it's handled separately
}
ruleCounter++
subRule := AnubisRule{
subRule := NukeRule{
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
Action: *baseAction,
Expression: &config.ExpressionOrList{
@ -348,7 +348,7 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
},
},
}
anubisRules = append(anubisRules, subRule)
nukeRules = append(nukeRules, subRule)
}
continue
}
@ -361,11 +361,11 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
All: conditions,
}
anubisRules = append(anubisRules, rule)
nukeRules = append(nukeRules, rule)
}
}
return anubisRules
return nukeRules
}
func buildPathCondition(robotsPath string) string {

View file

@ -136,16 +136,16 @@ func TestDataFileConversion(t *testing.T) {
*userAgentDeny = oldDeniedAction
}()
// Convert to Anubis rules
anubisRules := convertToAnubisRules(rules)
// Convert to Nuke rules
nukeRules := convertToNukeRules(rules)
// Generate output
var actualOutput []byte
switch strings.ToLower(*outputFormat) {
case "yaml":
actualOutput, err = yaml.Marshal(anubisRules)
actualOutput, err = yaml.Marshal(nukeRules)
case "json":
actualOutput, err = json.MarshalIndent(anubisRules, "", " ")
actualOutput, err = json.MarshalIndent(nukeRules, "", " ")
}
if err != nil {
t.Fatalf("Failed to marshal output: %v", err)
@ -249,10 +249,10 @@ Disallow: /admin`
*policyName = "test-policy"
defer func() { *policyName = oldPolicyName }()
anubisRules := convertToAnubisRules(rules)
nukeRules := convertToNukeRules(rules)
// Test YAML output
yamlOutput, err := yaml.Marshal(anubisRules)
yamlOutput, err := yaml.Marshal(nukeRules)
if err != nil {
t.Fatalf("Failed to marshal YAML: %v", err)
}
@ -262,7 +262,7 @@ Disallow: /admin`
}
// Test JSON output
jsonOutput, err := json.MarshalIndent(anubisRules, "", " ")
jsonOutput, err := json.MarshalIndent(nukeRules, "", " ")
if err != nil {
t.Fatalf("Failed to marshal JSON: %v", err)
}
@ -290,14 +290,14 @@ Disallow: /admin`
*baseAction = action
defer func() { *baseAction = oldAction }()
anubisRules := convertToAnubisRules(rules)
nukeRules := convertToNukeRules(rules)
if len(anubisRules) != 1 {
t.Fatalf("Expected 1 rule, got %d", len(anubisRules))
if len(nukeRules) != 1 {
t.Fatalf("Expected 1 rule, got %d", len(nukeRules))
}
if anubisRules[0].Action != action {
t.Errorf("Expected action %s, got %s", action, anubisRules[0].Action)
if nukeRules[0].Action != action {
t.Errorf("Expected action %s, got %s", action, nukeRules[0].Action)
}
})
}
@ -325,10 +325,10 @@ Disallow: /`
*policyName = name
defer func() { *policyName = oldName }()
anubisRules := convertToAnubisRules(rules)
nukeRules := convertToNukeRules(rules)
// Check that all rule names use the custom prefix
for _, rule := range anubisRules {
for _, rule := range nukeRules {
if !strings.HasPrefix(rule.Name, name+"-") {
t.Errorf("Rule name %s doesn't start with expected prefix %s-", rule.Name, name)
}
@ -360,11 +360,11 @@ Crawl-delay: 60`
*crawlDelay = weight
defer func() { *crawlDelay = oldWeight }()
anubisRules := convertToAnubisRules(rules)
nukeRules := convertToNukeRules(rules)
// Count weight rules and verify they have correct weight
weightRules := 0
for _, rule := range anubisRules {
for _, rule := range nukeRules {
if rule.Action == "WEIGH" && rule.Weight != nil {
weightRules++
if rule.Weight.Adjust != weight {
@ -402,10 +402,10 @@ Disallow: /`
*userAgentDeny = action
defer func() { *userAgentDeny = oldAction }()
anubisRules := convertToAnubisRules(rules)
nukeRules := convertToNukeRules(rules)
// All rules should be blacklist rules with the specified action
for _, rule := range anubisRules {
for _, rule := range nukeRules {
if !strings.Contains(rule.Name, "blacklist") {
t.Errorf("Expected blacklist rule, got %s", rule.Name)
}