This commit is contained in:
parent
d2205b11a7
commit
02b9aebbe5
341 changed files with 1571 additions and 32574 deletions
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Batch process robots.txt files from archives like https://github.com/nrjones8/robots-dot-txt-archive-bot/tree/master/data/cleaned
|
||||
into Anubis CEL policies. Usage: go run batch_process.go <directory with robots.txt files>
|
||||
into nuke CEL policies. Usage: go run batch_process.go <directory with robots.txt files>
|
||||
*/
|
||||
package main
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import (
|
|||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/config"
|
||||
"git.sad.ovh/sophie/nuke/lib/config"
|
||||
|
||||
"sigs.k8s.io/yaml"
|
||||
)
|
||||
|
|
@ -36,7 +36,7 @@ type RobotsRule struct {
|
|||
IsBlacklist bool // true if this is a specifically denied user agent
|
||||
}
|
||||
|
||||
type AnubisRule struct {
|
||||
type NukeRule struct {
|
||||
Expression *config.ExpressionOrList `yaml:"expression,omitempty" json:"expression,omitempty"`
|
||||
Challenge *config.ChallengeRules `yaml:"challenge,omitempty" json:"challenge,omitempty"`
|
||||
Weight *config.Weight `yaml:"weight,omitempty" json:"weight,omitempty"`
|
||||
|
|
@ -95,11 +95,11 @@ func main() {
|
|||
log.Fatalf("failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
// Convert to Anubis rules
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
// Convert to Nuke rules
|
||||
nukeRules := convertToNukeRules(rules)
|
||||
|
||||
// Check if any rules were generated
|
||||
if len(anubisRules) == 0 {
|
||||
if len(nukeRules) == 0 {
|
||||
log.Fatal("no valid rules generated from robots.txt - file may be empty or contain no disallow directives")
|
||||
}
|
||||
|
||||
|
|
@ -107,9 +107,9 @@ func main() {
|
|||
var output []byte
|
||||
switch strings.ToLower(*outputFormat) {
|
||||
case "yaml":
|
||||
output, err = yaml.Marshal(anubisRules)
|
||||
output, err = yaml.Marshal(nukeRules)
|
||||
case "json":
|
||||
output, err = json.MarshalIndent(anubisRules, "", " ")
|
||||
output, err = json.MarshalIndent(nukeRules, "", " ")
|
||||
default:
|
||||
log.Fatalf("unsupported output format: %s (use yaml or json)", *outputFormat)
|
||||
}
|
||||
|
|
@ -126,7 +126,7 @@ func main() {
|
|||
if err != nil {
|
||||
log.Fatalf("failed to write output file: %v", err)
|
||||
}
|
||||
fmt.Printf("Generated Anubis policy written to %s\n", *outputFile)
|
||||
fmt.Printf("Generated Nuke policy written to %s\n", *outputFile)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -227,8 +227,8 @@ func parseIntSafe(s string) (int, error) {
|
|||
return result, err
|
||||
}
|
||||
|
||||
func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
||||
var anubisRules []AnubisRule
|
||||
func convertToNukeRules(robotsRules []RobotsRule) []NukeRule {
|
||||
var nukeRules []NukeRule
|
||||
ruleCounter := 0
|
||||
|
||||
// Process each robots rule individually
|
||||
|
|
@ -238,7 +238,7 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
|||
// Handle crawl delay
|
||||
if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 {
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
rule := NukeRule{
|
||||
Name: fmt.Sprintf("%s-crawl-delay-%d", *policyName, ruleCounter),
|
||||
Action: "WEIGH",
|
||||
Weight: &config.Weight{Adjust: *crawlDelay},
|
||||
|
|
@ -266,13 +266,13 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
|||
Any: expressions,
|
||||
}
|
||||
}
|
||||
anubisRules = append(anubisRules, rule)
|
||||
nukeRules = append(nukeRules, rule)
|
||||
}
|
||||
|
||||
// Handle blacklisted user agents
|
||||
if robotsRule.IsBlacklist {
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
rule := NukeRule{
|
||||
Name: fmt.Sprintf("%s-blacklist-%d", *policyName, ruleCounter),
|
||||
Action: *userAgentDeny,
|
||||
}
|
||||
|
|
@ -306,7 +306,7 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
|||
Any: expressions,
|
||||
}
|
||||
}
|
||||
anubisRules = append(anubisRules, rule)
|
||||
nukeRules = append(nukeRules, rule)
|
||||
}
|
||||
|
||||
// Handle specific disallow rules
|
||||
|
|
@ -316,7 +316,7 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
|||
}
|
||||
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
rule := NukeRule{
|
||||
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
|
||||
Action: *baseAction,
|
||||
}
|
||||
|
|
@ -338,7 +338,7 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
|||
continue // Skip wildcard as it's handled separately
|
||||
}
|
||||
ruleCounter++
|
||||
subRule := AnubisRule{
|
||||
subRule := NukeRule{
|
||||
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
|
||||
Action: *baseAction,
|
||||
Expression: &config.ExpressionOrList{
|
||||
|
|
@ -348,7 +348,7 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
|||
},
|
||||
},
|
||||
}
|
||||
anubisRules = append(anubisRules, subRule)
|
||||
nukeRules = append(nukeRules, subRule)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
|
@ -361,11 +361,11 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
|||
All: conditions,
|
||||
}
|
||||
|
||||
anubisRules = append(anubisRules, rule)
|
||||
nukeRules = append(nukeRules, rule)
|
||||
}
|
||||
}
|
||||
|
||||
return anubisRules
|
||||
return nukeRules
|
||||
}
|
||||
|
||||
func buildPathCondition(robotsPath string) string {
|
||||
|
|
|
|||
|
|
@ -136,16 +136,16 @@ func TestDataFileConversion(t *testing.T) {
|
|||
*userAgentDeny = oldDeniedAction
|
||||
}()
|
||||
|
||||
// Convert to Anubis rules
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
// Convert to Nuke rules
|
||||
nukeRules := convertToNukeRules(rules)
|
||||
|
||||
// Generate output
|
||||
var actualOutput []byte
|
||||
switch strings.ToLower(*outputFormat) {
|
||||
case "yaml":
|
||||
actualOutput, err = yaml.Marshal(anubisRules)
|
||||
actualOutput, err = yaml.Marshal(nukeRules)
|
||||
case "json":
|
||||
actualOutput, err = json.MarshalIndent(anubisRules, "", " ")
|
||||
actualOutput, err = json.MarshalIndent(nukeRules, "", " ")
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal output: %v", err)
|
||||
|
|
@ -249,10 +249,10 @@ Disallow: /admin`
|
|||
*policyName = "test-policy"
|
||||
defer func() { *policyName = oldPolicyName }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
nukeRules := convertToNukeRules(rules)
|
||||
|
||||
// Test YAML output
|
||||
yamlOutput, err := yaml.Marshal(anubisRules)
|
||||
yamlOutput, err := yaml.Marshal(nukeRules)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal YAML: %v", err)
|
||||
}
|
||||
|
|
@ -262,7 +262,7 @@ Disallow: /admin`
|
|||
}
|
||||
|
||||
// Test JSON output
|
||||
jsonOutput, err := json.MarshalIndent(anubisRules, "", " ")
|
||||
jsonOutput, err := json.MarshalIndent(nukeRules, "", " ")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal JSON: %v", err)
|
||||
}
|
||||
|
|
@ -290,14 +290,14 @@ Disallow: /admin`
|
|||
*baseAction = action
|
||||
defer func() { *baseAction = oldAction }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
nukeRules := convertToNukeRules(rules)
|
||||
|
||||
if len(anubisRules) != 1 {
|
||||
t.Fatalf("Expected 1 rule, got %d", len(anubisRules))
|
||||
if len(nukeRules) != 1 {
|
||||
t.Fatalf("Expected 1 rule, got %d", len(nukeRules))
|
||||
}
|
||||
|
||||
if anubisRules[0].Action != action {
|
||||
t.Errorf("Expected action %s, got %s", action, anubisRules[0].Action)
|
||||
if nukeRules[0].Action != action {
|
||||
t.Errorf("Expected action %s, got %s", action, nukeRules[0].Action)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
@ -325,10 +325,10 @@ Disallow: /`
|
|||
*policyName = name
|
||||
defer func() { *policyName = oldName }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
nukeRules := convertToNukeRules(rules)
|
||||
|
||||
// Check that all rule names use the custom prefix
|
||||
for _, rule := range anubisRules {
|
||||
for _, rule := range nukeRules {
|
||||
if !strings.HasPrefix(rule.Name, name+"-") {
|
||||
t.Errorf("Rule name %s doesn't start with expected prefix %s-", rule.Name, name)
|
||||
}
|
||||
|
|
@ -360,11 +360,11 @@ Crawl-delay: 60`
|
|||
*crawlDelay = weight
|
||||
defer func() { *crawlDelay = oldWeight }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
nukeRules := convertToNukeRules(rules)
|
||||
|
||||
// Count weight rules and verify they have correct weight
|
||||
weightRules := 0
|
||||
for _, rule := range anubisRules {
|
||||
for _, rule := range nukeRules {
|
||||
if rule.Action == "WEIGH" && rule.Weight != nil {
|
||||
weightRules++
|
||||
if rule.Weight.Adjust != weight {
|
||||
|
|
@ -402,10 +402,10 @@ Disallow: /`
|
|||
*userAgentDeny = action
|
||||
defer func() { *userAgentDeny = oldAction }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
nukeRules := convertToNukeRules(rules)
|
||||
|
||||
// All rules should be blacklist rules with the specified action
|
||||
for _, rule := range anubisRules {
|
||||
for _, rule := range nukeRules {
|
||||
if !strings.Contains(rule.Name, "blacklist") {
|
||||
t.Errorf("Expected blacklist rule, got %s", rule.Name)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue