feat: add robots2policy CLI to convert robots.txt to Anubis CEL (#657)
* feat: add robots2policy CLI utility to convert robots.txt to Anubis challenge policies * feat: add documentation for robots2policy CLI tool * feat: implement crawl delay handling as weight adjustment in Anubis rules * feat: add various robots.txt and YAML configurations for user agent handling and crawl delays * test: add comprehensive tests for robots2policy conversion and parsing * fix: update example URL in usage instructions for robots2policy CLI * Update metadata check-spelling run (pull_request) for json/robots2policycli Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com> on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev> * docs: add crawl delay weight adjustment and deny user agents option to robots2policy CLI * Update cmd/robots2policy/main.go Co-authored-by: Xe Iaso <me@xeiaso.net> Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com> * Update cmd/robots2policy/main.go Co-authored-by: Xe Iaso <me@xeiaso.net> Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com> * fix(robots2policy): use sigs.k8s.io/yaml Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(config): properly marshal bot policy rules Signed-off-by: Xe Iaso <me@xeiaso.net> * chore(yeetfile): expose robots2policy in libexec Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(yeetfile): put robots2policy in $PATH Signed-off-by: Xe Iaso <me@xeiaso.net> * Update metadata check-spelling run (pull_request) for json/robots2policycli Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com> on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev> * style: reorder imports * refactor: use preexisting structs in config * fix: correct flag check in main function * fix: reorder fields in AnubisRule struct for better alignment * style: improve alignment of struct fields in AnubisRule and OGTagCache * Update metadata check-spelling run (pull_request) for json/robots2policycli Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com> on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev> * fix: add validation for generated Anubis rules from robots.txt * feat: add batch processing for robots.txt files to generate Anubis CEL policies * fix: improve usage message and error handling for input file requirement * refactor: update AnubisRule structure to use ExpressionOrList for improved expression handling * refactor: reorganize policy definitions in YAML files for consistency and clarity * fix: correct indentation in blacklist and complex YAML files for consistency * test: enhance output comparison in robots2policy tests for YAML and JSON formats * Revert "fix: improve usage message and error handling for input file requirement" This reverts commit ddcde1f2a326545d3ef2ec32e5e03f55f4f931a8. * fix: improve usage message and error handling in robots2policy Signed-off-by: Jason Cameron <git@jasoncameron.dev> --------- Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com> Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com> Signed-off-by: Xe Iaso <me@xeiaso.net> Signed-off-by: Jason Cameron <git@jasoncameron.dev> Co-authored-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
parent
7a195f1595
commit
e0781e4560
28 changed files with 1302 additions and 27 deletions
|
|
@ -1,12 +1,147 @@
|
|||
package config
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
yaml "sigs.k8s.io/yaml/goyaml.v3"
|
||||
)
|
||||
|
||||
func TestExpressionOrListUnmarshal(t *testing.T) {
|
||||
func TestExpressionOrListMarshalJSON(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
input *ExpressionOrList
|
||||
output []byte
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "single expression",
|
||||
input: &ExpressionOrList{
|
||||
Expression: "true",
|
||||
},
|
||||
output: []byte(`"true"`),
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "all",
|
||||
input: &ExpressionOrList{
|
||||
All: []string{"true", "true"},
|
||||
},
|
||||
output: []byte(`{"all":["true","true"]}`),
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "all one",
|
||||
input: &ExpressionOrList{
|
||||
All: []string{"true"},
|
||||
},
|
||||
output: []byte(`"true"`),
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "any",
|
||||
input: &ExpressionOrList{
|
||||
Any: []string{"true", "false"},
|
||||
},
|
||||
output: []byte(`{"any":["true","false"]}`),
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "any one",
|
||||
input: &ExpressionOrList{
|
||||
Any: []string{"true"},
|
||||
},
|
||||
output: []byte(`"true"`),
|
||||
err: nil,
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := json.Marshal(tt.input)
|
||||
if !errors.Is(err, tt.err) {
|
||||
t.Errorf("wanted marshal error: %v but got: %v", tt.err, err)
|
||||
}
|
||||
|
||||
if !bytes.Equal(result, tt.output) {
|
||||
t.Logf("wanted: %s", string(tt.output))
|
||||
t.Logf("got: %s", string(result))
|
||||
t.Error("mismatched output")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpressionOrListMarshalYAML(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
input *ExpressionOrList
|
||||
output []byte
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "single expression",
|
||||
input: &ExpressionOrList{
|
||||
Expression: "true",
|
||||
},
|
||||
output: []byte(`"true"`),
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "all",
|
||||
input: &ExpressionOrList{
|
||||
All: []string{"true", "true"},
|
||||
},
|
||||
output: []byte(`all:
|
||||
- "true"
|
||||
- "true"`),
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "all one",
|
||||
input: &ExpressionOrList{
|
||||
All: []string{"true"},
|
||||
},
|
||||
output: []byte(`"true"`),
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "any",
|
||||
input: &ExpressionOrList{
|
||||
Any: []string{"true", "false"},
|
||||
},
|
||||
output: []byte(`any:
|
||||
- "true"
|
||||
- "false"`),
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "any one",
|
||||
input: &ExpressionOrList{
|
||||
Any: []string{"true"},
|
||||
},
|
||||
output: []byte(`"true"`),
|
||||
err: nil,
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := yaml.Marshal(tt.input)
|
||||
if !errors.Is(err, tt.err) {
|
||||
t.Errorf("wanted marshal error: %v but got: %v", tt.err, err)
|
||||
}
|
||||
|
||||
result = bytes.TrimSpace(result)
|
||||
|
||||
if !bytes.Equal(result, tt.output) {
|
||||
t.Logf("wanted: %q", string(tt.output))
|
||||
t.Logf("got: %q", string(result))
|
||||
t.Error("mismatched output")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpressionOrListUnmarshalJSON(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
err error
|
||||
validErr error
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue