feat: enable loading config fragments (#321)

* feat(config): support importing bot policy snippets

This changes the grammar of the Anubis bot policy config to allow
importing from internal shared rules or external rules on the
filesystem.

This lets you create a file at `/data/policies/block-evilbot.yaml` and
then import it with:

```yaml
bots:
- import: /data/policies/block-evilbot.yaml
```

This also explodes the default policy file into a bunch of composable
snippets.

Thank you @Aibrew for your example gitea Atom / RSS feed rules!

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(data): update botPolicies.json to use imports

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(cmd/anubis): extract bot policies with --extract-resources

This allows a user that doesn't have anything but the Anubis binary to
figure out what the default configuration does.

* docs(data/botPolices.yaml): document import syntax in-line

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(lib/policy): better test importing from JSON snippets

Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs(admin): Add import syntax documentation

This documents the import syntax and is based on the block comment at
the top of the default bot policy file.

* docs(changelog): add note about importing snippets

Signed-off-by: Xe Iaso <me@xeiaso.net>

* style(lib/policy/config): use an error value instead of an inline error

Signed-off-by: Xe Iaso <me@xeiaso.net>

---------

Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
Xe Iaso 2025-04-23 07:01:28 -04:00 committed by GitHub
parent 4e2c9de708
commit 74e11505c6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 1210 additions and 1305 deletions

View file

@ -3,8 +3,15 @@ package config
import (
"errors"
"fmt"
"io"
"io/fs"
"net"
"os"
"regexp"
"strings"
"github.com/TecharoHQ/anubis/data"
"k8s.io/apimachinery/pkg/util/yaml"
)
var (
@ -17,6 +24,9 @@ var (
ErrInvalidPathRegex = errors.New("config.Bot: invalid path regex")
ErrInvalidHeadersRegex = errors.New("config.Bot: invalid headers regex")
ErrInvalidCIDR = errors.New("config.Bot: invalid CIDR")
ErrInvalidImportStatement = errors.New("config.ImportStatement: invalid source file")
ErrCantSetBotAndImportValuesAtOnce = errors.New("config.BotOrImport: can't set bot rules and import values at the same time")
ErrMustSetBotOrImportRules = errors.New("config.BotOrImport: rule definition is invalid, you must set either bot rules or an import statement, not both")
)
type Rule string
@ -47,6 +57,24 @@ type BotConfig struct {
Challenge *ChallengeRules `json:"challenge,omitempty"`
}
func (b BotConfig) Zero() bool {
for _, cond := range []bool{
b.Name != "",
b.UserAgentRegex != nil,
b.PathRegex != nil,
len(b.HeadersRegex) != 0,
b.Action != "",
len(b.RemoteAddr) != 0,
b.Challenge != nil,
} {
if cond {
return false
}
}
return true
}
func (b BotConfig) Valid() error {
var errs []error
@ -151,9 +179,147 @@ func (cr ChallengeRules) Valid() error {
return nil
}
type ImportStatement struct {
Import string `json:"import"`
Bots []BotConfig
}
func (is *ImportStatement) open() (fs.File, error) {
if strings.HasPrefix(is.Import, "(data)/") {
fname := strings.TrimPrefix(is.Import, "(data)/")
fin, err := data.BotPolicies.Open(fname)
return fin, err
}
return os.Open(is.Import)
}
func (is *ImportStatement) load() error {
fin, err := is.open()
if err != nil {
return fmt.Errorf("can't open %s: %w", is.Import, err)
}
defer fin.Close()
var result []BotConfig
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&result); err != nil {
return fmt.Errorf("can't parse %s: %w", is.Import, err)
}
var errs []error
for _, b := range result {
if err := b.Valid(); err != nil {
errs = append(errs, err)
}
}
if len(errs) != 0 {
return fmt.Errorf("config %s is not valid:\n%w", is.Import, errors.Join(errs...))
}
is.Bots = result
return nil
}
func (is *ImportStatement) Valid() error {
return is.load()
}
type BotOrImport struct {
*BotConfig `json:",inline"`
*ImportStatement `json:",inline"`
}
func (boi *BotOrImport) Valid() error {
if boi.BotConfig != nil && boi.ImportStatement != nil {
return ErrCantSetBotAndImportValuesAtOnce
}
if boi.BotConfig != nil {
return boi.BotConfig.Valid()
}
if boi.ImportStatement != nil {
return boi.ImportStatement.Valid()
}
return ErrMustSetBotOrImportRules
}
type fileConfig struct {
Bots []BotOrImport `json:"bots"`
DNSBL bool `json:"dnsbl"`
}
func (c fileConfig) Valid() error {
var errs []error
if len(c.Bots) == 0 {
errs = append(errs, ErrNoBotRulesDefined)
}
for _, b := range c.Bots {
if err := b.Valid(); err != nil {
errs = append(errs, err)
}
}
if len(errs) != 0 {
return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...))
}
return nil
}
func Load(fin io.Reader, fname string) (*Config, error) {
var c fileConfig
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
}
if err := c.Valid(); err != nil {
return nil, err
}
result := &Config{
DNSBL: c.DNSBL,
}
var validationErrs []error
for _, boi := range c.Bots {
if boi.ImportStatement != nil {
if err := boi.load(); err != nil {
validationErrs = append(validationErrs, err)
continue
}
result.Bots = append(result.Bots, boi.ImportStatement.Bots...)
}
if boi.BotConfig != nil {
if err := boi.BotConfig.Valid(); err != nil {
validationErrs = append(validationErrs, err)
continue
}
result.Bots = append(result.Bots, *boi.BotConfig)
}
}
if len(validationErrs) > 0 {
return nil, fmt.Errorf("errors validating policy config %s: %w", fname, errors.Join(validationErrs...))
}
return result, nil
}
type Config struct {
Bots []BotConfig `json:"bots"`
DNSBL bool `json:"dnsbl"`
Bots []BotConfig
DNSBL bool
}
func (c Config) Valid() error {

View file

@ -2,10 +2,12 @@ package config
import (
"errors"
"io/fs"
"os"
"path/filepath"
"testing"
"github.com/TecharoHQ/anubis/data"
"k8s.io/apimachinery/pkg/util/yaml"
)
@ -219,13 +221,69 @@ func TestConfigValidKnownGood(t *testing.T) {
}
defer fin.Close()
var c Config
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
t.Fatalf("can't decode file: %v", err)
c, err := Load(fin, st.Name())
if err != nil {
t.Fatal(err)
}
if err := c.Valid(); err != nil {
t.Fatal(err)
t.Error(err)
}
if len(c.Bots) == 0 {
t.Error("wanted more than 0 bots, got zero")
}
})
}
}
func TestImportStatement(t *testing.T) {
type testCase struct {
name string
importPath string
err error
}
var tests []testCase
for _, folderName := range []string{
"apps",
"bots",
"common",
"crawlers",
} {
if err := fs.WalkDir(data.BotPolicies, folderName, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
if d.IsDir() {
return nil
}
tests = append(tests, testCase{
name: "(data)/" + path,
importPath: "(data)/" + path,
err: nil,
})
return nil
}); err != nil {
t.Fatal(err)
}
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
is := &ImportStatement{
Import: tt.importPath,
}
if err := is.Valid(); err != nil {
t.Errorf("validation error: %v", err)
}
if len(is.Bots) == 0 {
t.Error("wanted bot definitions, but got none")
}
})
}
@ -246,7 +304,7 @@ func TestConfigValidBad(t *testing.T) {
}
defer fin.Close()
var c Config
var c fileConfig
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
t.Fatalf("can't decode file: %v", err)
}
@ -259,3 +317,49 @@ func TestConfigValidBad(t *testing.T) {
})
}
}
func TestBotConfigZero(t *testing.T) {
var b BotConfig
if !b.Zero() {
t.Error("zero value BotConfig is not zero value")
}
b.Name = "hi"
if b.Zero() {
t.Error("BotConfig with name is zero value")
}
b.UserAgentRegex = p(".*")
if b.Zero() {
t.Error("BotConfig with user agent regex is zero value")
}
b.PathRegex = p(".*")
if b.Zero() {
t.Error("BotConfig with path regex is zero value")
}
b.HeadersRegex = map[string]string{"hi": "there"}
if b.Zero() {
t.Error("BotConfig with headers regex is zero value")
}
b.Action = RuleAllow
if b.Zero() {
t.Error("BotConfig with action is zero value")
}
b.RemoteAddr = []string{"::/0"}
if b.Zero() {
t.Error("BotConfig with remote addresses is zero value")
}
b.Challenge = &ChallengeRules{
Difficulty: 4,
ReportAs: 4,
Algorithm: AlgorithmFast,
}
if b.Zero() {
t.Error("BotConfig with challenge rules is zero value")
}
}

View file

@ -0,0 +1,10 @@
{
"bots": [
{
"import": "(data)/bots/ai-robots-txt.yaml",
"name": "generic-browser",
"user_agent_regex": "Mozilla|Opera\n",
"action": "CHALLENGE"
}
]
}

View file

@ -0,0 +1,6 @@
bots:
- import: (data)/bots/ai-robots-txt.yaml
name: generic-browser
user_agent_regex: >
Mozilla|Opera
action: CHALLENGE

View file

@ -0,0 +1,7 @@
{
"bots": [
{
"import": "(data)/does-not-exist-fake-file.yaml"
}
]
}

View file

@ -0,0 +1,2 @@
bots:
- import: (data)/does-not-exist-fake-file.yaml

View file

@ -0,0 +1,7 @@
{
"bots": [
{
"import": "./testdata/hack-test.json"
}
]
}

View file

@ -0,0 +1,2 @@
bots:
- import: ./testdata/hack-test.yaml

View file

@ -0,0 +1,7 @@
{
"bots": [
{
"import": "(data)/common/keep-internet-working.yaml"
}
]
}

View file

@ -0,0 +1,2 @@
bots:
- import: (data)/common/keep-internet-working.yaml

View file

@ -0,0 +1,9 @@
[
{
"name": "ipv6-ula",
"action": "ALLOW",
"remote_addresses": [
"fc00::/7"
]
}
]

View file

@ -0,0 +1,3 @@
- name: well-known
path_regex: ^/.well-known/.*$
action: ALLOW

View file

@ -7,7 +7,6 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"k8s.io/apimachinery/pkg/util/yaml"
"github.com/TecharoHQ/anubis/lib/policy/config"
)
@ -20,26 +19,22 @@ var (
)
type ParsedConfig struct {
orig config.Config
orig *config.Config
Bots []Bot
DNSBL bool
DefaultDifficulty int
}
func NewParsedConfig(orig config.Config) *ParsedConfig {
func NewParsedConfig(orig *config.Config) *ParsedConfig {
return &ParsedConfig{
orig: orig,
}
}
func ParseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedConfig, error) {
var c config.Config
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
}
if err := c.Valid(); err != nil {
c, err := config.Load(fin, fname)
if err != nil {
return nil, err
}

9
lib/policy/testdata/hack-test.json vendored Normal file
View file

@ -0,0 +1,9 @@
[
{
"name": "ipv6-ula",
"action": "ALLOW",
"remote_addresses": [
"fc00::/7"
]
}
]

3
lib/policy/testdata/hack-test.yaml vendored Normal file
View file

@ -0,0 +1,3 @@
- name: well-known
path_regex: ^/.well-known/.*$
action: ALLOW