feat: writing logs to the filesystem with rotation support (#1299)

* refactor: move lib/policy/config to lib/config

Signed-off-by: Xe Iaso <me@xeiaso.net>

* refactor: don't set global loggers anymore

Ref #864

You were right @kotx, it is a bad idea to set the global logger
instance.

Signed-off-by: Xe Iaso <me@xeiaso.net>

* feat(config): add log sink support

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: update spelling

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore(test): go mod tidy

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: update spelling

Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs(admin/policies): add logging block documentation

Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs: update CHANGELOG

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(cmd/anubis): revert this change, it's meant to be its own PR

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: go mod tidy

Signed-off-by: Xe Iaso <me@xeiaso.net>

* test: add file logging smoke test

Assisted-by: GLM 4.6 via Claude Code
Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix: don't expose the old log file time format string

Signed-off-by: Xe Iaso <me@xeiaso.net>

---------

Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
Xe Iaso 2025-11-21 11:46:00 -05:00 committed by GitHub
parent a709a2b2da
commit f032d5d0ac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
118 changed files with 789 additions and 65 deletions

View file

@ -4,8 +4,8 @@ import (
"fmt"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/lib/config"
"github.com/TecharoHQ/anubis/lib/policy/checker"
"github.com/TecharoHQ/anubis/lib/policy/config"
)
type Bot struct {

View file

@ -5,7 +5,7 @@ import (
"net/http"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/config"
"github.com/TecharoHQ/anubis/lib/policy/expressions"
"github.com/google/cel-go/cel"
"github.com/google/cel-go/common/types"

View file

@ -3,7 +3,7 @@ package policy
import (
"log/slog"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/config"
)
type CheckResult struct {

View file

@ -1,44 +0,0 @@
package config
import (
"errors"
"fmt"
)
var (
// ErrPrivateASN is the sentinel wrapped by (*ASNs).Valid for every matched
// ASN that isPrivateASN reports as being in a private use range.
ErrPrivateASN = errors.New("bot.ASNs: you have specified a private use ASN")
)
// ASNs is the "asns" matcher block of a bot rule.
type ASNs struct {
// Match lists the autonomous system numbers to match; Valid rejects any
// entry that falls in a private use range.
Match []uint32 `json:"match"`
}
// Valid reports whether every ASN in a.Match is usable.
//
// Each ASN that isPrivateASN flags produces one error wrapping ErrPrivateASN;
// all of them are joined and wrapped in a single "bot.ASNs" error. It returns
// nil when no private use ASN is listed.
func (a *ASNs) Valid() error {
	var problems []error

	for i := range a.Match {
		candidate := a.Match[i]
		if !isPrivateASN(candidate) {
			continue
		}
		problems = append(problems, fmt.Errorf("%w: %d is private (see RFC 6996)", ErrPrivateASN, candidate))
	}

	if len(problems) == 0 {
		return nil
	}
	return fmt.Errorf("bot.ASNs: invalid ASN settings: %w", errors.Join(problems...))
}
// isPrivateASN reports whether asn lies in one of the private use ranges.
//
// The ranges follow RFC 6996 and the IANA allocations: 64512-65534 for
// 16 bit ASNs and 4200000000-4294967294 for 32 bit ASNs (both inclusive).
func isPrivateASN(asn uint32) bool {
	const (
		low16, high16 = 64512, 65534
		low32, high32 = 4200000000, 4294967294
	)

	in16 := low16 <= asn && asn <= high16
	in32 := low32 <= asn && asn <= high32
	return in16 || in32
}

View file

@ -1,55 +0,0 @@
package config
import (
"errors"
"fmt"
"testing"
)
// TestASNsValid checks that (*ASNs).Valid accepts a public ASN and reports
// ErrPrivateASN when private use ASNs are listed.
func TestASNsValid(t *testing.T) {
	type testCase struct {
		err   error
		input *ASNs
		name  string
	}

	cases := []testCase{
		{
			name:  "basic valid",
			input: &ASNs{Match: []uint32{13335}}, // Cloudflare
		},
		{
			name:  "private ASN",
			input: &ASNs{Match: []uint32{64513, 4206942069}}, // 16 and 32 bit private ASN
			err:   ErrPrivateASN,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.input.Valid()
			if errors.Is(got, tc.err) {
				return
			}
			t.Logf("want: %v", tc.err)
			t.Logf("got: %v", got)
			t.Error("got wrong validation error")
		})
	}
}
// TestIsPrivateASN checks isPrivateASN against one public ASN and one private
// use ASN from each of the 16 bit and 32 bit ranges.
func TestIsPrivateASN(t *testing.T) {
	type testCase struct {
		input  uint32
		output bool
	}

	cases := []testCase{
		{input: 13335, output: false},     // Cloudflare
		{input: 64513, output: true},      // 16 bit private ASN
		{input: 4206942069, output: true}, // 32 bit private ASN
	}

	for _, tc := range cases {
		name := fmt.Sprint(tc.input, "->", tc.output)
		t.Run(name, func(t *testing.T) {
			if got := isPrivateASN(tc.input); got != tc.output {
				t.Errorf("wanted isPrivateASN(%d) == %v, got: %v", tc.input, tc.output, got)
			}
		})
	}
}

View file

@ -1,53 +0,0 @@
//go:build ignore
package config
import (
"context"
"encoding/json"
"errors"
"fmt"
"github.com/TecharoHQ/anubis/lib/checker"
)
var (
// ErrUnknownCheckType is the sentinel wrapped by (*Check).Valid when
// checker.Get has no factory registered for the check's type.
ErrUnknownCheckType = errors.New("config.Bot.Check: unknown check type")
)
// AllChecks is a list of checks decoded from the "all" JSON key.
type AllChecks struct {
All []Check `json:"all"`
}
// AnyChecks is a list of checks decoded from the "any" JSON key.
type AnyChecks struct {
// NOTE(review): the field is named All although it carries the "any" key —
// presumably copied from AllChecks; confirm before relying on the name.
All []Check `json:"any"`
}
// Check is one check definition: a type name plus its raw, still-encoded
// arguments.
type Check struct {
// Type is the name passed to checker.Get to find the check's factory.
Type string `json:"type"`
// Args is kept as raw JSON and handed to the factory's Valid method.
Args json.RawMessage `json:"args"`
}
// Valid reports whether the check names a registered check type and whether
// that type's factory accepts the check's arguments.
//
// An empty or unregistered type yields an error wrapping ErrUnknownCheckType.
// Otherwise the error (if any) from the factory's own Valid method is
// returned unchanged.
func (c *Check) Valid(ctx context.Context) error {
	// An empty type is reported once, with the check-type sentinel. The
	// previous code used the store backend sentinel here and then reported the
	// empty name a second time as an unknown type.
	if len(c.Type) == 0 {
		return fmt.Errorf("%w: type must be set", ErrUnknownCheckType)
	}

	fac, ok := checker.Get(c.Type)
	if !ok {
		return fmt.Errorf("%w: %q", ErrUnknownCheckType, c.Type)
	}

	return fac.Valid(ctx, c.Args)
}

View file

@ -1,493 +0,0 @@
package config
import (
"errors"
"fmt"
"io"
"io/fs"
"net"
"net/http"
"os"
"regexp"
"strings"
"time"
"github.com/TecharoHQ/anubis/data"
"k8s.io/apimachinery/pkg/util/yaml"
)
// Sentinel errors for policy file validation. Callers match them with
// errors.Is; most are returned wrapped with extra detail.
var (
// ErrNoBotRulesDefined is reported by (*fileConfig).Valid and Config.Valid
// when the bot list is empty.
ErrNoBotRulesDefined = errors.New("config: must define at least one (1) bot rule")
// ErrBotMustHaveName is reported by (*BotConfig).Valid when Name is empty.
ErrBotMustHaveName = errors.New("config.Bot: must set name")
// ErrBotMustHaveUserAgentOrPath is reported by (*BotConfig).Valid when the
// rule has no matcher at all.
ErrBotMustHaveUserAgentOrPath = errors.New("config.Bot: must set either user_agent_regex, path_regex, headers_regex, or remote_addresses")
// ErrBotMustHaveUserAgentOrPathNotBoth is reported by (*BotConfig).Valid
// when both the user agent and the path regex are set.
ErrBotMustHaveUserAgentOrPathNotBoth = errors.New("config.Bot: must set either user_agent_regex, path_regex, and not both")
// ErrUnknownAction is reported by Rule.Valid and (*BotConfig).Valid for an
// action outside the known Rule constants.
ErrUnknownAction = errors.New("config.Bot: unknown action")
// The following four accompany the underlying regexp/CIDR parse error in
// (*BotConfig).Valid.
ErrInvalidUserAgentRegex = errors.New("config.Bot: invalid user agent regex")
ErrInvalidPathRegex = errors.New("config.Bot: invalid path regex")
ErrInvalidHeadersRegex = errors.New("config.Bot: invalid headers regex")
ErrInvalidCIDR = errors.New("config.Bot: invalid CIDR")
// ErrRegexEndsWithNewline is wrapped by (*BotConfig).Valid when a regex
// ends in "\n".
ErrRegexEndsWithNewline = errors.New("config.Bot: regular expression ends with newline (try >- instead of > in yaml)")
// ErrInvalidImportStatement is wrapped by (*ImportStatement).load when the
// import source cannot be opened.
ErrInvalidImportStatement = errors.New("config.ImportStatement: invalid source file")
// ErrCantSetBotAndImportValuesAtOnce and ErrMustSetBotOrImportRules are
// returned by (*BotOrImport).Valid when both or neither of the two
// alternatives are set, respectively.
ErrCantSetBotAndImportValuesAtOnce = errors.New("config.BotOrImport: can't set bot rules and import values at the same time")
ErrMustSetBotOrImportRules = errors.New("config.BotOrImport: rule definition is invalid, you must set either bot rules or an import statement, not both")
// ErrStatusCodeNotValid is wrapped by StatusCodes.Valid.
ErrStatusCodeNotValid = errors.New("config.StatusCode: status code not valid, must be between 100 and 599")
)
// Rule is the action a bot rule takes, spelled as it appears in the policy
// file.
type Rule string
// Known rule actions. RuleUnknown is the zero value and is rejected by
// Rule.Valid.
const (
RuleUnknown Rule = ""
RuleAllow Rule = "ALLOW"
RuleDeny Rule = "DENY"
RuleChallenge Rule = "CHALLENGE"
RuleWeigh Rule = "WEIGH"
RuleBenchmark Rule = "DEBUG_BENCHMARK"
)
// Valid returns nil when r is one of the known actions (allow, deny,
// challenge, weigh, benchmark) and ErrUnknownAction for anything else,
// including the empty RuleUnknown.
func (r Rule) Valid() error {
	known := r == RuleAllow ||
		r == RuleDeny ||
		r == RuleChallenge ||
		r == RuleWeigh ||
		r == RuleBenchmark
	if !known {
		return ErrUnknownAction
	}
	return nil
}
// DefaultAlgorithm is the name of the default challenge algorithm.
const DefaultAlgorithm = "fast"
// BotConfig is a single bot rule from the policy file: a name, an action,
// and the matchers that select the requests the action applies to.
type BotConfig struct {
// UserAgentRegex and PathRegex are optional regular expressions; Valid
// rejects a rule that sets both.
UserAgentRegex *string `json:"user_agent_regex,omitempty" yaml:"user_agent_regex,omitempty"`
PathRegex *string `json:"path_regex,omitempty" yaml:"path_regex,omitempty"`
// HeadersRegex maps a header name to a regular expression.
HeadersRegex map[string]string `json:"headers_regex,omitempty" yaml:"headers_regex,omitempty"`
Expression *ExpressionOrList `json:"expression,omitempty" yaml:"expression,omitempty"`
// Challenge is validated by Valid only when Action is RuleChallenge.
Challenge *ChallengeRules `json:"challenge,omitempty" yaml:"challenge,omitempty"`
// Weight is defaulted by Valid when Action is RuleWeigh and it is nil.
Weight *Weight `json:"weight,omitempty" yaml:"weight,omitempty"`
// Thoth features
GeoIP *GeoIP `json:"geoip,omitempty"`
ASNs *ASNs `json:"asns,omitempty"`
Name string `json:"name" yaml:"name"`
Action Rule `json:"action" yaml:"action"`
// RemoteAddr holds CIDR strings; Valid parses each with net.ParseCIDR.
RemoteAddr []string `json:"remote_addresses,omitempty" yaml:"remote_addresses,omitempty"`
}
// Zero reports whether b carries no configuration: no name, action, regexes,
// header matchers, remote addresses, challenge rules, GeoIP or ASN block.
//
// NOTE(review): Expression and Weight are not consulted, so a rule that sets
// only those still counts as zero — confirm that is intended.
func (b BotConfig) Zero() bool {
	switch {
	case b.Name != "",
		b.UserAgentRegex != nil,
		b.PathRegex != nil,
		len(b.HeadersRegex) != 0,
		b.Action != "",
		len(b.RemoteAddr) != 0,
		b.Challenge != nil,
		b.GeoIP != nil,
		b.ASNs != nil:
		return false
	}
	return true
}
// Valid checks the bot rule and returns every problem found, joined and
// wrapped in one error that names the rule; it returns nil when the rule is
// acceptable.
//
// It requires a name, at least one matcher, a known action, regexes that
// compile and do not end in a newline, and remote addresses that parse as
// CIDRs. A rule may not set both the user agent and the path regex.
//
// Side effect: a RuleWeigh rule with no Weight gets a default Weight with
// Adjust set to 5.
func (b *BotConfig) Valid() error {
	var problems []error

	if b.Name == "" {
		problems = append(problems, ErrBotMustHaveName)
	}

	hasMatcher := b.UserAgentRegex != nil ||
		b.PathRegex != nil ||
		len(b.RemoteAddr) != 0 ||
		len(b.HeadersRegex) != 0 ||
		b.ASNs != nil ||
		b.GeoIP != nil ||
		b.Expression != nil
	if !hasMatcher {
		problems = append(problems, ErrBotMustHaveUserAgentOrPath)
	}

	if b.UserAgentRegex != nil && b.PathRegex != nil {
		problems = append(problems, ErrBotMustHaveUserAgentOrPathNotBoth)
	}

	if ua := b.UserAgentRegex; ua != nil {
		if strings.HasSuffix(*ua, "\n") {
			problems = append(problems, fmt.Errorf("%w: user agent regex: %q", ErrRegexEndsWithNewline, *ua))
		}
		if _, err := regexp.Compile(*ua); err != nil {
			problems = append(problems, ErrInvalidUserAgentRegex, err)
		}
	}

	if path := b.PathRegex; path != nil {
		if strings.HasSuffix(*path, "\n") {
			problems = append(problems, fmt.Errorf("%w: path regex: %q", ErrRegexEndsWithNewline, *path))
		}
		if _, err := regexp.Compile(*path); err != nil {
			problems = append(problems, ErrInvalidPathRegex, err)
		}
	}

	// Ranging over an empty map or slice is a no-op, so no length guards are
	// needed for the two loops below.
	for header, pattern := range b.HeadersRegex {
		// Entries with an empty header name are skipped, not reported.
		if header == "" {
			continue
		}
		if strings.HasSuffix(pattern, "\n") {
			problems = append(problems, fmt.Errorf("%w: header %s regex: %q", ErrRegexEndsWithNewline, header, pattern))
		}
		if _, err := regexp.Compile(pattern); err != nil {
			problems = append(problems, ErrInvalidHeadersRegex, err)
		}
	}

	for _, addr := range b.RemoteAddr {
		if _, _, err := net.ParseCIDR(addr); err != nil {
			problems = append(problems, ErrInvalidCIDR, err)
		}
	}

	if b.Expression != nil {
		if err := b.Expression.Valid(); err != nil {
			problems = append(problems, err)
		}
	}

	switch b.Action {
	case RuleAllow, RuleBenchmark, RuleChallenge, RuleDeny, RuleWeigh:
		// recognised action
	default:
		problems = append(problems, fmt.Errorf("%w: %q", ErrUnknownAction, b.Action))
	}

	if b.Challenge != nil && b.Action == RuleChallenge {
		if err := b.Challenge.Valid(); err != nil {
			problems = append(problems, err)
		}
	}

	if b.Weight == nil && b.Action == RuleWeigh {
		b.Weight = &Weight{Adjust: 5}
	}

	if len(problems) == 0 {
		return nil
	}
	return fmt.Errorf("config: bot entry for %q is not valid:\n%w", b.Name, errors.Join(problems...))
}
// ChallengeRules is the optional "challenge" block of a bot rule.
type ChallengeRules struct {
// Algorithm must be non-empty to pass Valid.
Algorithm string `json:"algorithm,omitempty" yaml:"algorithm,omitempty"`
// Difficulty must be within 0..64 to pass Valid.
Difficulty int `json:"difficulty,omitempty" yaml:"difficulty,omitempty"`
// ReportAs is not checked by Valid.
ReportAs int `json:"report_as,omitempty" yaml:"report_as,omitempty"`
}
// Sentinel errors reported by ChallengeRules.Valid.
var (
ErrChallengeDifficultyTooLow = errors.New("config.ChallengeRules: difficulty is too low (must be >= 0)")
ErrChallengeDifficultyTooHigh = errors.New("config.ChallengeRules: difficulty is too high (must be <= 64)")
ErrChallengeMustHaveAlgorithm = errors.New("config.ChallengeRules: must have algorithm name set")
)
// Valid checks that an algorithm name is set and that the difficulty lies in
// 0..64. All problems are joined and wrapped in a single error; nil is
// returned when the rules are acceptable.
func (cr ChallengeRules) Valid() error {
	var problems []error

	if cr.Algorithm == "" {
		problems = append(problems, ErrChallengeMustHaveAlgorithm)
	}

	// The two bounds are mutually exclusive, so at most one case fires.
	switch {
	case cr.Difficulty < 0:
		problems = append(problems, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooLow, cr.Difficulty))
	case cr.Difficulty > 64:
		problems = append(problems, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooHigh, cr.Difficulty))
	}

	if len(problems) == 0 {
		return nil
	}
	return fmt.Errorf("config: challenge rules entry is not valid:\n%w", errors.Join(problems...))
}
// ImportStatement pulls bot rules in from another policy file.
type ImportStatement struct {
// Import is the source to read: a "(data)/" prefix selects a file from
// data.BotPolicies, anything else is opened as a filesystem path.
Import string `json:"import"`
// Bots is filled in by load with the rules found in the imported file.
Bots []BotConfig
}
// open returns a handle on the import source. A path starting with "(data)/"
// is looked up in data.BotPolicies with that prefix removed; any other path
// is opened from the local filesystem.
func (is *ImportStatement) open() (fs.File, error) {
	const embeddedPrefix = "(data)/"

	if !strings.HasPrefix(is.Import, embeddedPrefix) {
		return os.Open(is.Import)
	}

	fname := strings.TrimPrefix(is.Import, embeddedPrefix)
	fin, err := data.BotPolicies.Open(fname)
	return fin, err
}
// load opens the import source, decodes it as a YAML list of BotOrImport
// entries, validates each entry and stores the resulting bot rules in
// is.Bots.
//
// Rules from nested import statements are flattened into the result. When
// any entry fails validation is.Bots is left untouched and the joined
// validation errors are returned.
func (is *ImportStatement) load() error {
	src, err := is.open()
	if err != nil {
		return fmt.Errorf("%w: %s: %w", ErrInvalidImportStatement, is.Import, err)
	}
	defer src.Close()

	var entries []BotOrImport
	if err := yaml.NewYAMLToJSONDecoder(src).Decode(&entries); err != nil {
		return fmt.Errorf("can't parse %s: %w", is.Import, err)
	}

	var (
		bots     []BotConfig
		problems []error
	)
	for _, entry := range entries {
		if err := entry.Valid(); err != nil {
			problems = append(problems, err)
		}

		if nested := entry.ImportStatement; nested != nil {
			bots = append(bots, nested.Bots...)
		}

		if bot := entry.BotConfig; bot != nil {
			bots = append(bots, *bot)
		}
	}

	if len(problems) > 0 {
		return fmt.Errorf("config %s is not valid:\n%w", is.Import, errors.Join(problems...))
	}

	is.Bots = bots
	return nil
}
// Valid validates the import statement by loading it: the source is opened,
// parsed and its entries validated, and on success is.Bots is populated.
func (is *ImportStatement) Valid() error {
return is.load()
}
// BotOrImport is one entry of a bot list: either an inline bot rule or an
// import statement. Valid rejects entries that set both or neither.
type BotOrImport struct {
*BotConfig `json:",inline"`
*ImportStatement `json:",inline"`
}
// Valid requires exactly one of the embedded BotConfig and ImportStatement
// to be set and delegates to that member's own Valid method.
//
// It returns ErrCantSetBotAndImportValuesAtOnce when both are set and
// ErrMustSetBotOrImportRules when neither is.
func (boi *BotOrImport) Valid() error {
	hasBot, hasImport := boi.BotConfig != nil, boi.ImportStatement != nil

	switch {
	case hasBot && hasImport:
		return ErrCantSetBotAndImportValuesAtOnce
	case hasBot:
		return boi.BotConfig.Valid()
	case hasImport:
		return boi.ImportStatement.Valid()
	default:
		return ErrMustSetBotOrImportRules
	}
}
// StatusCodes holds the status codes configured for the CHALLENGE and DENY
// actions.
// NOTE(review): presumably these are the HTTP response codes sent for those
// actions (Load defaults both to http.StatusOK) — confirm against the caller.
type StatusCodes struct {
Challenge int `json:"CHALLENGE"`
Deny int `json:"DENY"`
}
// Valid checks that both status codes lie in the range 100..599 (inclusive),
// as the ErrStatusCodeNotValid message promises.
//
// Every out-of-range code produces one error wrapping ErrStatusCodeNotValid;
// the errors are joined and wrapped in a single "status codes not valid"
// error. It returns nil when both codes are acceptable.
func (sc StatusCodes) Valid() error {
	// The previous condition (code < 100 && code >= 599) could never be true,
	// so every non-zero value was accepted. An unset (zero) code is still
	// rejected because it is below 100.
	outOfRange := func(code int) bool {
		return code < 100 || code > 599
	}

	var errs []error

	if outOfRange(sc.Challenge) {
		errs = append(errs, fmt.Errorf("%w: challenge is %d", ErrStatusCodeNotValid, sc.Challenge))
	}

	if outOfRange(sc.Deny) {
		errs = append(errs, fmt.Errorf("%w: deny is %d", ErrStatusCodeNotValid, sc.Deny))
	}

	if len(errs) != 0 {
		return fmt.Errorf("status codes not valid:\n%w", errors.Join(errs...))
	}

	return nil
}
// fileConfig is the policy file as decoded from YAML, before Load resolves
// imports and copies the validated values into a Config.
type fileConfig struct {
OpenGraph openGraphFileConfig `json:"openGraph,omitempty"`
Impressum *Impressum `json:"impressum,omitempty"`
Store *Store `json:"store"`
// Bots may mix inline rules and import statements.
Bots []BotOrImport `json:"bots"`
Thresholds []Threshold `json:"thresholds"`
StatusCodes StatusCodes `json:"status_codes"`
DNSBL bool `json:"dnsbl"`
}
// Valid checks the decoded policy file: at least one bot entry, every bot
// entry, the OpenGraph block (only when enabled), the status codes, every
// threshold and the store (when present).
//
// All problems are collected, joined and wrapped in a single "config is not
// valid" error; nil is returned when nothing is wrong.
func (c *fileConfig) Valid() error {
	var problems []error

	if len(c.Bots) == 0 {
		problems = append(problems, ErrNoBotRulesDefined)
	}

	for idx, entry := range c.Bots {
		err := entry.Valid()
		if err == nil {
			continue
		}
		problems = append(problems, fmt.Errorf("bot %d: %w", idx, err))
	}

	if c.OpenGraph.Enabled {
		if err := c.OpenGraph.Valid(); err != nil {
			problems = append(problems, err)
		}
	}

	if err := c.StatusCodes.Valid(); err != nil {
		problems = append(problems, err)
	}

	for idx, threshold := range c.Thresholds {
		err := threshold.Valid()
		if err == nil {
			continue
		}
		problems = append(problems, fmt.Errorf("threshold %d: %w", idx, err))
	}

	if c.Store != nil {
		if err := c.Store.Valid(); err != nil {
			problems = append(problems, err)
		}
	}

	if len(problems) == 0 {
		return nil
	}
	return fmt.Errorf("config is not valid:\n%w", errors.Join(problems...))
}
// Load decodes a YAML policy file from fin, validates it and returns the
// resolved Config. fname is used only in error messages.
//
// Before decoding, both status codes default to http.StatusOK and the store
// backend defaults to "memory". Import statements are loaded and their bot
// rules are flattened into Config.Bots. When the file defines no thresholds,
// DefaultThresholds is used. Any parse or validation failure returns a nil
// Config and an error.
func Load(fin io.Reader, fname string) (*Config, error) {
// Defaults that the decoded file may override.
c := &fileConfig{
StatusCodes: StatusCodes{
Challenge: http.StatusOK,
Deny: http.StatusOK,
},
Store: &Store{
Backend: "memory",
},
}
// NOTE(review): c is already a pointer, so this passes a **fileConfig to
// the decoder.
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
}
if err := c.Valid(); err != nil {
return nil, err
}
result := &Config{
DNSBL: c.DNSBL,
OpenGraph: OpenGraph{
Enabled: c.OpenGraph.Enabled,
ConsiderHost: c.OpenGraph.ConsiderHost,
Override: c.OpenGraph.Override,
},
StatusCodes: c.StatusCodes,
Store: c.Store,
}
if c.OpenGraph.TimeToLive != "" {
// XXX(Xe): already validated in Valid()
ogTTL, _ := time.ParseDuration(c.OpenGraph.TimeToLive)
result.OpenGraph.TimeToLive = ogTTL
}
var validationErrs []error
// Flatten the bot list: imports contribute their loaded rules, inline
// entries contribute themselves.
for _, boi := range c.Bots {
if boi.ImportStatement != nil {
// load is promoted from the embedded *ImportStatement; c.Valid above
// has already loaded the import once via BotOrImport.Valid.
if err := boi.load(); err != nil {
validationErrs = append(validationErrs, err)
continue
}
result.Bots = append(result.Bots, boi.ImportStatement.Bots...)
}
if boi.BotConfig != nil {
if err := boi.BotConfig.Valid(); err != nil {
validationErrs = append(validationErrs, err)
continue
}
result.Bots = append(result.Bots, *boi.BotConfig)
}
}
if c.Impressum != nil {
if err := c.Impressum.Valid(); err != nil {
validationErrs = append(validationErrs, err)
}
result.Impressum = c.Impressum
}
// Fall back to the package defaults when the file sets no thresholds.
if len(c.Thresholds) == 0 {
c.Thresholds = DefaultThresholds
}
for _, t := range c.Thresholds {
if err := t.Valid(); err != nil {
validationErrs = append(validationErrs, err)
continue
}
result.Thresholds = append(result.Thresholds, t)
}
if len(validationErrs) > 0 {
return nil, fmt.Errorf("errors validating policy config %s: %w", fname, errors.Join(validationErrs...))
}
return result, nil
}
// Config is the resolved policy configuration produced by Load: imports are
// flattened into Bots and the OpenGraph TTL is parsed into a duration.
type Config struct {
Impressum *Impressum
Store *Store
OpenGraph OpenGraph
Bots []BotConfig
Thresholds []Threshold
StatusCodes StatusCodes
DNSBL bool
}
// Valid checks that the configuration has at least one bot rule and that
// every bot rule is itself valid. Problems are joined and wrapped in a single
// "config is not valid" error; nil is returned when there are none.
func (c Config) Valid() error {
	var problems []error

	if len(c.Bots) == 0 {
		problems = append(problems, ErrNoBotRulesDefined)
	}

	// Each rule is validated on a copy, so defaults applied by the rule's
	// Valid method do not reach c.Bots.
	for _, bot := range c.Bots {
		err := bot.Valid()
		if err == nil {
			continue
		}
		problems = append(problems, err)
	}

	if len(problems) == 0 {
		return nil
	}
	return fmt.Errorf("config is not valid:\n%w", errors.Join(problems...))
}

View file

@ -1,370 +0,0 @@
package config_test
import (
"errors"
"io/fs"
"os"
"path/filepath"
"testing"
"github.com/TecharoHQ/anubis/data"
. "github.com/TecharoHQ/anubis/lib/policy/config"
)
// p returns a pointer to a fresh copy of v, so tests can fill optional
// pointer fields from literals.
func p[V any](v V) *V {
	ptr := new(V)
	*ptr = v
	return ptr
}
// TestBotValid runs BotConfig.Valid over a table of rules and checks, with
// errors.Is, that each case yields the expected sentinel error (or no error
// when err is nil).
func TestBotValid(t *testing.T) {
var tests = []struct {
bot BotConfig
err error
name string
}{
{
name: "simple user agent",
bot: BotConfig{
Name: "mozilla-ua",
Action: RuleChallenge,
UserAgentRegex: p("Mozilla"),
},
err: nil,
},
{
name: "simple path",
bot: BotConfig{
Name: "well-known-path",
Action: RuleAllow,
PathRegex: p("^/.well-known/.*$"),
},
err: nil,
},
{
name: "no rule name",
bot: BotConfig{
Action: RuleChallenge,
UserAgentRegex: p("Mozilla"),
},
err: ErrBotMustHaveName,
},
{
name: "no rule matcher",
bot: BotConfig{
Name: "broken-rule",
Action: RuleAllow,
},
err: ErrBotMustHaveUserAgentOrPath,
},
{
name: "both user-agent and path",
bot: BotConfig{
Name: "path-and-user-agent",
Action: RuleDeny,
UserAgentRegex: p("Mozilla"),
PathRegex: p("^/.secret-place/.*$"),
},
err: ErrBotMustHaveUserAgentOrPathNotBoth,
},
{
name: "unknown action",
bot: BotConfig{
Name: "Unknown action",
Action: RuleUnknown,
UserAgentRegex: p("Mozilla"),
},
err: ErrUnknownAction,
},
{
name: "invalid user agent regex",
bot: BotConfig{
Name: "mozilla-ua",
Action: RuleChallenge,
UserAgentRegex: p("a(b"),
},
err: ErrInvalidUserAgentRegex,
},
{
name: "invalid path regex",
bot: BotConfig{
Name: "mozilla-ua",
Action: RuleChallenge,
PathRegex: p("a(b"),
},
err: ErrInvalidPathRegex,
},
{
name: "invalid headers regex",
bot: BotConfig{
Name: "mozilla-ua",
Action: RuleChallenge,
HeadersRegex: map[string]string{
"Content-Type": "a(b",
},
PathRegex: p("a(b"),
},
err: ErrInvalidHeadersRegex,
},
{
name: "challenge difficulty too low",
bot: BotConfig{
Name: "mozilla-ua",
Action: RuleChallenge,
PathRegex: p("Mozilla"),
Challenge: &ChallengeRules{
Difficulty: -1,
ReportAs: 4,
Algorithm: "fast",
},
},
err: ErrChallengeDifficultyTooLow,
},
{
name: "challenge difficulty too high",
bot: BotConfig{
Name: "mozilla-ua",
Action: RuleChallenge,
PathRegex: p("Mozilla"),
Challenge: &ChallengeRules{
Difficulty: 420,
ReportAs: 4,
Algorithm: "fast",
},
},
err: ErrChallengeDifficultyTooHigh,
},
{
name: "invalid cidr range",
bot: BotConfig{
Name: "mozilla-ua",
Action: RuleAllow,
RemoteAddr: []string{"0.0.0.0/33"},
},
err: ErrInvalidCIDR,
},
{
name: "only filter by IP range",
bot: BotConfig{
Name: "mozilla-ua",
Action: RuleAllow,
RemoteAddr: []string{"0.0.0.0/0"},
},
err: nil,
},
{
name: "filter by user agent and IP range",
bot: BotConfig{
Name: "mozilla-ua",
Action: RuleAllow,
UserAgentRegex: p("Mozilla"),
RemoteAddr: []string{"0.0.0.0/0"},
},
err: nil,
},
{
name: "filter by path and IP range",
bot: BotConfig{
Name: "mozilla-ua",
Action: RuleAllow,
PathRegex: p("^.*$"),
RemoteAddr: []string{"0.0.0.0/0"},
},
err: nil,
},
{
name: "weight rule without weight",
bot: BotConfig{
Name: "weight-adjust-if-mozilla",
Action: RuleWeigh,
UserAgentRegex: p("Mozilla"),
},
},
{
name: "weight rule with weight adjust",
bot: BotConfig{
Name: "weight-adjust-if-mozilla",
Action: RuleWeigh,
UserAgentRegex: p("Mozilla"),
Weight: &Weight{
Adjust: 5,
},
},
},
}
for _, cs := range tests {
cs := cs
t.Run(cs.name, func(t *testing.T) {
err := cs.bot.Valid()
// Nothing expected and nothing returned: the case passes.
if err == nil && cs.err == nil {
return
}
if err == nil && cs.err != nil {
t.Errorf("didn't get an error, but wanted: %v", cs.err)
}
// Not an early return above, so a missing error is also reported here as
// a mismatch.
if !errors.Is(err, cs.err) {
t.Logf("got wrong error from Valid()")
t.Logf("wanted: %v", cs.err)
t.Logf("got: %v", err)
t.Errorf("got invalid error from check")
}
})
}
}
// TestConfigValidKnownGood loads every file under testdata/good and expects
// each one to parse, validate and contain at least one bot rule.
func TestConfigValidKnownGood(t *testing.T) {
	entries, err := os.ReadDir("testdata/good")
	if err != nil {
		t.Fatal(err)
	}

	for _, entry := range entries {
		fname := entry.Name()
		t.Run(fname, func(t *testing.T) {
			fin, err := os.Open(filepath.Join("testdata", "good", fname))
			if err != nil {
				t.Fatal(err)
			}
			defer fin.Close()

			cfg, err := Load(fin, fname)
			if err != nil {
				t.Fatal(err)
			}

			if err := cfg.Valid(); err != nil {
				t.Error(err)
			}

			if len(cfg.Bots) == 0 {
				t.Error("wanted more than 0 bots, got zero")
			}
		})
	}
}
// TestImportStatement walks the listed folders of data.BotPolicies and
// checks that every file other than README.md can be imported through the
// "(data)/" prefix and yields at least one bot definition.
func TestImportStatement(t *testing.T) {
	type testCase struct {
		err        error
		name       string
		importPath string
	}

	var cases []testCase

	// collect registers one test case per regular file found by the walk.
	collect := func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() || d.Name() == "README.md" {
			return nil
		}

		embedded := "(data)/" + path
		cases = append(cases, testCase{
			name:       embedded,
			importPath: embedded,
			err:        nil,
		})
		return nil
	}

	folders := []string{
		"apps",
		"bots",
		"common",
		"crawlers",
		"meta",
	}
	for _, folder := range folders {
		if err := fs.WalkDir(data.BotPolicies, folder, collect); err != nil {
			t.Fatal(err)
		}
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			stmt := &ImportStatement{Import: tc.importPath}

			if err := stmt.Valid(); err != nil {
				t.Errorf("validation error: %v", err)
			}

			if len(stmt.Bots) == 0 {
				t.Error("wanted bot definitions, but got none")
			}
		})
	}
}
// TestConfigValidBad loads every file under testdata/bad and expects Load to
// reject each one; the rejection error is logged.
func TestConfigValidBad(t *testing.T) {
	entries, err := os.ReadDir("testdata/bad")
	if err != nil {
		t.Fatal(err)
	}

	for _, entry := range entries {
		fname := filepath.Join("testdata", "bad", entry.Name())
		t.Run(entry.Name(), func(t *testing.T) {
			fin, err := os.Open(fname)
			if err != nil {
				t.Fatal(err)
			}
			defer fin.Close()

			if _, err := Load(fin, fname); err != nil {
				t.Log(err)
				return
			}
			t.Fatal("validation should have failed but didn't somehow")
		})
	}
}
// TestBotConfigZero checks that the zero BotConfig reports Zero and that it
// stops doing so as fields are set one after another. The fields accumulate:
// each step keeps everything the earlier steps set.
func TestBotConfigZero(t *testing.T) {
	var cfg BotConfig
	if !cfg.Zero() {
		t.Error("zero value config.BotConfig is not zero value")
	}

	steps := []struct {
		set     func(c *BotConfig)
		failure string
	}{
		{
			set:     func(c *BotConfig) { c.Name = "hi" },
			failure: "config.BotConfig with name is zero value",
		},
		{
			set:     func(c *BotConfig) { c.UserAgentRegex = p(".*") },
			failure: "config.BotConfig with user agent regex is zero value",
		},
		{
			set:     func(c *BotConfig) { c.PathRegex = p(".*") },
			failure: "config.BotConfig with path regex is zero value",
		},
		{
			set:     func(c *BotConfig) { c.HeadersRegex = map[string]string{"hi": "there"} },
			failure: "config.BotConfig with headers regex is zero value",
		},
		{
			set:     func(c *BotConfig) { c.Action = RuleAllow },
			failure: "config.BotConfig with action is zero value",
		},
		{
			set:     func(c *BotConfig) { c.RemoteAddr = []string{"::/0"} },
			failure: "config.BotConfig with remote addresses is zero value",
		},
		{
			set: func(c *BotConfig) {
				c.Challenge = &ChallengeRules{
					Difficulty: 4,
					ReportAs:   4,
					Algorithm:  DefaultAlgorithm,
				}
			},
			failure: "config.BotConfig with challenge rules is zero value",
		},
	}

	for _, step := range steps {
		step.set(&cfg)
		if cfg.Zero() {
			t.Error(step.failure)
		}
	}
}

View file

@ -1,130 +0,0 @@
package config
import (
"encoding/json"
"errors"
"fmt"
"slices"
"strings"
)
// Sentinel errors for ExpressionOrList decoding and validation.
var (
// ErrExpressionOrListMustBeStringOrObject is returned by UnmarshalJSON when
// the JSON value is neither a string nor an object.
ErrExpressionOrListMustBeStringOrObject = errors.New("config: this must be a string or an object")
// ErrExpressionEmpty is returned by Valid when no expression is set at all.
ErrExpressionEmpty = errors.New("config: this expression is empty")
// ErrExpressionCantHaveBoth is returned by Valid when both All and Any are
// populated.
ErrExpressionCantHaveBoth = errors.New("config: expression block can't contain multiple expression types")
)
// ExpressionOrList is either a single expression string or a list of
// expressions combined with "all" (&&) or "any" (||).
type ExpressionOrList struct {
// Expression is the single-string form. It is excluded from the struct
// encoding; the custom marshal and unmarshal methods handle it.
Expression string `json:"-" yaml:"-"`
All []string `json:"all,omitempty" yaml:"all,omitempty"`
Any []string `json:"any,omitempty" yaml:"any,omitempty"`
}
// String renders the expression as one string. The single Expression is
// returned as is; otherwise each All predicate is wrapped in "( ... )" and
// joined with " && ", or each Any predicate is wrapped and joined with
// " || ". Expression wins over All, and All wins over Any.
//
// It panics when all three fields are empty.
func (eol ExpressionOrList) String() string {
	// join wraps every predicate in parentheses and glues them with op.
	join := func(preds []string, op string) string {
		parts := make([]string, 0, len(preds))
		for _, pred := range preds {
			parts = append(parts, fmt.Sprintf("( %s )", pred))
		}
		return strings.Join(parts, op)
	}

	if eol.Expression != "" {
		return eol.Expression
	}
	if len(eol.All) > 0 {
		return join(eol.All, " && ")
	}
	if len(eol.Any) > 0 {
		return join(eol.Any, " || ")
	}
	panic("this should not happen")
}
// Equal reports whether eol and rhs hold the same Expression and the same
// All and Any lists, compared element by element in order. rhs must not be
// nil.
func (eol ExpressionOrList) Equal(rhs *ExpressionOrList) bool {
	return eol.Expression == rhs.Expression &&
		slices.Equal(eol.All, rhs.All) &&
		slices.Equal(eol.Any, rhs.Any)
}
// MarshalYAML returns the value to encode: a plain string when there is a
// single expression, otherwise the struct with its All/Any lists.
//
// Side effect: a lone All or Any entry (with the other list empty) is moved
// into eol.Expression and its list is set to nil.
func (eol *ExpressionOrList) MarshalYAML() (any, error) {
	if len(eol.All) == 1 && len(eol.Any) == 0 {
		eol.Expression, eol.All = eol.All[0], nil
	} else if len(eol.Any) == 1 && len(eol.All) == 0 {
		eol.Expression, eol.Any = eol.Any[0], nil
	}

	if eol.Expression == "" {
		// The local type has no methods, so encoding it does not call back
		// into MarshalYAML.
		type RawExpressionOrList ExpressionOrList
		return RawExpressionOrList(*eol), nil
	}
	return eol.Expression, nil
}
// MarshalJSON encodes a single expression as a JSON string and anything else
// as an object with "all"/"any" keys.
//
// Side effect: a lone All or Any entry (with the other list empty) is moved
// into eol.Expression and its list is set to nil.
func (eol *ExpressionOrList) MarshalJSON() ([]byte, error) {
	if len(eol.All) == 1 && len(eol.Any) == 0 {
		eol.Expression, eol.All = eol.All[0], nil
	} else if len(eol.Any) == 1 && len(eol.All) == 0 {
		eol.Expression, eol.Any = eol.Any[0], nil
	}

	if eol.Expression == "" {
		// The local type has no methods, so json.Marshal does not call back
		// into MarshalJSON.
		type RawExpressionOrList ExpressionOrList
		return json.Marshal(RawExpressionOrList(*eol))
	}
	return json.Marshal(eol.Expression)
}
// UnmarshalJSON decodes either a JSON string (stored in Expression) or a JSON
// object with "all"/"any" keys (stored in All and Any).
//
// Any other input, including empty data, yields
// ErrExpressionOrListMustBeStringOrObject. Errors from decoding the string or
// object itself are returned unchanged.
func (eol *ExpressionOrList) UnmarshalJSON(data []byte) error {
	// encoding/json never passes empty data, but a direct caller can; guard
	// the first-byte dispatch below so that case returns an error instead of
	// panicking with an index out of range.
	if len(data) == 0 {
		return ErrExpressionOrListMustBeStringOrObject
	}

	switch data[0] {
	case '"': // string
		return json.Unmarshal(data, &eol.Expression)
	case '{': // object
		// The local type has no methods, so json.Unmarshal does not call back
		// into UnmarshalJSON.
		type RawExpressionOrList ExpressionOrList
		var val RawExpressionOrList
		if err := json.Unmarshal(data, &val); err != nil {
			return err
		}
		eol.All = val.All
		eol.Any = val.Any
		return nil
	}

	return ErrExpressionOrListMustBeStringOrObject
}
// Valid returns ErrExpressionEmpty when no expression of any kind is set,
// ErrExpressionCantHaveBoth when both All and Any are populated, and nil
// otherwise.
func (eol *ExpressionOrList) Valid() error {
	hasAll, hasAny := len(eol.All) != 0, len(eol.Any) != 0

	switch {
	case eol.Expression == "" && !hasAll && !hasAny:
		return ErrExpressionEmpty
	case hasAll && hasAny:
		return ErrExpressionCantHaveBoth
	default:
		return nil
	}
}

View file

@ -1,266 +0,0 @@
package config
import (
"bytes"
"encoding/json"
"errors"
"testing"
yaml "sigs.k8s.io/yaml/goyaml.v3"
)
// TestExpressionOrListMarshalJSON checks the JSON encoding of an
// ExpressionOrList: single expressions and one-element lists become a JSON
// string, longer lists become an object.
func TestExpressionOrListMarshalJSON(t *testing.T) {
	type testCase struct {
		err    error
		input  *ExpressionOrList
		name   string
		output []byte
	}

	cases := []testCase{
		{
			name:   "single expression",
			input:  &ExpressionOrList{Expression: "true"},
			output: []byte(`"true"`),
			err:    nil,
		},
		{
			name:   "all",
			input:  &ExpressionOrList{All: []string{"true", "true"}},
			output: []byte(`{"all":["true","true"]}`),
			err:    nil,
		},
		{
			name:   "all one",
			input:  &ExpressionOrList{All: []string{"true"}},
			output: []byte(`"true"`),
			err:    nil,
		},
		{
			name:   "any",
			input:  &ExpressionOrList{Any: []string{"true", "false"}},
			output: []byte(`{"any":["true","false"]}`),
			err:    nil,
		},
		{
			name:   "any one",
			input:  &ExpressionOrList{Any: []string{"true"}},
			output: []byte(`"true"`),
			err:    nil,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := json.Marshal(tc.input)
			if !errors.Is(err, tc.err) {
				t.Errorf("wanted marshal error: %v but got: %v", tc.err, err)
			}

			if bytes.Equal(got, tc.output) {
				return
			}
			t.Logf("wanted: %s", string(tc.output))
			t.Logf("got: %s", string(got))
			t.Error("mismatched output")
		})
	}
}
// TestExpressionOrListMarshalYAML checks the YAML encoding of an
// ExpressionOrList: single expressions and one-element lists become a quoted
// scalar, longer lists become an "all:" or "any:" sequence.
//
// NOTE(review): the multi-line expectations below are raw strings, so their
// leading whitespace is significant — confirm it matches the encoder output.
func TestExpressionOrListMarshalYAML(t *testing.T) {
for _, tt := range []struct {
err error
input *ExpressionOrList
name string
output []byte
}{
{
name: "single expression",
input: &ExpressionOrList{
Expression: "true",
},
output: []byte(`"true"`),
err: nil,
},
{
name: "all",
input: &ExpressionOrList{
All: []string{"true", "true"},
},
output: []byte(`all:
- "true"
- "true"`),
err: nil,
},
{
name: "all one",
input: &ExpressionOrList{
All: []string{"true"},
},
output: []byte(`"true"`),
err: nil,
},
{
name: "any",
input: &ExpressionOrList{
Any: []string{"true", "false"},
},
output: []byte(`any:
- "true"
- "false"`),
err: nil,
},
{
name: "any one",
input: &ExpressionOrList{
Any: []string{"true"},
},
output: []byte(`"true"`),
err: nil,
},
} {
t.Run(tt.name, func(t *testing.T) {
result, err := yaml.Marshal(tt.input)
if !errors.Is(err, tt.err) {
t.Errorf("wanted marshal error: %v but got: %v", tt.err, err)
}
// Surrounding whitespace is trimmed from the encoder output before
// comparing.
result = bytes.TrimSpace(result)
if !bytes.Equal(result, tt.output) {
t.Logf("wanted: %q", string(tt.output))
t.Logf("got: %q", string(result))
t.Error("mismatched output")
}
})
}
}
// TestExpressionOrListUnmarshalJSON decodes JSON strings and objects into an
// ExpressionOrList and checks three things per case: the unmarshal error
// (err), the decoded value (result, when set) and the error Valid reports on
// the decoded value (validErr).
func TestExpressionOrListUnmarshalJSON(t *testing.T) {
	for _, tt := range []struct {
		err      error
		validErr error
		result   *ExpressionOrList
		name     string
		inp      string
	}{
		{
			name: "simple",
			inp:  `"\"User-Agent\" in headers"`,
			result: &ExpressionOrList{
				Expression: `"User-Agent" in headers`,
			},
		},
		{
			name: "object-and",
			inp: `{
"all": ["\"User-Agent\" in headers"]
}`,
			result: &ExpressionOrList{
				All: []string{
					`"User-Agent" in headers`,
				},
			},
		},
		{
			name: "object-or",
			inp: `{
"any": ["\"User-Agent\" in headers"]
}`,
			result: &ExpressionOrList{
				Any: []string{
					`"User-Agent" in headers`,
				},
			},
		},
		{
			name: "both-or-and",
			inp: `{
"all": ["\"User-Agent\" in headers"],
"any": ["\"User-Agent\" in headers"]
}`,
			validErr: ErrExpressionCantHaveBoth,
		},
		{
			name: "expression-empty",
			inp: `{
"any": []
}`,
			validErr: ErrExpressionEmpty,
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			var eol ExpressionOrList

			if err := json.Unmarshal([]byte(tt.inp), &eol); !errors.Is(err, tt.err) {
				t.Errorf("wanted unmarshal error: %v but got: %v", tt.err, err)
			}

			if tt.result != nil && !eol.Equal(tt.result) {
				t.Logf("wanted: %#v", tt.result)
				t.Logf("got: %#v", &eol)
				t.Fatal("parsed expression is not what was expected")
			}

			// Report the expected validation error; the message used to print
			// tt.err, the unmarshal expectation, which made failures misleading.
			if err := eol.Valid(); !errors.Is(err, tt.validErr) {
				t.Errorf("wanted validation error: %v but got: %v", tt.validErr, err)
			}
		})
	}
}
// TestExpressionOrListString checks how ExpressionOrList.String renders a
// single expression, an All list (joined with &&) and an Any list (joined
// with ||), each with one and with two entries.
func TestExpressionOrListString(t *testing.T) {
	for _, tt := range []struct {
		name string
		out  string
		in   ExpressionOrList
	}{
		{
			name: "single expression",
			in: ExpressionOrList{
				Expression: "true",
			},
			out: "true",
		},
		{
			name: "all",
			in: ExpressionOrList{
				All: []string{"true"},
			},
			out: "( true )",
		},
		{
			name: "all with &&",
			in: ExpressionOrList{
				All: []string{"true", "true"},
			},
			out: "( true ) && ( true )",
		},
		{
			// This case used to populate All, duplicating the "all" case and
			// leaving the single-entry Any path untested.
			name: "any",
			in: ExpressionOrList{
				Any: []string{"true"},
			},
			out: "( true )",
		},
		{
			name: "any with ||",
			in: ExpressionOrList{
				Any: []string{"true", "true"},
			},
			out: "( true ) || ( true )",
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			result := tt.in.String()
			if result != tt.out {
				t.Errorf("wanted %q, got: %q", tt.out, result)
			}
		})
	}
}

View file

@ -1,36 +0,0 @@
package config
import (
"errors"
"fmt"
"regexp"
"strings"
)
var (
// countryCodeRegexp accepts exactly two ASCII letters in either case.
countryCodeRegexp = regexp.MustCompile(`^[a-zA-Z]{2}$`)
// ErrNotCountryCode is the sentinel wrapped by (*GeoIP).Valid for every
// entry that countryCodeRegexp rejects.
ErrNotCountryCode = errors.New("config.Bot: invalid country code")
)
// GeoIP is the "geoip" matcher block of a bot rule.
type GeoIP struct {
// Countries lists two-letter country codes; Valid lowercases them in place.
Countries []string `json:"countries"`
}
// Valid checks that every entry of g.Countries is a two-letter code and
// lowercases each entry in place, whether or not it passed the check.
//
// Each rejected entry yields one error wrapping ErrNotCountryCode; the errors
// are joined and wrapped in a single "bot.GeoIP" error. It returns nil when
// every entry is acceptable.
func (g *GeoIP) Valid() error {
	var problems []error

	for idx := range g.Countries {
		code := g.Countries[idx]
		if !countryCodeRegexp.MatchString(code) {
			problems = append(problems, fmt.Errorf("%w: %s", ErrNotCountryCode, code))
		}
		g.Countries[idx] = strings.ToLower(code)
	}

	if len(problems) == 0 {
		return nil
	}
	return fmt.Errorf("bot.GeoIP: invalid GeoIP settings: %w", errors.Join(problems...))
}

View file

@ -1,36 +0,0 @@
package config
import (
"errors"
"testing"
)
// TestGeoIPValid checks that (*GeoIP).Valid accepts a two-letter country code
// and reports ErrNotCountryCode for a three-letter one.
func TestGeoIPValid(t *testing.T) {
	type testCase struct {
		err   error
		input *GeoIP
		name  string
	}

	cases := []testCase{
		{
			name:  "basic valid",
			input: &GeoIP{Countries: []string{"CA"}},
		},
		{
			name:  "invalid country",
			input: &GeoIP{Countries: []string{"XOB"}},
			err:   ErrNotCountryCode,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.input.Valid()
			if errors.Is(got, tc.err) {
				return
			}
			t.Logf("want: %v", tc.err)
			t.Logf("got: %v", got)
			t.Error("got wrong validation error")
		})
	}
}

View file

@ -1,71 +0,0 @@
package config
import (
"context"
"errors"
"fmt"
"io"
)
// ErrMissingValue is the sentinel wrapped by the Impressum and ImpressumPage
// validators when a required field is empty.
var ErrMissingValue = errors.New("config: missing value")
// Impressum is the "impressum" block of the policy file: a footer plus a
// page.
type Impressum struct {
// Footer is written verbatim by Render and must be non-empty to pass Valid.
Footer string `json:"footer" yaml:"footer"`
Page ImpressumPage `json:"page" yaml:"page"`
}
// Render writes the footer to w unchanged and returns any write error. The
// context is unused.
func (i Impressum) Render(_ context.Context, w io.Writer) error {
	_, err := fmt.Fprint(w, i.Footer)
	return err
}
// Valid requires a non-empty footer and a valid page. The problems found are
// joined into one error; nil is returned when there are none.
func (i Impressum) Valid() error {
	var problems []error

	if i.Footer == "" {
		problems = append(problems, fmt.Errorf("%w: impressum footer must be defined", ErrMissingValue))
	}

	if pageErr := i.Page.Valid(); pageErr != nil {
		problems = append(problems, pageErr)
	}

	if len(problems) == 0 {
		return nil
	}
	return errors.Join(problems...)
}
// ImpressumPage is the page part of an Impressum. Both fields must be
// non-empty to pass Valid.
type ImpressumPage struct {
Title string `json:"title" yaml:"title"`
// Body is written verbatim by Render.
Body string `json:"body" yaml:"body"`
}
// Render writes the page body to w unchanged and returns any write error.
// The context is unused.
func (ip ImpressumPage) Render(_ context.Context, w io.Writer) error {
	_, err := fmt.Fprint(w, ip.Body)
	return err
}
// Valid requires both the page title and the page body to be non-empty. Each
// missing field yields one error wrapping ErrMissingValue; the errors are
// joined into one. It returns nil when both fields are set.
func (ip ImpressumPage) Valid() error {
	var errs []error

	if len(ip.Title) == 0 {
		errs = append(errs, fmt.Errorf("%w: impressum page title must be defined", ErrMissingValue))
	}

	// The message used to read "impressum body title must be defined", a
	// leftover from the title check above.
	if len(ip.Body) == 0 {
		errs = append(errs, fmt.Errorf("%w: impressum page body must be defined", ErrMissingValue))
	}

	if len(errs) != 0 {
		return errors.Join(errs...)
	}

	return nil
}

View file

@ -1,62 +0,0 @@
package config
import (
"bytes"
"errors"
"testing"
)
// TestImpressumValid validates a table of Impressum values against the
// expected sentinel error and then renders both the footer and the page for
// every case, including the invalid ones.
func TestImpressumValid(t *testing.T) {
	type testCase struct {
		err  error
		inp  Impressum
		name string
	}

	cases := []testCase{
		{
			name: "basic happy path",
			inp: Impressum{
				Footer: "<p>Website hosted by Techaro.<p>",
				Page: ImpressumPage{
					Title: "Techaro Imprint",
					Body:  "<p>This is an imprint page.</p>",
				},
			},
			err: nil,
		},
		{
			name: "no footer",
			inp: Impressum{
				Footer: "",
				Page: ImpressumPage{
					Title: "Techaro Imprint",
					Body:  "<p>This is an imprint page.</p>",
				},
			},
			err: ErrMissingValue,
		},
		{
			name: "page not valid",
			inp: Impressum{
				Footer: "test page please ignore",
			},
			err: ErrMissingValue,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := tc.inp.Valid(); !errors.Is(got, tc.err) {
				t.Logf("want: %v", tc.err)
				t.Logf("got: %v", got)
				t.Error("validation failed")
			}

			// Rendering must succeed whether or not validation passed.
			var out bytes.Buffer

			footerErr := tc.inp.Render(t.Context(), &out)
			if footerErr != nil {
				t.Errorf("can't render footer: %v", footerErr)
			}

			pageErr := tc.inp.Page.Render(t.Context(), &out)
			if pageErr != nil {
				t.Errorf("can't render page: %v", pageErr)
			}
		})
	}
}

View file

@ -1,51 +0,0 @@
package config
import (
"errors"
"fmt"
"time"
)
// Sentinel errors reported by OpenGraph configuration validation.
var (
// ErrInvalidOpenGraphConfig is joined into every error returned by
// openGraphFileConfig.Valid.
ErrInvalidOpenGraphConfig = errors.New("config.OpenGraph: invalid OpenGraph configuration")
// ErrOpenGraphTTLDoesNotParse is wrapped when the ttl string is rejected by
// time.ParseDuration.
ErrOpenGraphTTLDoesNotParse = errors.New("config.OpenGraph: ttl does not parse as a Duration, see https://pkg.go.dev/time#ParseDuration (formatted like 5m -> 5 minutes, 2h -> 2 hours, etc)")
// ErrOpenGraphMissingProperty is wrapped when a non-empty override map lacks
// a required tag.
ErrOpenGraphMissingProperty = errors.New("config.OpenGraph: default opengraph tags missing a property")
)
// openGraphFileConfig is the on-disk form of the OpenGraph settings, in which
// the TTL is still an unparsed duration string (see Valid).
type openGraphFileConfig struct {
	Override   map[string]string `json:"override,omitempty" yaml:"override,omitempty"`
	TimeToLive string            `json:"ttl" yaml:"ttl"`
	Enabled    bool              `json:"enabled" yaml:"enabled"`
	// ConsiderHost uses the same key for YAML as for JSON. It previously
	// reused the "enabled" YAML key, which collided with the Enabled field.
	ConsiderHost bool `json:"considerHost" yaml:"considerHost"`
}
// OpenGraph is the OpenGraph configuration with the TTL held as a
// time.Duration rather than a string.
type OpenGraph struct {
	Override   map[string]string `json:"override,omitempty" yaml:"override,omitempty"`
	TimeToLive time.Duration     `json:"ttl" yaml:"ttl"`
	Enabled    bool              `json:"enabled" yaml:"enabled"`
	// ConsiderHost uses the same key for YAML as for JSON. It previously
	// reused the "enabled" YAML key, which collided with the Enabled field.
	ConsiderHost bool `json:"considerHost" yaml:"considerHost"`
}
// Valid checks the on-disk OpenGraph settings.
//
// The ttl must parse with time.ParseDuration, and when any overrides are
// given they must include "og:title". On failure the returned error joins
// ErrInvalidOpenGraphConfig with the individual problems; nil is returned
// when nothing is wrong.
func (og *openGraphFileConfig) Valid() error {
	var problems []error

	_, parseErr := time.ParseDuration(og.TimeToLive)
	if parseErr != nil {
		problems = append(problems, fmt.Errorf("%w: ParseDuration(%q) returned: %w", ErrOpenGraphTTLDoesNotParse, og.TimeToLive, parseErr))
	}

	// Required tags are only enforced once at least one override is present.
	if len(og.Override) > 0 {
		requiredTags := []string{
			"og:title",
		}

		for _, tag := range requiredTags {
			if _, present := og.Override[tag]; present {
				continue
			}
			problems = append(problems, fmt.Errorf("%w: %s", ErrOpenGraphMissingProperty, tag))
		}
	}

	if len(problems) == 0 {
		return nil
	}

	return errors.Join(ErrInvalidOpenGraphConfig, errors.Join(problems...))
}

View file

@ -1,67 +0,0 @@
package config
import (
"errors"
"testing"
)
// TestOpenGraphFileConfigValid runs openGraphFileConfig.Valid over a table of
// configurations and checks the returned error against the expected sentinel.
func TestOpenGraphFileConfigValid(t *testing.T) {
	type testCase struct {
		err   error
		input *openGraphFileConfig
		name  string
	}

	cases := []testCase{
		{
			name: "basic happy path",
			input: &openGraphFileConfig{
				Enabled:      true,
				ConsiderHost: false,
				TimeToLive:   "1h",
				Override:     map[string]string{},
			},
			err: nil,
		},
		{
			name: "basic happy path with default",
			input: &openGraphFileConfig{
				Enabled:      true,
				ConsiderHost: false,
				TimeToLive:   "1h",
				Override: map[string]string{
					"og:title": "foobar",
				},
			},
			err: nil,
		},
		{
			name: "invalid time duration",
			input: &openGraphFileConfig{
				Enabled:      true,
				ConsiderHost: false,
				TimeToLive:   "taco",
				Override:     map[string]string{},
			},
			err: ErrOpenGraphTTLDoesNotParse,
		},
		{
			name: "missing og:title in defaults",
			input: &openGraphFileConfig{
				Enabled:      true,
				ConsiderHost: false,
				TimeToLive:   "1h",
				Override: map[string]string{
					"description": "foobar",
				},
			},
			err: ErrOpenGraphMissingProperty,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.input.Valid()
			if errors.Is(got, tc.err) {
				return
			}

			t.Logf("wanted error: %v", tc.err)
			t.Logf("got error: %v", got)
			t.Error("validation failed")
		})
	}
}

View file

@ -1,44 +0,0 @@
package config
import (
"encoding/json"
"errors"
"fmt"
"github.com/TecharoHQ/anubis/lib/store"
_ "github.com/TecharoHQ/anubis/lib/store/all"
)
// Sentinel errors reported by Store.Valid.
var (
// ErrNoStoreBackend is reported when Store.Backend is empty.
ErrNoStoreBackend = errors.New("config.Store: no backend defined")
// ErrUnknownStoreBackend is wrapped when store.Get does not know the
// configured backend name.
ErrUnknownStoreBackend = errors.New("config.Store: unknown backend")
)
// Store selects a storage backend and carries its backend-specific settings.
type Store struct {
// Backend is the backend name that Valid looks up with store.Get.
Backend string `json:"backend"`
// Parameters is left as raw JSON and handed to the backend factory's Valid
// method for checking.
Parameters json.RawMessage `json:"parameters"`
}
// Valid checks the store configuration.
//
// An empty backend name is reported as ErrNoStoreBackend. The name is then
// looked up with store.Get: an unknown name is reported as
// ErrUnknownStoreBackend, while a known backend gets to validate Parameters
// itself. All problems are joined into one error; nil means the configuration
// is usable.
func (s *Store) Valid() error {
	var problems []error

	if s.Backend == "" {
		problems = append(problems, ErrNoStoreBackend)
	}

	// The lookup runs even for an empty name, so an empty backend yields both
	// the "no backend" and the "unknown backend" errors.
	if factory, found := store.Get(s.Backend); !found {
		problems = append(problems, fmt.Errorf("%w: %q", ErrUnknownStoreBackend, s.Backend))
	} else if err := factory.Valid(s.Parameters); err != nil {
		problems = append(problems, err)
	}

	if len(problems) == 0 {
		return nil
	}

	return errors.Join(problems...)
}

View file

@ -1,84 +0,0 @@
package config_test
import (
"encoding/json"
"errors"
"testing"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/bbolt"
"github.com/TecharoHQ/anubis/lib/store/valkey"
)
// TestStoreValid runs config.Store.Valid over a table of backend
// configurations and checks the returned error against the expected sentinel
// (nil for configurations that must be accepted).
func TestStoreValid(t *testing.T) {
	type testCase struct {
		err   error
		name  string
		input config.Store
	}

	cases := []testCase{
		{
			name:  "no backend",
			input: config.Store{},
			err:   config.ErrNoStoreBackend,
		},
		{
			name:  "in-memory backend",
			input: config.Store{Backend: "memory"},
		},
		{
			name: "bbolt backend",
			input: config.Store{
				Backend:    "bbolt",
				Parameters: json.RawMessage(`{"path": "/tmp/foo", "bucket": "bar"}`),
			},
		},
		{
			name: "valkey backend",
			input: config.Store{
				Backend:    "valkey",
				Parameters: json.RawMessage(`{"url": "redis://valkey:6379/0"}`),
			},
		},
		{
			name: "valkey backend no URL",
			input: config.Store{
				Backend:    "valkey",
				Parameters: json.RawMessage(`{}`),
			},
			err: valkey.ErrNoURL,
		},
		{
			name: "valkey backend bad URL",
			input: config.Store{
				Backend:    "valkey",
				Parameters: json.RawMessage(`{"url": "http://anubis.techaro.lol"}`),
			},
			err: valkey.ErrBadURL,
		},
		{
			name: "bbolt backend no path",
			input: config.Store{
				Backend:    "bbolt",
				Parameters: json.RawMessage(`{"path": "", "bucket": "bar"}`),
			},
			err: bbolt.ErrMissingPath,
		},
		{
			name:  "unknown backend",
			input: config.Store{Backend: "taco salad"},
			err:   config.ErrUnknownStoreBackend,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.input.Valid()
			if errors.Is(got, tc.err) {
				return
			}

			t.Logf("want: %v", tc.err)
			t.Logf("got: %v", got)
			t.Error("invalid error returned")
		})
	}
}

View file

@ -1,21 +0,0 @@
{
"bots": [
{
"name": "path-bad",
"path_regex": "a(b",
"action": "DENY"
},
{
"name": "user-agent-bad",
"user_agent_regex": "a(b",
"action": "DENY"
},
{
"name": "headers-bad",
"headers": {
"Accept-Encoding": "a(b"
},
"action": "DENY"
}
]
}

View file

@ -1,7 +0,0 @@
bots:
- name: path-bad
path_regex: "a(b"
action: DENY
- name: user-agent-bad
user_agent_regex: "a(b"
action: DENY

View file

@ -1,10 +0,0 @@
{
"bots": [
{
"import": "(data)/bots/ai-catchall.yaml",
"name": "generic-browser",
"user_agent_regex": "Mozilla|Opera\n",
"action": "CHALLENGE"
}
]
}

View file

@ -1,6 +0,0 @@
bots:
- import: (data)/bots/ai-catchall.yaml
name: generic-browser
user_agent_regex: >
Mozilla|Opera
action: CHALLENGE

View file

@ -1,7 +0,0 @@
{
"bots": [
{
"import": "(data)/does-not-exist-fake-file.yaml"
}
]
}

View file

@ -1,2 +0,0 @@
bots:
- import: (data)/does-not-exist-fake-file.yaml

View file

@ -1,11 +0,0 @@
bots:
- name: simple-weight-adjust
action: WEIGH
user_agent_regex: Mozilla
weight:
adjust: 5
impressum:
page:
title: Test
body: <p>This is a test</p>

View file

@ -1,10 +0,0 @@
bots:
- name: simple-weight-adjust
action: WEIGH
user_agent_regex: Mozilla
weight:
adjust: 5
impressum:
footer: "Hi there these are WORDS on the INTERNET."
page: {}

View file

@ -1,5 +0,0 @@
{
"bots": [
{}
]
}

View file

@ -1 +0,0 @@
bots: []

View file

@ -1,17 +0,0 @@
{
"bots": [
{
"name": "multiple-expression-types",
"action": "ALLOW",
"expression": {
"all": [
"userAgent.startsWith(\"git/\") || userAgent.contains(\"libgit\")",
"\"Git-Protocol\" in headers && headers[\"Git-Protocol\"] == \"version=2\"\n"
],
"any": [
"userAgent.startsWith(\"evilbot/\")"
]
}
}
]
}

View file

@ -1,10 +0,0 @@
bots:
- name: multiple-expression-types
action: ALLOW
expression:
all:
- userAgent.startsWith("git/") || userAgent.contains("libgit")
- >
"Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"
any:
- userAgent.startsWith("evilbot/")

View file

@ -1 +0,0 @@
{}

View file

@ -1 +0,0 @@
{}

View file

@ -1,12 +0,0 @@
bots:
- name: everything
user_agent_regex: .*
action: DENY
openGraph:
enabled: true
considerHost: false
ttl: taco
default:
"og:title": "Xe's magic land of fun"
"og:description": "We're no strangers to love, you know the rules and so do I"

View file

@ -1,21 +0,0 @@
{
"bots": [
{
"name": "user-agent-ends-newline",
"user_agent_regex": "Mozilla\n",
"action": "CHALLENGE"
},
{
"name": "path-ends-newline",
"path_regex": "^/evil/.*$\n",
"action": "CHALLENGE"
},
{
"name": "headers-ends-newline",
"headers_regex": {
"CF-Worker": ".*\n"
},
"action": "CHALLENGE"
}
]
}

View file

@ -1,17 +0,0 @@
bots:
- name: user-agent-ends-newline
# Subtle bug: this ends with a newline
user_agent_regex: >
Mozilla
action: CHALLENGE
- name: path-ends-newline
# Subtle bug: this ends with a newline
path_regex: >
^/evil/.*$
action: CHALLENGE
- name: headers-ends-newline
# Subtle bug: this ends with a newline
headers_regex:
CF-Worker: >
.*
action: CHALLENGE

View file

@ -1,13 +0,0 @@
{
"bots": [
{
"name": "everything",
"user_agent_regex": ".*",
"action": "DENY"
}
],
"status_codes": {
"CHALLENGE": 0,
"DENY": 0
}
}

View file

@ -1,8 +0,0 @@
bots:
- name: everything
user_agent_regex: .*
action: DENY
status_codes:
CHALLENGE: 0
DENY: 0

View file

@ -1,11 +0,0 @@
bots:
- name: simple-weight-adjust
action: WEIGH
user_agent_regex: Mozilla
weight:
adjust: 5
thresholds:
- name: extreme-suspicion
expression: "true"
action: WEIGH

View file

@ -1,15 +0,0 @@
bots:
- name: simple-weight-adjust
action: WEIGH
user_agent_regex: Mozilla
weight:
adjust: 5
thresholds:
- name: extreme-suspicion
expression: "true"
action: WEIGH
challenge:
algorithm: fast
difficulty: 4
report_as: 4

View file

@ -1 +0,0 @@
}

View file

@ -1 +0,0 @@
}

View file

@ -1,12 +0,0 @@
{
"bots": [
{
"name": "everyones-invited",
"remote_addresses": [
"0.0.0.0/0",
"::/0"
],
"action": "ALLOW"
}
]
}

View file

@ -1,6 +0,0 @@
bots:
- name: everyones-invited
remote_addresses:
- "0.0.0.0/0"
- "::/0"
action: ALLOW

View file

@ -1,12 +0,0 @@
{
"bots": [
{
"name": "Cloudflare Workers",
"headers_regex": {
"CF-Worker": ".*"
},
"action": "DENY"
}
],
"dnsbl": false
}

View file

@ -1,5 +0,0 @@
bots:
- name: cloudflare-workers
headers_regex:
CF-Worker: .*
action: DENY

View file

@ -1,6 +0,0 @@
bots:
- name: challenge-cloudflare
action: CHALLENGE
asns:
match:
- 13335 # Cloudflare

View file

@ -1,9 +0,0 @@
{
"bots": [
{
"name": "generic-browser",
"user_agent_regex": "Mozilla",
"action": "CHALLENGE"
}
]
}

View file

@ -1,4 +0,0 @@
bots:
- name: generic-browser
user_agent_regex: Mozilla
action: CHALLENGE

View file

@ -1,8 +0,0 @@
bots:
- name: total-randomness
action: ALLOW
expression:
all:
- '"Accept" in headers'
- headers["Accept"].contains("text/html")
- randInt(1) == 0

View file

@ -1,10 +0,0 @@
{
"bots": [
{
"name": "everything",
"user_agent_regex": ".*",
"action": "DENY"
}
],
"dnsbl": false
}

View file

@ -1,4 +0,0 @@
bots:
- name: everything
user_agent_regex: .*
action: DENY

View file

@ -1,6 +0,0 @@
bots:
- name: compute-tarrif-us
action: CHALLENGE
geoip:
countries:
- US

View file

@ -1,14 +0,0 @@
{
"bots": [
{
"name": "allow-git-clients",
"action": "ALLOW",
"expression": {
"all": [
"userAgent.startsWith(\"git/\") || userAgent.contains(\"libgit\")",
"\"Git-Protocol\" in headers && headers[\"Git-Protocol\"] == \"version=2\""
]
}
}
]
}

View file

@ -1,8 +0,0 @@
bots:
- name: allow-git-clients
action: ALLOW
expression:
all:
- userAgent.startsWith("git/") || userAgent.contains("libgit")
- >
"Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"

View file

@ -1,7 +0,0 @@
{
"bots": [
{
"import": "./testdata/hack-test.json"
}
]
}

View file

@ -1,2 +0,0 @@
bots:
- import: ./testdata/hack-test.yaml

View file

@ -1,7 +0,0 @@
{
"bots": [
{
"import": "(data)/common/keep-internet-working.yaml"
}
]
}

View file

@ -1,2 +0,0 @@
bots:
- import: (data)/common/keep-internet-working.yaml

View file

@ -1,10 +0,0 @@
bots:
- name: simple
action: CHALLENGE
user_agent_regex: Mozilla
impressum:
footer: "Hi these are WORDS on the INTERNET."
page:
title: Test
body: <p>This is a test</p>

View file

@ -1,8 +0,0 @@
bots:
- name: simple-weight-adjust
action: WEIGH
user_agent_regex: Mozilla
weight:
adjust: 5
thresholds: []

View file

@ -1,79 +0,0 @@
{
"bots": [
{
"name": "amazonbot",
"user_agent_regex": "Amazonbot",
"action": "DENY"
},
{
"name": "googlebot",
"user_agent_regex": "\\+http\\:\\/\\/www\\.google\\.com/bot\\.html",
"action": "ALLOW"
},
{
"name": "bingbot",
"user_agent_regex": "\\+http\\:\\/\\/www\\.bing\\.com/bingbot\\.htm",
"action": "ALLOW"
},
{
"name": "qwantbot",
"user_agent_regex": "\\+https\\:\\/\\/help\\.qwant\\.com/bot/",
"action": "ALLOW"
},
{
"name": "discordbot",
"user_agent_regex": "Discordbot/2\\.0; \\+https\\:\\/\\/discordapp\\.com",
"action": "ALLOW"
},
{
"name": "blueskybot",
"user_agent_regex": "Bluesky Cardyb",
"action": "ALLOW"
},
{
"name": "us-artificial-intelligence-scraper",
"user_agent_regex": "\\+https\\:\\/\\/github\\.com\\/US-Artificial-Intelligence\\/scraper",
"action": "DENY"
},
{
"name": "well-known",
"path_regex": "^/.well-known/.*$",
"action": "ALLOW"
},
{
"name": "favicon",
"path_regex": "^/favicon.ico$",
"action": "ALLOW"
},
{
"name": "robots-txt",
"path_regex": "^/robots.txt$",
"action": "ALLOW"
},
{
"name": "rss-readers",
"path_regex": ".*\\.(rss|xml|atom|json)$",
"action": "ALLOW"
},
{
"name": "lightpanda",
"user_agent_regex": "^Lightpanda/.*$",
"action": "DENY"
},
{
"name": "headless-chrome",
"user_agent_regex": "HeadlessChrome",
"action": "DENY"
},
{
"name": "headless-chromium",
"user_agent_regex": "HeadlessChromium",
"action": "DENY"
},
{
"name": "generic-browser",
"user_agent_regex": "Mozilla",
"action": "CHALLENGE"
}
]
}

View file

@ -1,12 +0,0 @@
bots:
- name: everything
user_agent_regex: .*
action: DENY
openGraph:
enabled: true
considerHost: false
ttl: 1h
default:
"og:title": "Xe's magic land of fun"
"og:description": "We're no strangers to love, you know the rules and so do I"

View file

@ -1,6 +0,0 @@
bots:
- name: simple-weight-adjust
action: WEIGH
user_agent_regex: Mozilla
weight:
adjust: 5

View file

@ -1,13 +0,0 @@
{
"bots": [
{
"name": "everything",
"user_agent_regex": ".*",
"action": "DENY"
}
],
"status_codes": {
"CHALLENGE": 200,
"DENY": 200
}
}

View file

@ -1,8 +0,0 @@
bots:
- name: everything
user_agent_regex: .*
action: DENY
status_codes:
CHALLENGE: 200
DENY: 200

View file

@ -1,13 +0,0 @@
{
"bots": [
{
"name": "everything",
"user_agent_regex": ".*",
"action": "DENY"
}
],
"status_codes": {
"CHALLENGE": 403,
"DENY": 403
}
}

View file

@ -1,8 +0,0 @@
bots:
- name: everything
user_agent_regex: .*
action: DENY
status_codes:
CHALLENGE: 403
DENY: 403

View file

@ -1,38 +0,0 @@
bots:
- name: simple-weight-adjust
action: WEIGH
user_agent_regex: Mozilla
weight:
adjust: 5
thresholds:
- name: minimal-suspicion
expression: weight < 0
action: ALLOW
- name: mild-suspicion
expression:
all:
- weight >= 0
- weight < 10
action: CHALLENGE
challenge:
algorithm: metarefresh
difficulty: 1
report_as: 1
- name: moderate-suspicion
expression:
all:
- weight >= 10
- weight < 20
action: CHALLENGE
challenge:
algorithm: fast
difficulty: 2
report_as: 2
- name: extreme-suspicion
expression: weight >= 20
action: CHALLENGE
challenge:
algorithm: fast
difficulty: 4
report_as: 4

View file

@ -1,4 +0,0 @@
bots:
- name: weight
action: WEIGH
user_agent_regex: Mozilla

View file

@ -1,9 +0,0 @@
[
{
"name": "ipv6-ula",
"action": "ALLOW",
"remote_addresses": [
"fc00::/7"
]
}
]

View file

@ -1,3 +0,0 @@
- name: well-known
path_regex: ^/.well-known/.*$
action: ALLOW

View file

@ -1,80 +0,0 @@
package config
import (
"errors"
"fmt"
"github.com/TecharoHQ/anubis"
)
// Sentinel errors for threshold validation, plus the built-in default
// threshold list.
var (
// ErrNoThresholdRulesDefined is not referenced by the code visible here;
// presumably reported when a config ends up with no thresholds — confirm
// against callers.
ErrNoThresholdRulesDefined = errors.New("config: no thresholds defined")
// ErrThresholdMustHaveName is reported by Threshold.Valid when Name is empty.
ErrThresholdMustHaveName = errors.New("config.Threshold: must set name")
// ErrThresholdMustHaveExpression is reported by Threshold.Valid when
// Expression is nil.
ErrThresholdMustHaveExpression = errors.New("config.Threshold: must set expression")
// ErrThresholdChallengeMustHaveChallenge is reported by Threshold.Valid when
// Action is RuleChallenge but Challenge is nil.
ErrThresholdChallengeMustHaveChallenge = errors.New("config.Threshold: a threshold with the CHALLENGE action must have challenge set")
// ErrThresholdCannotHaveWeighAction is reported by Threshold.Valid when
// Action is RuleWeigh.
ErrThresholdCannotHaveWeighAction = errors.New("config.Threshold: a threshold cannot have the WEIGH action")
// DefaultThresholds holds a single rule: issue a "fast" challenge at
// anubis.DefaultDifficulty whenever weight > 0.
// NOTE(review): presumably used when a policy file defines no thresholds of
// its own — confirm against the loader.
DefaultThresholds = []Threshold{
{
Name: "legacy-anubis-behaviour",
Expression: &ExpressionOrList{
Expression: "weight > 0",
},
Action: RuleChallenge,
Challenge: &ChallengeRules{
Algorithm: "fast",
Difficulty: anubis.DefaultDifficulty,
ReportAs: anubis.DefaultDifficulty,
},
},
}
)
// Threshold is one entry of the `thresholds` list: a named expression paired
// with the action to take and, for challenges, the challenge settings.
type Threshold struct {
// Expression is required; Valid reports ErrThresholdMustHaveExpression when
// it is nil and otherwise validates it.
Expression *ExpressionOrList `json:"expression" yaml:"expression"`
// Challenge is required when Action is RuleChallenge and is validated
// whenever it is set.
Challenge *ChallengeRules `json:"challenge" yaml:"challenge"`
// Name is required and is quoted in the validation error message.
Name string `json:"name" yaml:"name"`
// Action must pass Rule.Valid and may not be RuleWeigh.
Action Rule `json:"action" yaml:"action"`
}
// Valid checks a single threshold entry.
//
// It collects every problem it finds: a missing name, a missing or invalid
// expression, an invalid action, the WEIGH action (not allowed here), a
// CHALLENGE action without challenge settings, and invalid challenge
// settings. The problems are joined and wrapped in an error naming the
// threshold; nil is returned when there are none.
func (t Threshold) Valid() error {
	var problems []error

	if t.Name == "" {
		problems = append(problems, ErrThresholdMustHaveName)
	}

	if t.Expression == nil {
		problems = append(problems, ErrThresholdMustHaveExpression)
	} else if err := t.Expression.Valid(); err != nil {
		problems = append(problems, err)
	}

	actionErr := t.Action.Valid()
	if actionErr != nil {
		problems = append(problems, actionErr)
	}

	if t.Action == RuleWeigh {
		problems = append(problems, ErrThresholdCannotHaveWeighAction)
	}

	hasChallenge := t.Challenge != nil
	if t.Action == RuleChallenge && !hasChallenge {
		problems = append(problems, ErrThresholdChallengeMustHaveChallenge)
	}

	// Challenge settings are validated whenever present, whatever the action.
	if hasChallenge {
		if err := t.Challenge.Valid(); err != nil {
			problems = append(problems, err)
		}
	}

	if len(problems) == 0 {
		return nil
	}

	return fmt.Errorf("config: threshold entry for %q is not valid:\n%w", t.Name, errors.Join(problems...))
}

View file

@ -1,111 +0,0 @@
package config
import (
"errors"
"fmt"
"os"
"path/filepath"
"testing"
)
// TestThresholdValid runs Threshold.Valid over a table of thresholds and
// checks that the returned error matches the expected sentinel (nil for
// thresholds that must be accepted).
func TestThresholdValid(t *testing.T) {
	for _, tt := range []struct {
		err   error
		input *Threshold
		name  string
	}{
		{
			name: "basic allow",
			input: &Threshold{
				Name:       "basic-allow",
				Expression: &ExpressionOrList{Expression: "true"},
				Action:     RuleAllow,
			},
			err: nil,
		},
		{
			name: "basic challenge",
			input: &Threshold{
				Name:       "basic-challenge",
				Expression: &ExpressionOrList{Expression: "true"},
				Action:     RuleChallenge,
				Challenge: &ChallengeRules{
					Algorithm:  "fast",
					Difficulty: 1,
					ReportAs:   1,
				},
			},
			err: nil,
		},
		{
			name:  "no name",
			input: &Threshold{},
			err:   ErrThresholdMustHaveName,
		},
		{
			name:  "no expression",
			input: &Threshold{},
			// This case used to expect ErrThresholdMustHaveName, which left
			// the missing-expression check untested.
			err: ErrThresholdMustHaveExpression,
		},
		{
			name: "invalid expression",
			input: &Threshold{
				Expression: &ExpressionOrList{},
			},
			err: ErrExpressionEmpty,
		},
		{
			name:  "invalid action",
			input: &Threshold{},
			err:   ErrUnknownAction,
		},
		{
			name: "challenge action but no challenge",
			input: &Threshold{
				Action: RuleChallenge,
			},
			err: ErrThresholdChallengeMustHaveChallenge,
		},
		{
			name: "challenge invalid",
			input: &Threshold{
				Action:    RuleChallenge,
				Challenge: &ChallengeRules{Difficulty: -1, ReportAs: -1},
			},
			err: ErrChallengeDifficultyTooLow,
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			if err := tt.input.Valid(); !errors.Is(err, tt.err) {
				// Report both sides so a failure shows what was expected.
				t.Logf("want: %v", tt.err)
				t.Logf("got: %v", err)
				t.Error("got wrong validation error")
			}
		})
	}
}
// TestDefaultThresholdsValid checks that every entry of DefaultThresholds
// passes its own validation.
func TestDefaultThresholdsValid(t *testing.T) {
	for idx := range DefaultThresholds {
		threshold := DefaultThresholds[idx]
		subtest := fmt.Sprintf("%d %s", idx, threshold.Name)

		t.Run(subtest, func(t *testing.T) {
			err := threshold.Valid()
			if err == nil {
				return
			}
			t.Errorf("threshold invalid: %v", err)
		})
	}
}
// TestLoadActuallyLoadsThresholds loads testdata/good/thresholds.yaml and
// checks that all four thresholds defined there end up in the loaded config.
func TestLoadActuallyLoadsThresholds(t *testing.T) {
	fixture := filepath.Join(".", "testdata", "good", "thresholds.yaml")

	fin, err := os.Open(fixture)
	if err != nil {
		t.Fatal(err)
	}
	defer fin.Close()

	cfg, err := Load(fin, fin.Name())
	if err != nil {
		t.Fatal(err)
	}

	if got := len(cfg.Thresholds); got != 4 {
		t.Errorf("wanted 4 thresholds, got %d thresholds", got)
	}
}

View file

@ -1,5 +0,0 @@
package config
// Weight is the `weight` block of a rule.
type Weight struct {
// Adjust is the configured integer adjustment.
// NOTE(review): presumably added to a request's weight by WEIGH rules —
// confirm against the policy evaluator.
Adjust int `json:"adjust" yaml:"adjust"`
}

View file

@ -6,12 +6,16 @@ import (
"fmt"
"io"
"log/slog"
"os"
"sync/atomic"
"time"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/lib/config"
"github.com/TecharoHQ/anubis/lib/policy/checker"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store"
"github.com/TecharoHQ/anubis/lib/thoth"
"github.com/fahedouch/go-logrotate"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
@ -38,6 +42,7 @@ type ParsedConfig struct {
StatusCodes config.StatusCodes
DefaultDifficulty int
DNSBL bool
Logger *slog.Logger
}
func newParsedConfig(orig *config.Config) *ParsedConfig {
@ -48,7 +53,7 @@ func newParsedConfig(orig *config.Config) *ParsedConfig {
}
}
func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDifficulty int) (*ParsedConfig, error) {
func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDifficulty int, logLevel string) (*ParsedConfig, error) {
c, err := config.Load(fin, fname)
if err != nil {
return nil, err
@ -202,6 +207,27 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic
validationErrs = append(validationErrs, config.ErrUnknownStoreBackend)
}
if c.Logging.Level != nil {
logLevel = c.Logging.Level.String()
}
switch c.Logging.Sink {
case config.LogSinkStdio:
result.Logger = internal.InitSlog(logLevel, os.Stderr)
case config.LogSinkFile:
out := &logrotate.Logger{
Filename: c.Logging.Parameters.Filename,
FilenameTimeFormat: time.RFC3339,
MaxBytes: c.Logging.Parameters.MaxBytes,
MaxAge: c.Logging.Parameters.MaxAge,
MaxBackups: c.Logging.Parameters.MaxBackups,
LocalTime: c.Logging.Parameters.UseLocalTime,
Compress: c.Logging.Parameters.Compress,
}
result.Logger = internal.InitSlog(logLevel, out)
}
if len(validationErrs) > 0 {
return nil, fmt.Errorf("errors validating policy config JSON %s: %w", fname, errors.Join(validationErrs...))
}

View file

@ -19,14 +19,14 @@ func TestDefaultPolicyMustParse(t *testing.T) {
}
defer fin.Close()
if _, err := ParseConfig(ctx, fin, "botPolicies.yaml", anubis.DefaultDifficulty); err != nil {
if _, err := ParseConfig(ctx, fin, "botPolicies.yaml", anubis.DefaultDifficulty, "info"); err != nil {
t.Fatalf("can't parse config: %v", err)
}
}
func TestGoodConfigs(t *testing.T) {
finfos, err := os.ReadDir("config/testdata/good")
finfos, err := os.ReadDir("../config/testdata/good")
if err != nil {
t.Fatal(err)
}
@ -35,26 +35,26 @@ func TestGoodConfigs(t *testing.T) {
st := st
t.Run(st.Name(), func(t *testing.T) {
t.Run("with-thoth", func(t *testing.T) {
fin, err := os.Open(filepath.Join("config", "testdata", "good", st.Name()))
fin, err := os.Open(filepath.Join("..", "config", "testdata", "good", st.Name()))
if err != nil {
t.Fatal(err)
}
defer fin.Close()
ctx := thothmock.WithMockThoth(t)
if _, err := ParseConfig(ctx, fin, fin.Name(), anubis.DefaultDifficulty); err != nil {
if _, err := ParseConfig(ctx, fin, fin.Name(), anubis.DefaultDifficulty, "info"); err != nil {
t.Fatal(err)
}
})
t.Run("without-thoth", func(t *testing.T) {
fin, err := os.Open(filepath.Join("config", "testdata", "good", st.Name()))
fin, err := os.Open(filepath.Join("..", "config", "testdata", "good", st.Name()))
if err != nil {
t.Fatal(err)
}
defer fin.Close()
if _, err := ParseConfig(t.Context(), fin, fin.Name(), anubis.DefaultDifficulty); err != nil {
if _, err := ParseConfig(t.Context(), fin, fin.Name(), anubis.DefaultDifficulty, "info"); err != nil {
t.Fatal(err)
}
})
@ -65,7 +65,7 @@ func TestGoodConfigs(t *testing.T) {
func TestBadConfigs(t *testing.T) {
ctx := thothmock.WithMockThoth(t)
finfos, err := os.ReadDir("config/testdata/bad")
finfos, err := os.ReadDir("../config/testdata/bad")
if err != nil {
t.Fatal(err)
}
@ -73,13 +73,13 @@ func TestBadConfigs(t *testing.T) {
for _, st := range finfos {
st := st
t.Run(st.Name(), func(t *testing.T) {
fin, err := os.Open(filepath.Join("config", "testdata", "bad", st.Name()))
fin, err := os.Open(filepath.Join("..", "config", "testdata", "bad", st.Name()))
if err != nil {
t.Fatal(err)
}
defer fin.Close()
if _, err := ParseConfig(ctx, fin, fin.Name(), anubis.DefaultDifficulty); err == nil {
if _, err := ParseConfig(ctx, fin, fin.Name(), anubis.DefaultDifficulty, "info"); err == nil {
t.Fatal(err)
} else {
t.Log(err)

View file

@ -1,7 +1,7 @@
package policy
import (
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/config"
"github.com/TecharoHQ/anubis/lib/policy/expressions"
"github.com/google/cel-go/cel"
)