feat(config): custom weight thresholds via CEL (#688)

* feat(config): add Thresholds to the top level config file Signed-off-by: Xe Iaso <me@xeiaso.net> * chore(config): make String() on ExpressionOrList join the component expressions Signed-off-by: Xe Iaso <me@xeiaso.net> * test(config): ensure unparseable json fails Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(config): if no thresholds are set, use the default thresholds Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(policy): half implement thresholds Signed-off-by: Xe Iaso <me@xeiaso.net> * chore(policy): continue wiring things up Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(lib): wire up thresholds Signed-off-by: Xe Iaso <me@xeiaso.net> * test(lib): handle behavior from legacy configurations Signed-off-by: Xe Iaso <me@xeiaso.net> * docs: document thresholds Signed-off-by: Xe Iaso <me@xeiaso.net> * docs: update CHANGELOG, refer to threshold configuration Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(lib): fix build Signed-off-by: Xe Iaso <me@xeiaso.net> * chore(lib): fix U1000 Signed-off-by: Xe Iaso <me@xeiaso.net> --------- Signed-off-by: Xe Iaso <me@xeiaso.net> Signed-off-by: Jason Cameron <git@jasoncameron.dev> Co-authored-by: Jason Cameron <git@jasoncameron.dev>
2025-06-18 16:58:31 -04:00 · 2025-06-18 16:58:31 -04:00 · 226cf36bf7
commit 226cf36bf7
parent 1d5fa49eb0
22 changed files with 683 additions and 305 deletions
--- a/lib/policy/config/config.go
+++ b/lib/policy/config/config.go
@ -43,6 +43,15 @@ const (
 	RuleBenchmark Rule = "DEBUG_BENCHMARK"
 )

+// Valid reports whether r is one of the rule actions declared in this
+// package. It returns nil for RuleAllow, RuleDeny, RuleChallenge, RuleWeigh
+// and RuleBenchmark, and ErrUnknownAction for every other value.
+func (r Rule) Valid() error {
+	known := r == RuleAllow ||
+		r == RuleDeny ||
+		r == RuleChallenge ||
+		r == RuleWeigh ||
+		r == RuleBenchmark
+	if known {
+		return nil
+	}
+
+	return ErrUnknownAction
+}
+
 const DefaultAlgorithm = "fast"

 type BotConfig struct {
@ -184,13 +193,18 @@ type ChallengeRules struct {
 }

 var (
-	ErrChallengeDifficultyTooLow  = errors.New("config.Bot.ChallengeRules: difficulty is too low (must be >= 1)")
-	ErrChallengeDifficultyTooHigh = errors.New("config.Bot.ChallengeRules: difficulty is too high (must be <= 64)")
+	ErrChallengeDifficultyTooLow  = errors.New("config.ChallengeRules: difficulty is too low (must be >= 1)")
+	ErrChallengeDifficultyTooHigh = errors.New("config.ChallengeRules: difficulty is too high (must be <= 64)")
+	ErrChallengeMustHaveAlgorithm = errors.New("config.ChallengeRules: must have algorithm name set")
 )

 func (cr ChallengeRules) Valid() error {
 	var errs []error

+	if cr.Algorithm == "" {
+		errs = append(errs, ErrChallengeMustHaveAlgorithm)
+	}
+
 	if cr.Difficulty < 1 {
 		errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooLow, cr.Difficulty))
 	}
@ -312,18 +326,19 @@ type fileConfig struct {
 	Bots        []BotOrImport `json:"bots"`
 	DNSBL       bool          `json:"dnsbl"`
 	StatusCodes StatusCodes   `json:"status_codes"`
+
+	// Thresholds is read from the "thresholds" key, the spelling used by the
+	// YAML fixtures in testdata/good. A singular "threshold" tag makes the
+	// decoder silently ignore that key, so custom thresholds never load.
+	Thresholds []Threshold `json:"thresholds"`
 }

-func (c fileConfig) Valid() error {
+func (c *fileConfig) Valid() error {
 	var errs []error

 	if len(c.Bots) == 0 {
 		errs = append(errs, ErrNoBotRulesDefined)
 	}

-	for _, b := range c.Bots {
+	for i, b := range c.Bots {
 		if err := b.Valid(); err != nil {
-			errs = append(errs, err)
+			errs = append(errs, fmt.Errorf("bot %d: %w", i, err))
 		}
 	}

@ -331,6 +346,16 @@ func (c fileConfig) Valid() error {
 		errs = append(errs, err)
 	}

+	if len(c.Thresholds) == 0 {
+		errs = append(errs, ErrNoThresholdRulesDefined)
+	}
+
+	for i, t := range c.Thresholds {
+		if err := t.Valid(); err != nil {
+			errs = append(errs, fmt.Errorf("threshold %d: %w", i, err))
+		}
+	}
+
 	if len(errs) != 0 {
 		return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...))
 	}
@ -339,11 +364,14 @@ func (c fileConfig) Valid() error {
 }

+// Load decodes a policy configuration from fin and returns the resulting
+// Config. fname is used to label error messages.
 func Load(fin io.Reader, fname string) (*Config, error) {
-	var c fileConfig
-	c.StatusCodes = StatusCodes{
-		Challenge: http.StatusOK,
-		Deny:      http.StatusOK,
+	c := &fileConfig{
+		StatusCodes: StatusCodes{
+			Challenge: http.StatusOK,
+			Deny:      http.StatusOK,
+		},
 	}
-	if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
+
+	// Decode into the struct itself rather than into a pointer to the pointer:
+	// a top-level YAML null would otherwise set c to nil and crash below.
+	if err := yaml.NewYAMLToJSONDecoder(fin).Decode(c); err != nil {
 		return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
 	}
+
+	// Apply the defaults only after decoding. Seeding c.Thresholds with
+	// DefaultThresholds beforehand lets the decoder reuse that slice's backing
+	// array and overwrite the package-level defaults with file contents.
+	if len(c.Thresholds) == 0 {
+		c.Thresholds = DefaultThresholds
+	}
@ -379,6 +407,15 @@ func Load(fin io.Reader, fname string) (*Config, error) {
 		}
 	}

+	for _, t := range c.Thresholds {
+		if err := t.Valid(); err != nil {
+			validationErrs = append(validationErrs, err)
+			continue
+		}
+
+		result.Thresholds = append(result.Thresholds, t)
+	}
+
 	if len(validationErrs) > 0 {
 		return nil, fmt.Errorf("errors validating policy config %s: %w", fname, errors.Join(validationErrs...))
 	}
@ -388,6 +425,7 @@ func Load(fin io.Reader, fname string) (*Config, error) {

+// Config is the policy configuration returned by Load.
 type Config struct {
 	Bots        []BotConfig
+	Thresholds  []Threshold
 	DNSBL       bool
 	StatusCodes StatusCodes
 }
--- a/lib/policy/config/config_test.go
+++ b/lib/policy/config/config_test.go
@ -8,7 +8,6 @@ import (
 	"testing"

 	"github.com/TecharoHQ/anubis/data"
-	"k8s.io/apimachinery/pkg/util/yaml"
 )

 func p[V any](v V) *V { return &v }
@ -313,12 +312,8 @@ func TestConfigValidBad(t *testing.T) {
 			}
 			defer fin.Close()

-			var c fileConfig
-			if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
-				t.Fatalf("can't decode file: %v", err)
-			}
-
-			if err := c.Valid(); err == nil {
+			_, err = Load(fin, filepath.Join("testdata", "bad", st.Name()))
+			if err == nil {
 				t.Fatal("validation should have failed but didn't somehow")
 			} else {
 				t.Log(err)
--- a/lib/policy/config/expressionorlist.go
+++ b/lib/policy/config/expressionorlist.go
@ -3,7 +3,9 @@ package config
 import (
 	"encoding/json"
 	"errors"
+	"fmt"
 	"slices"
+	"strings"
 )

 var (
@ -18,6 +20,32 @@ type ExpressionOrList struct {
 	Any        []string `json:"any,omitempty" yaml:"any,omitempty"`
 }

+// String renders the expression as a single source string.
+//
+// A non-empty Expression is returned unchanged. Otherwise the entries of All
+// are each wrapped in "( ... )" and joined with " && "; failing that, the
+// entries of Any are wrapped the same way and joined with " || ". When none of
+// the three fields is populated, String returns the empty string rather than
+// panicking, so formatting or logging an unset value cannot crash the caller.
+func (eol ExpressionOrList) String() string {
+	switch {
+	case len(eol.Expression) != 0:
+		return eol.Expression
+	case len(eol.All) != 0:
+		return joinPredicates(eol.All, " && ")
+	case len(eol.Any) != 0:
+		return joinPredicates(eol.Any, " || ")
+	default:
+		return ""
+	}
+}
+
+// joinPredicates wraps each predicate in "( ... )" and joins them with op.
+func joinPredicates(preds []string, op string) string {
+	var sb strings.Builder
+	for i, pred := range preds {
+		if i != 0 {
+			sb.WriteString(op)
+		}
+		fmt.Fprintf(&sb, "( %s )", pred)
+	}
+	return sb.String()
+}
+
 func (eol ExpressionOrList) Equal(rhs *ExpressionOrList) bool {
 	if eol.Expression != rhs.Expression {
 		return false
--- a/lib/policy/config/expressionorlist_test.go
+++ b/lib/policy/config/expressionorlist_test.go
@ -213,3 +213,54 @@ func TestExpressionOrListUnmarshalJSON(t *testing.T) {
 		})
 	}
 }
+
+// TestExpressionOrListString checks the string rendering of each populated
+// form of ExpressionOrList: a bare expression, an All list and an Any list,
+// with one and with several entries.
+func TestExpressionOrListString(t *testing.T) {
+	for _, tt := range []struct {
+		name string
+		in   ExpressionOrList
+		out  string
+	}{
+		{
+			name: "single expression",
+			in: ExpressionOrList{
+				Expression: "true",
+			},
+			out: "true",
+		},
+		{
+			name: "all",
+			in: ExpressionOrList{
+				All: []string{"true"},
+			},
+			out: "( true )",
+		},
+		{
+			name: "all with &&",
+			in: ExpressionOrList{
+				All: []string{"true", "true"},
+			},
+			out: "( true ) && ( true )",
+		},
+		{
+			name: "any",
+			in: ExpressionOrList{
+				// This case must populate Any; using All here would only
+				// repeat the "all" case and leave the Any branch untested.
+				Any: []string{"true"},
+			},
+			out: "( true )",
+		},
+		{
+			name: "any with ||",
+			in: ExpressionOrList{
+				Any: []string{"true", "true"},
+			},
+			out: "( true ) || ( true )",
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			result := tt.in.String()
+			if result != tt.out {
+				t.Errorf("wanted %q, got: %q", tt.out, result)
+			}
+		})
+	}
+}
--- a/lib/policy/config/testdata/bad/unparseable.json
+++ b/lib/policy/config/testdata/bad/unparseable.json
@ -0,0 +1 @@
+}
--- a/lib/policy/config/testdata/bad/unparseable.yaml
+++ b/lib/policy/config/testdata/bad/unparseable.yaml
@ -0,0 +1 @@
+}
--- a/lib/policy/config/testdata/good/no-thresholds.yaml
+++ b/lib/policy/config/testdata/good/no-thresholds.yaml
@ -0,0 +1,8 @@
+bots:
+  - name: simple-weight-adjust
+    action: WEIGH
+    user_agent_regex: Mozilla
+    weight:
+      adjust: 5
+
+thresholds: []
--- a/lib/policy/config/testdata/good/thresholds.yaml
+++ b/lib/policy/config/testdata/good/thresholds.yaml
@ -0,0 +1,38 @@
+bots:
+  - name: simple-weight-adjust
+    action: WEIGH
+    user_agent_regex: Mozilla
+    weight:
+      adjust: 5
+
+thresholds:
+  - name: minimal-suspicion
+    expression: weight < 0
+    action: ALLOW
+  - name: mild-suspicion
+    expression:
+      all:
+        - weight >= 0
+        - weight < 10
+    action: CHALLENGE
+    challenge:
+      algorithm: metarefresh
+      difficulty: 1
+      report_as: 1
+  - name: moderate-suspicion
+    expression:
+      all:
+        - weight >= 10
+        - weight < 20
+    action: CHALLENGE
+    challenge:
+      algorithm: fast
+      difficulty: 2
+      report_as: 2
+  - name: extreme-suspicion
+    expression: weight >= 20
+    action: CHALLENGE
+    challenge:
+      algorithm: fast
+      difficulty: 4
+      report_as: 4
--- a/lib/policy/config/threshold.go
+++ b/lib/policy/config/threshold.go
@ -0,0 +1,80 @@
+package config
+
+import (
+	"errors"
+	"fmt"
+
+	"github.com/TecharoHQ/anubis"
+)
+
+var (
+	// Sentinel errors reported while validating threshold configuration.
+	// ErrNoThresholdRulesDefined is used by fileConfig.Valid when the
+	// threshold list is empty; the remaining errors are returned, wrapped, by
+	// Threshold.Valid.
+	ErrNoThresholdRulesDefined             = errors.New("config: no thresholds defined")
+	ErrThresholdMustHaveName               = errors.New("config.Threshold: must set name")
+	ErrThresholdMustHaveExpression         = errors.New("config.Threshold: must set expression")
+	ErrThresholdChallengeMustHaveChallenge = errors.New("config.Threshold: a threshold with the CHALLENGE action must have challenge set")
+	ErrThresholdCannotHaveWeighAction      = errors.New("config.Threshold: a threshold cannot have the WEIGH action")
+
+	// DefaultThresholds is the threshold list Load uses as its default. It
+	// holds a single RuleChallenge entry with the expression "weight > 0",
+	// the "fast" algorithm, and anubis.DefaultDifficulty for both Difficulty
+	// and ReportAs. It is a shared package-level slice; treat it as read-only.
+	DefaultThresholds = []Threshold{
+		{
+			Name: "legacy-anubis-behaviour",
+			Expression: &ExpressionOrList{
+				Expression: "weight > 0",
+			},
+			Action: RuleChallenge,
+			Challenge: &ChallengeRules{
+				Algorithm:  "fast",
+				Difficulty: anubis.DefaultDifficulty,
+				ReportAs:   anubis.DefaultDifficulty,
+			},
+		},
+	}
+)
+
+// Threshold is one entry of the threshold list in the policy configuration.
+//
+// Valid requires a non-empty Name, a non-nil Expression that itself passes
+// validation, a known Action other than the WEIGH action, and a Challenge
+// block whenever Action is the CHALLENGE action. A Challenge that is present
+// is validated regardless of the action.
+type Threshold struct {
+	Name       string            `json:"name" yaml:"name"`
+	Expression *ExpressionOrList `json:"expression" yaml:"expression"`
+	Action     Rule              `json:"action" yaml:"action"`
+	Challenge  *ChallengeRules   `json:"challenge" yaml:"challenge"`
+}
+
+// Valid checks every constraint on the threshold and reports all violations
+// at once instead of stopping at the first one.
+//
+// It returns nil when nothing is wrong. Otherwise it returns one error that
+// names the threshold and wraps the joined list of problems, so callers can
+// match individual sentinel errors with errors.Is.
+func (t Threshold) Valid() error {
+	var problems []error
+
+	if t.Name == "" {
+		problems = append(problems, ErrThresholdMustHaveName)
+	}
+
+	// The expression is required, and when present it must be valid itself.
+	if t.Expression == nil {
+		problems = append(problems, ErrThresholdMustHaveExpression)
+	} else if err := t.Expression.Valid(); err != nil {
+		problems = append(problems, err)
+	}
+
+	if err := t.Action.Valid(); err != nil {
+		problems = append(problems, err)
+	}
+
+	switch t.Action {
+	case RuleWeigh:
+		problems = append(problems, ErrThresholdCannotHaveWeighAction)
+	case RuleChallenge:
+		if t.Challenge == nil {
+			problems = append(problems, ErrThresholdChallengeMustHaveChallenge)
+		}
+	}
+
+	// A challenge block is validated whenever it is present, whatever the action.
+	if t.Challenge != nil {
+		if err := t.Challenge.Valid(); err != nil {
+			problems = append(problems, err)
+		}
+	}
+
+	if len(problems) == 0 {
+		return nil
+	}
+
+	return fmt.Errorf("config: threshold entry for %q is not valid:\n%w", t.Name, errors.Join(problems...))
+}
--- a/lib/policy/config/threshold_test.go
+++ b/lib/policy/config/threshold_test.go
@ -0,0 +1,92 @@
+package config
+
+import (
+	"errors"
+	"fmt"
+	"testing"
+)
+
+// TestThresholdValid feeds Threshold.Valid a table of inputs and checks, with
+// errors.Is, that the returned error matches the expected sentinel (or is nil
+// for valid inputs).
+func TestThresholdValid(t *testing.T) {
+	for _, tt := range []struct {
+		name  string
+		input *Threshold
+		err   error
+	}{
+		{
+			name: "basic allow",
+			input: &Threshold{
+				Name:       "basic-allow",
+				Expression: &ExpressionOrList{Expression: "true"},
+				Action:     RuleAllow,
+			},
+			err: nil,
+		},
+		{
+			name: "basic challenge",
+			input: &Threshold{
+				Name:       "basic-challenge",
+				Expression: &ExpressionOrList{Expression: "true"},
+				Action:     RuleChallenge,
+				Challenge: &ChallengeRules{
+					Algorithm:  "fast",
+					Difficulty: 1,
+					ReportAs:   1,
+				},
+			},
+			err: nil,
+		},
+		{
+			name:  "no name",
+			input: &Threshold{},
+			err:   ErrThresholdMustHaveName,
+		},
+		{
+			name:  "no expression",
+			input: &Threshold{},
+			// Must assert the expression sentinel; asserting the name
+			// sentinel here would only duplicate the "no name" case.
+			err: ErrThresholdMustHaveExpression,
+		},
+		{
+			name: "invalid expression",
+			input: &Threshold{
+				Expression: &ExpressionOrList{},
+			},
+			err: ErrExpressionEmpty,
+		},
+		{
+			name:  "invalid action",
+			input: &Threshold{},
+			err:   ErrUnknownAction,
+		},
+		{
+			name: "challenge action but no challenge",
+			input: &Threshold{
+				Action: RuleChallenge,
+			},
+			err: ErrThresholdChallengeMustHaveChallenge,
+		},
+		{
+			name: "challenge invalid",
+			input: &Threshold{
+				Action:    RuleChallenge,
+				Challenge: &ChallengeRules{Difficulty: 0, ReportAs: 0},
+			},
+			err: ErrChallengeDifficultyTooLow,
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			if err := tt.input.Valid(); !errors.Is(err, tt.err) {
+				t.Errorf("wanted error %v, got: %v", tt.err, err)
+			}
+		})
+	}
+}
+
+// TestDefaultThresholdsValid runs one subtest per entry of DefaultThresholds
+// and fails if any built-in threshold does not pass its own validation.
+func TestDefaultThresholdsValid(t *testing.T) {
+	for idx := range DefaultThresholds {
+		threshold := DefaultThresholds[idx]
+		label := fmt.Sprintf("%d %s", idx, threshold.Name)
+
+		t.Run(label, func(t *testing.T) {
+			err := threshold.Valid()
+			if err == nil {
+				return
+			}
+			t.Errorf("threshold invalid: %v", err)
+		})
+	}
+}