From 4ead3ed16ef037f22ace6966130bf21f65f3df7c Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Tue, 25 Nov 2025 23:25:17 -0500 Subject: [PATCH] fix(config): deprecate the report_as field for challenges (#1311) * fix(config): deprecate the report_as field for challenges This was a bad idea when it was added and it is irresponsible to continue to have it. It causes more UX problems than it fixes with sleight of hand. Closes: #1310 Closes: #1307 Signed-off-by: Xe Iaso * fix(policy): use the new logger for config validation messages Signed-off-by: Xe Iaso * docs(admin/thresholds): remove this report_as setting Signed-off-by: Xe Iaso --------- Signed-off-by: Xe Iaso --- data/botPolicies.yaml | 7 +-- data/meta/default-config.yaml | 1 - docs/docs/CHANGELOG.md | 36 +++++++++++ .../configuration/challenges/metarefresh.mdx | 1 - .../admin/configuration/challenges/preact.mdx | 1 - docs/docs/admin/configuration/thresholds.mdx | 4 -- docs/docs/admin/policies.mdx | 2 - docs/manifest/cfg/anubis/botPolicies.yaml | 5 -- lib/anubis.go | 5 +- lib/anubis_test.go | 4 -- lib/challenge/proofofwork/proofofwork_test.go | 1 - lib/config/config_test.go | 3 - lib/config/testdata/good/thresholds.yaml | 3 - lib/config/threshold.go | 1 - lib/config/threshold_test.go | 1 - lib/policy/policy.go | 60 ++++++++++--------- lib/testdata/invalid-challenge-method.yaml | 1 - lib/testdata/test_config.yaml | 1 - lib/testdata/zero_difficulty.yaml | 1 - test/palemoon/anubis/anubis.yaml | 1 - web/js/main.ts | 4 +- 21 files changed, 73 insertions(+), 70 deletions(-) diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml index 9cdf68f..1288f63 100644 --- a/data/botPolicies.yaml +++ b/data/botPolicies.yaml @@ -50,8 +50,7 @@ bots: # user_agent_regex: (?i:bot|crawler) # action: CHALLENGE # challenge: - # difficulty: 16 # impossible - # report_as: 4 # lie to the operator + # difficulty: 16 # impossible # algorithm: slow # intentionally waste CPU cycles and time # Requires a subscription to Thoth to use, see @@ -249,7
+248,6 @@ thresholds: # https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh algorithm: metarefresh difficulty: 1 - report_as: 1 # For clients that are browser-like but have either gained points from custom rules or # report as a standard browser. - name: moderate-suspicion @@ -262,7 +260,6 @@ thresholds: # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work algorithm: fast difficulty: 2 # two leading zeros, very fast for most clients - report_as: 2 - name: mild-proof-of-work expression: all: @@ -273,7 +270,6 @@ thresholds: # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work algorithm: fast difficulty: 4 - report_as: 4 # For clients that are browser like and have gained many points from custom rules - name: extreme-suspicion expression: weight >= 30 @@ -282,4 +278,3 @@ thresholds: # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work algorithm: fast difficulty: 6 - report_as: 6 diff --git a/data/meta/default-config.yaml b/data/meta/default-config.yaml index 1b1ad1e..9658e38 100644 --- a/data/meta/default-config.yaml +++ b/data/meta/default-config.yaml @@ -35,7 +35,6 @@ # action: CHALLENGE # challenge: # difficulty: 16 # impossible -# report_as: 4 # lie to the operator # algorithm: slow # intentionally waste CPU cycles and time # Requires a subscription to Thoth to use, see diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index 3f7eeaf..e5ef418 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -25,6 +25,42 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Stabilize the CVE-2025-24369 regression test by always submitting an invalid proof instead of relying on random POW failures. 
- Add Polish locale ([#1292](https://github.com/TecharoHQ/anubis/pull/1309)) +### Deprecate `report_as` in challenge configuration + +Previously Anubis let you lie to users about the difficulty of a challenge to interfere with operators of malicious scrapers as a psychological attack: + +```yaml +bots: + # Punish any bot with "bot" in the user-agent string + # This is known to have a high false-positive rate, use at your own risk + - name: generic-bot-catchall + user_agent_regex: (?i:bot|crawler) + action: CHALLENGE + challenge: + difficulty: 16 # impossible + report_as: 4 # lie to the operator + algorithm: slow # intentionally waste CPU cycles and time +``` + +This turned out to be a bad idea because it caused massive user experience problems, so the behavior has been removed: the challenge page now always reports the real difficulty. If you are still using this setting, you will get a warning in your logs like this: + +```json +{ + "time": "2025-11-25T23:10:31.092201549-05:00", + "level": "WARN", + "source": { + "function": "github.com/TecharoHQ/anubis/lib/policy.ParseConfig", + "file": "/home/xe/code/TecharoHQ/anubis/lib/policy/policy.go", + "line": 201 + }, + "msg": "use of deprecated report_as setting detected, please remove this from your policy file when possible", + "at": "config-validate", + "name": "mild-suspicion" +} +``` + +To remove this warning, remove this setting from your policy file. + ### Logging customization Anubis now supports the ability to log to multiple backends ("sinks"). This allows you to have Anubis [log to a file](./admin/policies.mdx#file-sink) instead of just logging to standard out.
You can also customize the [logging level](./admin/policies.mdx#log-levels) in the policy file: diff --git a/docs/docs/admin/configuration/challenges/metarefresh.mdx b/docs/docs/admin/configuration/challenges/metarefresh.mdx index 730c5bb..dcf876f 100644 --- a/docs/docs/admin/configuration/challenges/metarefresh.mdx +++ b/docs/docs/admin/configuration/challenges/metarefresh.mdx @@ -12,7 +12,6 @@ To use it in your Anubis configuration: action: CHALLENGE challenge: difficulty: 1 # Number of seconds to wait before refreshing the page - report_as: 4 # Unused by this challenge method algorithm: metarefresh # Specify a non-JS challenge method ``` diff --git a/docs/docs/admin/configuration/challenges/preact.mdx b/docs/docs/admin/configuration/challenges/preact.mdx index 5721490..35f9883 100644 --- a/docs/docs/admin/configuration/challenges/preact.mdx +++ b/docs/docs/admin/configuration/challenges/preact.mdx @@ -12,7 +12,6 @@ To use it in your Anubis configuration: action: CHALLENGE challenge: difficulty: 1 # Number of seconds to wait before refreshing the page - report_as: 4 # Unused by this challenge method algorithm: preact ``` diff --git a/docs/docs/admin/configuration/thresholds.mdx b/docs/docs/admin/configuration/thresholds.mdx index 2c99ae7..189558d 100644 --- a/docs/docs/admin/configuration/thresholds.mdx +++ b/docs/docs/admin/configuration/thresholds.mdx @@ -41,7 +41,6 @@ thresholds: challenge: algorithm: metarefresh difficulty: 1 - report_as: 1 - name: moderate-suspicion expression: @@ -52,7 +51,6 @@ thresholds: challenge: algorithm: fast difficulty: 2 - report_as: 2 - name: extreme-suspicion expression: weight >= 20 @@ -60,7 +58,6 @@ thresholds: challenge: algorithm: fast difficulty: 4 - report_as: 4 ``` This defines a suite of 4 thresholds: @@ -130,7 +127,6 @@ action: CHALLENGE challenge: algorithm: metarefresh difficulty: 1 - report_as: 1 ``` diff --git a/docs/docs/admin/policies.mdx b/docs/docs/admin/policies.mdx index a3a2e7d..159d1ab 100644 --- 
a/docs/docs/admin/policies.mdx +++ b/docs/docs/admin/policies.mdx @@ -84,7 +84,6 @@ This rule has been known to have a high false positive rate in testing. Please u action: CHALLENGE challenge: difficulty: 16 # impossible - report_as: 4 # lie to the operator algorithm: slow # intentionally waste CPU cycles and time ``` @@ -93,7 +92,6 @@ Challenges can be configured with these settings: | Key | Example | Description | | :----------- | :------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `difficulty` | `4` | The challenge difficulty (number of leading zeros) for proof-of-work. See [Why does Anubis use Proof-of-Work?](/docs/design/why-proof-of-work) for more details. | -| `report_as` | `4` | What difficulty the UI should report to the user. Useful for messing with industrial-scale scraping efforts. | | `algorithm` | `"fast"` | The challenge method to use. See [the list of challenge methods](./configuration/challenges/) for more information. | ### Remote IP based filtering diff --git a/docs/manifest/cfg/anubis/botPolicies.yaml b/docs/manifest/cfg/anubis/botPolicies.yaml index ffb2d15..e5be287 100644 --- a/docs/manifest/cfg/anubis/botPolicies.yaml +++ b/docs/manifest/cfg/anubis/botPolicies.yaml @@ -49,7 +49,6 @@ bots: # action: CHALLENGE # challenge: # difficulty: 16 # impossible - # report_as: 4 # lie to the operator # algorithm: slow # intentionally waste CPU cycles and time - name: rss-feed-blog @@ -105,7 +104,6 @@ thresholds: # https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh algorithm: metarefresh difficulty: 1 - report_as: 1 # For clients that are browser-like but have either gained points from custom rules or # report as a standard browser. - name: moderate-suspicion @@ -122,7 +120,6 @@ thresholds: # challenge data, and forwards that to the client. 
algorithm: preact difficulty: 1 - report_as: 1 - name: mild-proof-of-work expression: all: @@ -133,7 +130,6 @@ thresholds: # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work algorithm: fast difficulty: 2 # two leading zeros, very fast for most clients - report_as: 2 # For clients that are browser like and have gained many points from custom rules - name: extreme-suspicion expression: weight >= 30 @@ -142,7 +138,6 @@ thresholds: # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work algorithm: fast difficulty: 4 - report_as: 4 dnsbl: false diff --git a/lib/anubis.go b/lib/anubis.go index 4fc154e..feff53a 100644 --- a/lib/anubis.go +++ b/lib/anubis.go @@ -167,8 +167,8 @@ func (s *Server) hydrateChallengeRule(rule *policy.Bot, chall *challenge.Challen if rule.Challenge.Difficulty == 0 { rule.Challenge.Difficulty = chall.Difficulty } - if rule.Challenge.ReportAs == 0 { - rule.Challenge.ReportAs = chall.Difficulty + if rule.Challenge.ReportAs != 0 { + s.logger.Warn("[DEPRECATION] the report_as field in this bot rule is deprecated, see https://github.com/TecharoHQ/anubis/issues/1310 for more information", "bot_name", rule.Name, "difficulty", rule.Challenge.Difficulty, "report_as", rule.Challenge.ReportAs) } if rule.Challenge.Algorithm == "" { rule.Challenge.Algorithm = chall.Method @@ -648,7 +648,6 @@ func (s *Server) check(r *http.Request, lg *slog.Logger) (policy.CheckResult, *p return cr("default/allow", config.RuleAllow, weight), &policy.Bot{ Challenge: &config.ChallengeRules{ Difficulty: s.policy.DefaultDifficulty, - ReportAs: s.policy.DefaultDifficulty, Algorithm: config.DefaultAlgorithm, }, Rules: &checker.List{}, diff --git a/lib/anubis_test.go b/lib/anubis_test.go index 78e602d..07785d3 100644 --- a/lib/anubis_test.go +++ b/lib/anubis_test.go @@ -464,10 +464,6 @@ func TestCheckDefaultDifficultyMatchesPolicy(t *testing.T) { if bot.Challenge.Difficulty != i { t.Errorf("Challenge.Difficulty is wrong, wanted %d, 
got: %d", i, bot.Challenge.Difficulty) } - - if bot.Challenge.ReportAs != i { - t.Errorf("Challenge.ReportAs is wrong, wanted %d, got: %d", i, bot.Challenge.ReportAs) - } }) } } diff --git a/lib/challenge/proofofwork/proofofwork_test.go b/lib/challenge/proofofwork/proofofwork_test.go index 6b12afe..069636b 100644 --- a/lib/challenge/proofofwork/proofofwork_test.go +++ b/lib/challenge/proofofwork/proofofwork_test.go @@ -36,7 +36,6 @@ func TestBasic(t *testing.T) { Challenge: &config.ChallengeRules{ Algorithm: "fast", Difficulty: 0, - ReportAs: 0, }, } const challengeStr = "hunter" diff --git a/lib/config/config_test.go b/lib/config/config_test.go index 8702780..1b93375 100644 --- a/lib/config/config_test.go +++ b/lib/config/config_test.go @@ -110,7 +110,6 @@ func TestBotValid(t *testing.T) { PathRegex: p("Mozilla"), Challenge: &ChallengeRules{ Difficulty: -1, - ReportAs: 4, Algorithm: "fast", }, }, @@ -124,7 +123,6 @@ func TestBotValid(t *testing.T) { PathRegex: p("Mozilla"), Challenge: &ChallengeRules{ Difficulty: 420, - ReportAs: 4, Algorithm: "fast", }, }, @@ -361,7 +359,6 @@ func TestBotConfigZero(t *testing.T) { b.Challenge = &ChallengeRules{ Difficulty: 4, - ReportAs: 4, Algorithm: DefaultAlgorithm, } if b.Zero() { diff --git a/lib/config/testdata/good/thresholds.yaml b/lib/config/testdata/good/thresholds.yaml index 75e1788..9365c71 100644 --- a/lib/config/testdata/good/thresholds.yaml +++ b/lib/config/testdata/good/thresholds.yaml @@ -18,7 +18,6 @@ thresholds: challenge: algorithm: metarefresh difficulty: 1 - report_as: 1 - name: moderate-suspicion expression: all: @@ -28,11 +27,9 @@ thresholds: challenge: algorithm: fast difficulty: 2 - report_as: 2 - name: extreme-suspicion expression: weight >= 20 action: CHALLENGE challenge: algorithm: fast difficulty: 4 - report_as: 4 diff --git a/lib/config/threshold.go b/lib/config/threshold.go index dd82892..5b8555d 100644 --- a/lib/config/threshold.go +++ b/lib/config/threshold.go @@ -24,7 +24,6 @@ var ( Challenge: 
&ChallengeRules{ Algorithm: "fast", Difficulty: anubis.DefaultDifficulty, - ReportAs: anubis.DefaultDifficulty, }, }, } diff --git a/lib/config/threshold_test.go b/lib/config/threshold_test.go index fce15d0..120e7ed 100644 --- a/lib/config/threshold_test.go +++ b/lib/config/threshold_test.go @@ -32,7 +32,6 @@ func TestThresholdValid(t *testing.T) { Challenge: &ChallengeRules{ Algorithm: "fast", Difficulty: 1, - ReportAs: 1, }, }, err: nil, diff --git a/lib/policy/policy.go b/lib/policy/policy.go index 8f6dfaa..afb9b77 100644 --- a/lib/policy/policy.go +++ b/lib/policy/policy.go @@ -66,6 +66,29 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic result := newParsedConfig(c) result.DefaultDifficulty = defaultDifficulty + if c.Logging.Level != nil { + logLevel = c.Logging.Level.String() + } + + switch c.Logging.Sink { + case config.LogSinkStdio: + result.Logger = internal.InitSlog(logLevel, os.Stderr) + case config.LogSinkFile: + out := &logrotate.Logger{ + Filename: c.Logging.Parameters.Filename, + FilenameTimeFormat: time.RFC3339, + MaxBytes: c.Logging.Parameters.MaxBytes, + MaxAge: c.Logging.Parameters.MaxAge, + MaxBackups: c.Logging.Parameters.MaxBackups, + LocalTime: c.Logging.Parameters.UseLocalTime, + Compress: c.Logging.Parameters.Compress, + } + + result.Logger = internal.InitSlog(logLevel, out) + } + + lg := result.Logger.With("at", "config-validate") + for _, b := range c.Bots { if berr := b.Valid(); berr != nil { validationErrs = append(validationErrs, berr) @@ -126,7 +149,7 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic if b.ASNs != nil { if !hasThothClient { - slog.Warn("You have specified a Thoth specific check but you have no Thoth client configured. Please read https://anubis.techaro.lol/docs/admin/thoth for more information", "check", "asn", "settings", b.ASNs) + lg.Warn("You have specified a Thoth specific check but you have no Thoth client configured. 
Please read https://anubis.techaro.lol/docs/admin/thoth for more information", "check", "asn", "settings", b.ASNs) continue } @@ -135,7 +158,7 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic if b.GeoIP != nil { if !hasThothClient { - slog.Warn("You have specified a Thoth specific check but you have no Thoth client configured. Please read https://anubis.techaro.lol/docs/admin/thoth for more information", "check", "geoip", "settings", b.GeoIP) + lg.Warn("You have specified a Thoth specific check but you have no Thoth client configured. Please read https://anubis.techaro.lol/docs/admin/thoth for more information", "check", "geoip", "settings", b.GeoIP) continue } @@ -145,7 +168,6 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic if b.Challenge == nil { parsedBot.Challenge = &config.ChallengeRules{ Difficulty: defaultDifficulty, - ReportAs: defaultDifficulty, Algorithm: "fast", } } else { @@ -155,7 +177,7 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic } if parsedBot.Challenge.Algorithm == "slow" { - slog.Warn("use of deprecated algorithm \"slow\" detected, please update this to \"fast\" when possible", "name", parsedBot.Name) + lg.Warn("use of deprecated algorithm \"slow\" detected, please update this to \"fast\" when possible", "name", parsedBot.Name) } } @@ -172,17 +194,20 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic for _, t := range c.Thresholds { if t.Challenge != nil && t.Challenge.Algorithm == "slow" { - slog.Warn("use of deprecated algorithm \"slow\" detected, please update this to \"fast\" when possible", "name", t.Name) + lg.Warn("use of deprecated algorithm \"slow\" detected, please update this to \"fast\" when possible", "name", t.Name) + } + + if t.Challenge != nil && t.Challenge.ReportAs != 0 { + lg.Warn("use of deprecated report_as setting detected, please remove this from your policy file when possible", 
"name", t.Name) } if t.Name == "legacy-anubis-behaviour" && t.Expression.String() == "true" { if !warnedAboutThresholds.Load() { - slog.Warn("configuration file does not contain thresholds, see docs for details on how to upgrade", "fname", fname, "docs_url", "https://anubis.techaro.lol/docs/admin/configuration/thresholds/") + lg.Warn("configuration file does not contain thresholds, see docs for details on how to upgrade", "fname", fname, "docs_url", "https://anubis.techaro.lol/docs/admin/configuration/thresholds/") warnedAboutThresholds.Store(true) } t.Challenge.Difficulty = defaultDifficulty - t.Challenge.ReportAs = defaultDifficulty } threshold, err := ParsedThresholdFromConfig(t) @@ -207,27 +232,6 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic validationErrs = append(validationErrs, config.ErrUnknownStoreBackend) } - if c.Logging.Level != nil { - logLevel = c.Logging.Level.String() - } - - switch c.Logging.Sink { - case config.LogSinkStdio: - result.Logger = internal.InitSlog(logLevel, os.Stderr) - case config.LogSinkFile: - out := &logrotate.Logger{ - Filename: c.Logging.Parameters.Filename, - FilenameTimeFormat: time.RFC3339, - MaxBytes: c.Logging.Parameters.MaxBytes, - MaxAge: c.Logging.Parameters.MaxAge, - MaxBackups: c.Logging.Parameters.MaxBackups, - LocalTime: c.Logging.Parameters.UseLocalTime, - Compress: c.Logging.Parameters.Compress, - } - - result.Logger = internal.InitSlog(logLevel, out) - } - if len(validationErrs) > 0 { return nil, fmt.Errorf("errors validating policy config JSON %s: %w", fname, errors.Join(validationErrs...)) } diff --git a/lib/testdata/invalid-challenge-method.yaml b/lib/testdata/invalid-challenge-method.yaml index 24eccf7..a75dd7f 100644 --- a/lib/testdata/invalid-challenge-method.yaml +++ b/lib/testdata/invalid-challenge-method.yaml @@ -4,5 +4,4 @@ bots: action: CHALLENGE challenge: difficulty: 16 - report_as: 4 algorithm: hunter2 # invalid algorithm diff --git 
a/lib/testdata/test_config.yaml b/lib/testdata/test_config.yaml index 9047dcb..cb8cd48 100644 --- a/lib/testdata/test_config.yaml +++ b/lib/testdata/test_config.yaml @@ -42,4 +42,3 @@ thresholds: challenge: algorithm: fast difficulty: 1 - report_as: 1 diff --git a/lib/testdata/zero_difficulty.yaml b/lib/testdata/zero_difficulty.yaml index 75382db..43ba4ec 100644 --- a/lib/testdata/zero_difficulty.yaml +++ b/lib/testdata/zero_difficulty.yaml @@ -42,4 +42,3 @@ thresholds: challenge: algorithm: fast difficulty: 0 - report_as: 0 diff --git a/test/palemoon/anubis/anubis.yaml b/test/palemoon/anubis/anubis.yaml index c85e797..e665f82 100644 --- a/test/palemoon/anubis/anubis.yaml +++ b/test/palemoon/anubis/anubis.yaml @@ -4,7 +4,6 @@ bots: action: CHALLENGE challenge: difficulty: 2 - report_as: 2 algorithm: fast status_codes: diff --git a/web/js/main.ts b/web/js/main.ts index e37c536..fbedb3a 100644 --- a/web/js/main.ts +++ b/web/js/main.ts @@ -155,7 +155,7 @@ const t = (key) => translations[`js_${key}`] || translations[key] || key; return; } - status.innerHTML = `${t('calculating_difficulty')} ${rules.report_as}, `; + status.innerHTML = `${t('calculating_difficulty')} ${rules.difficulty}, `; progress.style.display = "inline-block"; // the whole text, including "Speed:", as a single node, because some browsers @@ -166,7 +166,7 @@ const t = (key) => translations[`js_${key}`] || translations[key] || key; let lastSpeedUpdate = 0; let showingApology = false; - const likelihood = Math.pow(16, -rules.report_as); + const likelihood = Math.pow(16, -rules.difficulty); try { const t0 = Date.now();