* feat(lib): implement request weight Replaces #608 This is a big one and will be what makes Anubis a generic web application firewall. This introduces the WEIGH option, allowing administrators to have facets of request metadata add or remove "weight", or the level of suspicion. This really makes Anubis weigh the soul of requests. Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(lib): maintain legacy challenge behavior Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(lib): make weight have dedicated checkers for the hashes Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(data): convert some rules over to weight points Signed-off-by: Xe Iaso <me@xeiaso.net> * docs: document request weight Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(CHANGELOG): spelling error Signed-off-by: Xe Iaso <me@xeiaso.net> * chore: spelling Signed-off-by: Xe Iaso <me@xeiaso.net> * docs: fix links to challenge information Signed-off-by: Xe Iaso <me@xeiaso.net> * docs(policies): fix formatting Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(config): make default weight adjustment 5 Signed-off-by: Xe Iaso <me@xeiaso.net> --------- Signed-off-by: Xe Iaso <me@xeiaso.net>
70 lines
2.6 KiB
YAML
## Anubis has the ability to let you import snippets of configuration into the main
## configuration file. This allows you to break up your config into smaller parts
## that get logically assembled into one big file.
##
## Of note, a bot rule can either have inline bot configuration or import a
## bot config snippet. You cannot do both in a single bot rule.
##
## Import paths can either be prefixed with (data) to import from the common/shared
## rules in the data folder in the Anubis source tree or will point to absolute/relative
## paths in your filesystem. If you don't have access to the Anubis source tree, check
## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.

bots:
  # Pathological bots to deny
  - # This correlates to data/bots/deny-pathological.yaml in the source tree
    # https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
    import: (data)/bots/_deny-pathological.yaml
  - import: (data)/bots/aggressive-brazilian-scrapers.yaml

  # Aggressively block AI/LLM related bots/agents by default
  - import: (data)/meta/ai-block-aggressive.yaml

  # Consider replacing the aggressive AI policy with more selective policies:
  # - import: (data)/meta/ai-block-moderate.yaml
  # - import: (data)/meta/ai-block-permissive.yaml

  # Search engine crawlers to allow, defaults to:
  #   - Google (so they don't try to bypass Anubis)
  #   - Apple
  #   - Bing
  #   - DuckDuckGo
  #   - Qwant
  #   - The Internet Archive
  #   - Kagi
  #   - Marginalia
  #   - Mojeek
  - import: (data)/crawlers/_allow-good.yaml
  # Challenge Firefox AI previews
  - import: (data)/clients/x-firefox-ai.yaml

  # Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
  - import: (data)/common/keep-internet-working.yaml

  # # Punish any bot with "bot" in the user-agent string
  # # This is known to have a high false-positive rate, use at your own risk
  # - name: generic-bot-catchall
  #   user_agent_regex: (?i:bot|crawler)
  #   action: CHALLENGE
  #   challenge:
  #     difficulty: 16  # impossible
  #     report_as: 4  # lie to the operator
  #     algorithm: slow  # intentionally waste CPU cycles and time

  # Generic catchall rule
  - name: generic-browser
    user_agent_regex: >-
      Mozilla|Opera
    action: WEIGH
    weight:
      adjust: 10

dnsbl: false

# By default, send HTTP 200 back to clients that either get issued a challenge
# or a denial. This seems weird, but this is load-bearing due to the fact that
# the most aggressive scraper bots seem to really, really, want an HTTP 200 and
# will stop sending requests once they get it.
status_codes:
  CHALLENGE: 200
  DENY: 200