nuke/data/botPolicies.yaml
Xe Iaso 4948036f39
feat: add default OpenGraph tags to configuration file (#694)
* feat(config): opengraph passthrough configuration

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore(ogtags): use config.OpenGraph for configuration

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: wire up ogtags config in most of the app

Signed-off-by: Xe Iaso <me@xeiaso.net>

* feat(ogtags): return default tags if they are supplied

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: make OpenGraph legal so we have some sanity in reviewing

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: spelling

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(lib): use OpenGraph.Enabled

Signed-off-by: Xe Iaso <me@xeiaso.net>

* test(lib): load default config file if one is not specified in spawnAnubis

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore(config): fix ST1005

Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs: document open graph defaults and its new home in the policy file

Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs(installation): point to weight threshold new home

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: rename default to override

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore(default-config): add off-by-default opengraph settings to bot policy file

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(anubis): make build

Signed-off-by: Xe Iaso <me@xeiaso.net>

* test(lib): fix build

Signed-off-by: Xe Iaso <me@xeiaso.net>

---------

Signed-off-by: Xe Iaso <me@xeiaso.net>
2025-06-19 18:00:44 -04:00

171 lines
6.1 KiB
YAML

## Anubis has the ability to let you import snippets of configuration into the main
## configuration file. This allows you to break up your config into smaller parts
## that get logically assembled into one big file.
##
## Of note, a bot rule can either have inline bot configuration or import a
## bot config snippet. You cannot do both in a single bot rule.
##
## Import paths can either be prefixed with (data) to import from the common/shared
## rules in the data folder in the Anubis source tree or will point to absolute/relative
## paths in your filesystem. If you don't have access to the Anubis source tree, check
## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
bots:
# Pathological bots to deny
- # This correlates to data/bots/deny-pathological.yaml in the source tree
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
import: (data)/bots/_deny-pathological.yaml
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
# Aggressively block AI/LLM related bots/agents by default
- import: (data)/meta/ai-block-aggressive.yaml
# Consider replacing the aggressive AI policy with more selective policies:
# - import: (data)/meta/ai-block-moderate.yaml
# - import: (data)/meta/ai-block-permissive.yaml
# Search engine crawlers to allow, defaults to:
# - Google (so they don't try to bypass Anubis)
# - Apple
# - Bing
# - DuckDuckGo
# - Qwant
# - The Internet Archive
# - Kagi
# - Marginalia
# - Mojeek
- import: (data)/crawlers/_allow-good.yaml
# Challenge Firefox AI previews
- import: (data)/clients/x-firefox-ai.yaml
# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
- import: (data)/common/keep-internet-working.yaml
# # Punish any bot with "bot" in the user-agent string
# # This is known to have a high false-positive rate, use at your own risk
# - name: generic-bot-catchall
# user_agent_regex: (?i:bot|crawler)
# action: CHALLENGE
# challenge:
# difficulty: 16 # impossible
# report_as: 4 # lie to the operator
# algorithm: slow # intentionally waste CPU cycles and time
# Requires a subscription to Thoth to use, see
# https://anubis.techaro.lol/docs/admin/thoth#geoip-based-filtering
- name: countries-with-aggressive-scrapers
action: WEIGH
geoip:
countries:
- BR
- CN
weight:
adjust: 10
# Requires a subscription to Thoth to use, see
# https://anubis.techaro.lol/docs/admin/thoth#asn-based-filtering
- name: aggressive-asns-without-functional-abuse-contact
action: WEIGH
asns:
match:
- 13335 # Cloudflare
- 136907 # Huawei Cloud
- 45102 # Alibaba Cloud
weight:
adjust: 10
# Generic catchall rule
- name: generic-browser
user_agent_regex: >-
Mozilla|Opera
action: WEIGH
weight:
adjust: 10
dnsbl: false
# Open Graph passthrough configuration, see here for more information:
# https://anubis.techaro.lol/docs/admin/configuration/open-graph/
openGraph:
# Enables Open Graph passthrough
enabled: false
# Enables the use of the HTTP host in the cache key, this enables
# caching metadata for multiple http hosts at once.
considerHost: false
# How long cached OpenGraph metadata should last in memory
ttl: 24h
# # If set, return these opengraph values instead of looking them up with
# # the target service.
# #
# # Correlates to properties in https://ogp.me/
# override:
# # og:title is required, it is the title of the website
# "og:title": "Techaro Anubis"
# "og:description": >-
# Anubis is a Web AI Firewall Utility that helps you fight the bots
# away so that you can maintain uptime at work!
# "description": >-
# Anubis is a Web AI Firewall Utility that helps you fight the bots
# away so that you can maintain uptime at work!
# By default, send HTTP 200 back to clients that either get issued a challenge
# or a denial. This seems weird, but this is load-bearing due to the fact that
# the most aggressive scraper bots seem to really, really, want an HTTP 200 and
# will stop sending requests once they get it.
status_codes:
CHALLENGE: 200
DENY: 200
# The weight thresholds for when to trigger individual challenges. Any
# CHALLENGE will take precedence over this.
#
# A threshold has four configuration options:
#
# - name: the name that is reported down the stack and used for metrics
# - expression: A CEL expression with the request weight in the variable
# weight
# - action: the Anubis action to apply, similar to in a bot policy
# - challenge: which challenge to send to the user, similar to in a bot policy
#
# See https://anubis.techaro.lol/docs/admin/configuration/thresholds for more
# information.
thresholds:
# By default Anubis ships with the following thresholds:
- name: minimal-suspicion # This client is likely fine, its soul is lighter than a feather
expression: weight < 0 # a feather weighs zero units
action: ALLOW # Allow the traffic through
# For clients that had some weight reduced through custom rules, give them a
# lightweight challenge.
- name: mild-suspicion
expression:
all:
- weight >= 0
- weight < 10
action: CHALLENGE
challenge:
# https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh
algorithm: metarefresh
difficulty: 1
report_as: 1
# For clients that are browser-like but have either gained points from custom rules or
# report as a standard browser.
- name: moderate-suspicion
expression:
all:
- weight >= 10
- weight < 20
action: CHALLENGE
challenge:
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
algorithm: fast
difficulty: 2 # two leading zeros, very fast for most clients
report_as: 2
# For clients that are browser like and have gained many points from custom rules
- name: extreme-suspicion
expression: weight >= 20
action: CHALLENGE
challenge:
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
algorithm: fast
difficulty: 4
report_as: 4