diff --git a/data/common/acts-like-browser.yaml b/data/common/acts-like-browser.yaml new file mode 100644 index 0000000..a4dc0ee --- /dev/null +++ b/data/common/acts-like-browser.yaml @@ -0,0 +1,55 @@ +# Assert behaviour that only genuine browsers display. This ensures that modern Chrome +# or Firefox versions will get through without a challenge. +# +# These rules have been known to be bypassed by some of the worst automated scrapers. +# Use at your own risk. + +- name: realistic-browser-catchall + expression: + all: + - '"User-Agent" in headers' + - '( userAgent.contains("Firefox") ) || ( userAgent.contains("Chrome") ) || ( userAgent.contains("Safari") )' + - '"Accept" in headers' + - '"Sec-Fetch-Dest" in headers' + - '"Sec-Fetch-Mode" in headers' + - '"Sec-Fetch-Site" in headers' + - '"Accept-Encoding" in headers' + - '( headers["Accept-Encoding"].contains("zstd") || headers["Accept-Encoding"].contains("br") )' + - '"Accept-Language" in headers' + action: WEIGH + weight: + adjust: -10 + +# The Upgrade-Insecure-Requests header is typically sent by browsers, but not always +- name: upgrade-insecure-requests + expression: '"Upgrade-Insecure-Requests" in headers' + action: WEIGH + weight: + adjust: -2 + +# Chrome should behave like Chrome +- name: chrome-is-proper + expression: + all: + - userAgent.contains("Chrome") + - '"Sec-Ch-Ua" in headers' + - 'headers["Sec-Ch-Ua"].contains("Chromium")' + - '"Sec-Ch-Ua-Mobile" in headers' + - '"Sec-Ch-Ua-Platform" in headers' + action: WEIGH + weight: + adjust: -5 + +- name: should-have-accept + expression: '!("Accept" in headers)' + action: WEIGH + weight: + adjust: 5 + +# Generic catchall rule +- name: generic-browser + user_agent_regex: >- + Mozilla|Opera + action: WEIGH + weight: + adjust: 10 diff --git a/docs/docs/admin/configuration/import.mdx b/docs/docs/admin/configuration/import.mdx index b8fdd2e..1b5f1c3 100644 --- a/docs/docs/admin/configuration/import.mdx +++ b/docs/docs/admin/configuration/import.mdx @@ -13,6 
+13,8 @@ bots: - # This correlates to data/bots/ai-catchall.yaml in the source tree import: (data)/bots/ai-catchall.yaml - import: (data)/bots/cloudflare-workers.yaml + # Import all the rules in the default configuration + - import: (data)/meta/default-config.yaml ``` Of note, a bot rule can either have inline bot configuration or import a bot config snippet. You cannot do both in a single bot rule. @@ -35,6 +37,33 @@ config.BotOrImport: rule definition is invalid, you must set either bot rules or Paths can either be prefixed with `(data)` to import from the [the data folder in the Anubis source tree](https://github.com/TecharoHQ/anubis/tree/main/data) or anywhere on the filesystem. If you don't have access to the Anubis source tree, check /usr/share/docs/anubis/data or in the tarball you extracted Anubis from. +## Importing the default configuration + +If you want to base your configuration on the default configuration, import `(data)/meta/default-config.yaml`: + +```yaml +bots: + - import: (data)/meta/default-config.yaml + # Write your rules here +``` + +This will keep your configuration up to date as Anubis adapts to emerging threats. + +## How do I exempt most modern browsers from Anubis challenges? + +If you want to exempt most modern browsers from Anubis challenges, import `(data)/common/acts-like-browser.yaml`: + +```yaml +bots: + - import: (data)/meta/default-config.yaml + - import: (data)/common/acts-like-browser.yaml + # Write your rules here +``` + +These rules will allow traffic that "looks like" it's from a modern copy of Edge, Safari, Chrome, or Firefox. These rules used to be enabled by default; however, user reports have suggested that AI scraper bots have adapted to conform to these rules to scrape without regard for the infrastructure they are attacking. + +Use these rules at your own risk. + ## Importing from imports You can also import from an imported file in case you want to import an entire folder of rules at once.