nuke/data/common/acts-like-browser.yaml
Xe Iaso 6fc2c3c857
docs: document how to import the default config (#1392)
Signed-off-by: Xe Iaso <me@xeiaso.net>
2026-01-08 16:14:52 +00:00

55 lines
1.6 KiB
YAML

# Assert behaviour that only genuine browsers display. This ensures that modern Chrome
# or Firefox versions will get through without a challenge.
#
# These rules have been known to be bypassed by some of the worst automated scrapers.
# Use at your own risk.
- name: realistic-browser-catchall
expression:
all:
- '"User-Agent" in headers'
- '( userAgent.contains("Firefox") ) || ( userAgent.contains("Chrome") ) || ( userAgent.contains("Safari") )'
- '"Accept" in headers'
- '"Sec-Fetch-Dest" in headers'
- '"Sec-Fetch-Mode" in headers'
- '"Sec-Fetch-Site" in headers'
- '"Accept-Encoding" in headers'
- '( headers["Accept-Encoding"].contains("zstd") || headers["Accept-Encoding"].contains("br") )'
- '"Accept-Language" in headers'
action: WEIGH
weight:
adjust: -10
# The Upgrade-Insecure-Requests header is typically sent by browsers, but not always
- name: upgrade-insecure-requests
expression: '"Upgrade-Insecure-Requests" in headers'
action: WEIGH
weight:
adjust: -2
# Chrome should behave like Chrome
- name: chrome-is-proper
expression:
all:
- userAgent.contains("Chrome")
- '"Sec-Ch-Ua" in headers'
- 'headers["Sec-Ch-Ua"].contains("Chromium")'
- '"Sec-Ch-Ua-Mobile" in headers'
- '"Sec-Ch-Ua-Platform" in headers'
action: WEIGH
weight:
adjust: -5
- name: should-have-accept
expression: '!("Accept" in headers)'
action: WEIGH
weight:
adjust: 5
# Generic catchall rule
- name: generic-browser
user_agent_regex: >-
Mozilla|Opera
action: WEIGH
weight:
adjust: 10