feat(checker): add CEL for matching complicated expressions (#421)
* feat(lib/policy): add support for CEL checkers
This adds the ability for administrators to use Common Expression
Language[0] (CEL) for more advanced check logic than Anubis previously
offered.
These can be as simple as:
```yaml
- name: allow-api-routes
action: ALLOW
expression:
and:
- '!(method == "HEAD" || method == "GET")'
- path.startsWith("/api/")
```
or get as complicated as:
```yaml
- name: allow-git-clients
action: ALLOW
expression:
and:
- userAgent.startsWith("git/") || userAgent.contains("libgit") || userAgent.startsWith("go-git") || userAgent.startsWith("JGit/") || userAgent.startsWith("JGit-")
- >
"Git-Protocol" in headers && headers["Git-Protocol"] == "version=2"
```
Internally these are compiled and evaluated with cel-go[1]. This also
leaves room for extensibility should that be desired in the future. This
will intersect with #338 and eventually intersect with TLS fingerprints
as in #337.
[0]: https://cel.dev/
[1]: https://github.com/google/cel-go
Signed-off-by: Xe Iaso <me@xeiaso.net>
* feat(data/apps): add API route allow rule for non-HEAD/GET
Signed-off-by: Xe Iaso <me@xeiaso.net>
* docs: document expression syntax
Signed-off-by: Xe Iaso <me@xeiaso.net>
* fix: fixes in review
Signed-off-by: Xe Iaso <me@xeiaso.net>
---------
Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
parent
af07691139
commit
865d513e35
39 changed files with 1166 additions and 14 deletions
6
data/apps/allow-api-routes.yaml
Normal file
6
data/apps/allow-api-routes.yaml
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
- name: allow-api-routes
|
||||
action: ALLOW
|
||||
expression:
|
||||
all:
|
||||
- '!(method == "HEAD" || method == "GET")'
|
||||
- path.startsWith("/api/")
|
||||
|
|
@ -9,6 +9,9 @@
|
|||
{
|
||||
"import": "(data)/crawlers/_allow-good.yaml"
|
||||
},
|
||||
{
|
||||
"import": "(data)/bots/aggressive-brazilian-scrapers.yaml"
|
||||
},
|
||||
{
|
||||
"import": "(data)/common/keep-internet-working.yaml"
|
||||
},
|
||||
|
|
@ -23,4 +26,4 @@
|
|||
"CHALLENGE": 200,
|
||||
"DENY": 200
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -15,6 +15,7 @@ bots:
|
|||
- # This correlates to data/bots/deny-pathological.yaml in the source tree
|
||||
# https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
|
||||
import: (data)/bots/_deny-pathological.yaml
|
||||
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
|
||||
|
||||
# Enforce https://github.com/ai-robots-txt/ai.robots.txt
|
||||
- import: (data)/bots/ai-robots-txt.yaml
|
||||
|
|
|
|||
28
data/bots/aggressive-brazilian-scrapers.yaml
Normal file
28
data/bots/aggressive-brazilian-scrapers.yaml
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
- name: deny-aggressive-brazilian-scrapers
|
||||
action: DENY
|
||||
expression:
|
||||
any:
|
||||
# Internet Explorer should be out of support
|
||||
- userAgent.contains("MSIE")
|
||||
# Trident is the Internet Explorer browser engine
|
||||
- userAgent.contains("Trident")
|
||||
# Opera is a fork of chrome now
|
||||
- userAgent.contains("Presto")
|
||||
# Windows CE is discontinued
|
||||
- userAgent.contains("Windows CE")
|
||||
# Windows 95 is discontinued
|
||||
- userAgent.contains("Windows 95")
|
||||
# Windows 98 is discontinued
|
||||
- userAgent.contains("Windows 98")
|
||||
# Windows 9.x is discontinued
|
||||
- userAgent.contains("Win 9x")
|
||||
# Amazon does not have an Alexa Toolbar.
|
||||
- userAgent.contains("Alexa Toolbar")
|
||||
- name: challenge-aggressive-brazilian-scrapers
|
||||
action: CHALLENGE
|
||||
expression:
|
||||
any:
|
||||
# This is not released, even Windows 11 calls itself Windows 10
|
||||
- userAgent.contains("Windows NT 11.0")
|
||||
# iPods are not in common use
|
||||
- userAgent.contains("iPod")
|
||||
9
data/bots/irc-bots/archlinux-phrik.yaml
Normal file
9
data/bots/irc-bots/archlinux-phrik.yaml
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
# phrik in the Arch Linux IRC channels
|
||||
- name: archlinux-phrik
|
||||
action: ALLOW
|
||||
expression:
|
||||
all:
|
||||
- remoteAddress == "159.69.213.214"
|
||||
- userAgent == "Mozilla/5.0 (compatible; utils.web Limnoria module)"
|
||||
- '"X-Http-Version" in headers'
|
||||
- headers["X-Http-Version"] == "HTTP/1.1"
|
||||
9
data/bots/irc-bots/gentoo-chat.yaml
Normal file
9
data/bots/irc-bots/gentoo-chat.yaml
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
# chat in the gentoo IRC channels
|
||||
- name: gentoo-chat
|
||||
action: ALLOW
|
||||
expression:
|
||||
all:
|
||||
- remoteAddress == "45.76.166.57"
|
||||
- userAgent == "Mozilla/5.0 (Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0"
|
||||
- '"X-Http-Version" in headers'
|
||||
- headers["X-Http-Version"] == "HTTP/1.1"
|
||||
14
data/clients/git.yaml
Normal file
14
data/clients/git.yaml
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
- name: allow-git-clients
|
||||
action: ALLOW
|
||||
expression:
|
||||
all:
|
||||
- >
|
||||
(
|
||||
userAgent.startsWith("git/") ||
|
||||
userAgent.contains("libgit") ||
|
||||
userAgent.startsWith("go-git") ||
|
||||
userAgent.startsWith("JGit/") ||
|
||||
userAgent.startsWith("JGit-")
|
||||
)
|
||||
- '"Git-Protocol" in headers'
|
||||
- headers["Git-Protocol"] == "version=2"
|
||||
7
data/clients/go-get.yaml
Normal file
7
data/clients/go-get.yaml
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
- name: go-get
|
||||
action: ALLOW
|
||||
expression:
|
||||
all:
|
||||
- userAgent.startsWith("Go-http-client/")
|
||||
- '"go-get" in query'
|
||||
- query["go-get"] == "1"
|
||||
6
data/common/allow-api-like.yaml
Normal file
6
data/common/allow-api-like.yaml
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
- name: allow-api-routes
|
||||
action: ALLOW
|
||||
expression:
|
||||
all:
|
||||
- '!(method == "HEAD" || method == "GET")'
|
||||
- path.startsWith("/api/")
|
||||
7
data/common/json-api.yaml
Normal file
7
data/common/json-api.yaml
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
- name: allow-api-requests
|
||||
action: ALLOW
|
||||
expression:
|
||||
all:
|
||||
- '"Accept" in headers'
|
||||
- 'headers["Accept"] == "application/json"'
|
||||
- 'path.startsWith("/api/")'
|
||||
|
|
@ -7,4 +7,7 @@
|
|||
action: ALLOW
|
||||
- name: robots-txt
|
||||
path_regex: ^/robots.txt$
|
||||
action: ALLOW
|
||||
- name: sitemap
|
||||
path_regex: ^/sitemap.xml$
|
||||
action: ALLOW
|
||||
3
data/common/rfc-violations.yaml
Normal file
3
data/common/rfc-violations.yaml
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
- name: no-user-agent-string
|
||||
action: DENY
|
||||
expression: userAgent == ""
|
||||
|
|
@ -3,6 +3,6 @@ package data
|
|||
import "embed"
|
||||
|
||||
var (
|
||||
//go:embed botPolicies.yaml botPolicies.json all:apps all:bots all:common all:crawlers
|
||||
//go:embed botPolicies.yaml botPolicies.json all:apps all:bots all:clients all:common all:crawlers
|
||||
BotPolicies embed.FS
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue