Split up AI filtering files (#592)

* Split up AI filtering files

Create aggressive/moderate/permissive policies to allow administrators to choose their AI/LLM stance.

Aggressive policy matches existing default in Anubis.

Removes `Google-Extended` flag from `ai-robots-txt.yaml` as it doesn't exist in requests.

Rename `ai-robots-txt.yaml` to `ai-catchall.yaml` as the file is no longer a copy of the source repo/file.

* chore: spelling

* chore: fix embeds

* chore: fix data includes

* chore: fix file name typo

* chore: Ignore READMEs in configs

* chore(lib/policy/config): go tool goimports -w

Signed-off-by: Xe Iaso <me@xeiaso.net>

---------

Signed-off-by: Xe Iaso <me@xeiaso.net>
Co-authored-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
Corry Haines 2025-06-01 13:21:18 -07:00 committed by GitHub
parent 77e0bbbce9
commit de7dbfe6d6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 107 additions and 18 deletions

View file

@ -251,6 +251,7 @@ func TestImportStatement(t *testing.T) {
"bots",
"common",
"crawlers",
"meta",
} {
if err := fs.WalkDir(data.BotPolicies, folderName, func(path string, d fs.DirEntry, err error) error {
if err != nil {
@ -259,6 +260,9 @@ func TestImportStatement(t *testing.T) {
if d.IsDir() {
return nil
}
if d.Name() == "README.md" {
return nil
}
tests = append(tests, testCase{
name: "(data)/" + path,

View file

@ -1,7 +1,7 @@
{
"bots": [
{
"import": "(data)/bots/ai-robots-txt.yaml",
"import": "(data)/bots/ai-catchall.yaml",
"name": "generic-browser",
"user_agent_regex": "Mozilla|Opera\n",
"action": "CHALLENGE"

View file

@ -1,5 +1,5 @@
bots:
- import: (data)/bots/ai-robots-txt.yaml
- import: (data)/bots/ai-catchall.yaml
name: generic-browser
user_agent_regex: >
Mozilla|Opera