Opt-in policies for OpenAI and MistralAI bots (#590)
* Define OpenAI bot ALLOW policies Allows OpenAI bots to be allowlisted at the choice of the Anubis administrator. None are enabled by default. * Define MistralAI bot ALLOW policy * chore: spelling
This commit is contained in:
parent
ba00cdacd2
commit
0d9ebebff6
5 changed files with 135 additions and 0 deletions
16
data/crawlers/openai-gptbot.yaml
Normal file
16
data/crawlers/openai-gptbot.yaml
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
# Collects AI training data
|
||||
# https://platform.openai.com/docs/bots/overview-of-openai-crawlers
|
||||
- name: openai-gptbot
|
||||
user_agent_regex: GPTBot/1\.1; \+https\://openai\.com/gptbot
|
||||
action: ALLOW
|
||||
# https://openai.com/gptbot.json
|
||||
remote_addresses: [
|
||||
"52.230.152.0/24",
|
||||
"20.171.206.0/24",
|
||||
"20.171.207.0/24",
|
||||
"4.227.36.0/25",
|
||||
"20.125.66.80/28",
|
||||
"172.182.204.0/24",
|
||||
"172.182.214.0/24",
|
||||
"172.182.215.0/24",
|
||||
]
|
||||
13
data/crawlers/openai-searchbot.yaml
Normal file
13
data/crawlers/openai-searchbot.yaml
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
# Indexing for search, does not collect training data
|
||||
# https://platform.openai.com/docs/bots/overview-of-openai-crawlers
|
||||
- name: openai-searchbot
|
||||
user_agent_regex: OAI-SearchBot/1\.0; \+https\://openai\.com/searchbot
|
||||
action: ALLOW
|
||||
# https://openai.com/searchbot.json
|
||||
remote_addresses: [
|
||||
"20.42.10.176/28",
|
||||
"172.203.190.128/28",
|
||||
"104.210.140.128/28",
|
||||
"51.8.102.0/24",
|
||||
"135.234.64.0/24"
|
||||
]
|
||||
Loading…
Add table
Add a link
Reference in a new issue