Add IP mapped Perplexity user agents (#1393)
Perplexity publishes documentation for its crawlers, including the IP addresses they use: https://docs.perplexity.ai/guides/bots

Signed-off-by: Timon de Groot <timon.degroot@team.blue>
This commit is contained in:
parent
186ffeb744
commit
57c0b2b22c
6 changed files with 36 additions and 4 deletions
|
|
@ -4,5 +4,5 @@
|
||||||
# - Claude-User: No published IP allowlist
|
# - Claude-User: No published IP allowlist
|
||||||
- name: "ai-clients"
|
- name: "ai-clients"
|
||||||
user_agent_regex: >-
|
user_agent_regex: >-
|
||||||
ChatGPT-User|Claude-User|MistralAI-User
|
ChatGPT-User|Claude-User|MistralAI-User|Perplexity-User
|
||||||
action: DENY
|
action: DENY
|
||||||
|
|
|
||||||
12
data/clients/perplexity-user.yaml
Normal file
12
data/clients/perplexity-user.yaml
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
# Acts on behalf of user requests
|
||||||
|
# https://docs.perplexity.ai/guides/bots
|
||||||
|
- name: perplexity-user
|
||||||
|
user_agent_regex: Perplexity-User/.+; \+https\://perplexity\.ai/perplexity-user
|
||||||
|
action: ALLOW
|
||||||
|
# https://www.perplexity.com/perplexity-user.json
|
||||||
|
remote_addresses: [
|
||||||
|
"44.208.221.197/32",
|
||||||
|
"34.193.163.52/32",
|
||||||
|
"18.97.21.0/30",
|
||||||
|
"18.97.43.80/29",
|
||||||
|
]
|
||||||
|
|
@ -4,5 +4,5 @@
|
||||||
# - Claude-SearchBot: No published IP allowlist
|
# - Claude-SearchBot: No published IP allowlist
|
||||||
- name: "ai-crawlers-search"
|
- name: "ai-crawlers-search"
|
||||||
user_agent_regex: >-
|
user_agent_regex: >-
|
||||||
OAI-SearchBot|Claude-SearchBot
|
OAI-SearchBot|Claude-SearchBot|PerplexityBot
|
||||||
action: DENY
|
action: DENY
|
||||||
|
|
|
||||||
16
data/crawlers/perplexitybot.yaml
Normal file
16
data/crawlers/perplexitybot.yaml
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
# Indexing for search, does not collect training data
|
||||||
|
# https://docs.perplexity.ai/guides/bots
|
||||||
|
- name: perplexitybot
|
||||||
|
user_agent_regex: PerplexityBot/.+; \+https\://perplexity\.ai/perplexitybot
|
||||||
|
action: ALLOW
|
||||||
|
# https://www.perplexity.com/perplexitybot.json
|
||||||
|
remote_addresses: [
|
||||||
|
"107.20.236.150/32",
|
||||||
|
"3.224.62.45/32",
|
||||||
|
"18.210.92.235/32",
|
||||||
|
"3.222.232.239/32",
|
||||||
|
"3.211.124.183/32",
|
||||||
|
"3.231.139.107/32",
|
||||||
|
"18.97.1.228/30",
|
||||||
|
"18.97.9.96/29",
|
||||||
|
]
|
||||||
|
|
@ -3,5 +3,7 @@
|
||||||
- import: (data)/bots/ai-catchall.yaml
|
- import: (data)/bots/ai-catchall.yaml
|
||||||
- import: (data)/crawlers/ai-training.yaml
|
- import: (data)/crawlers/ai-training.yaml
|
||||||
- import: (data)/crawlers/openai-searchbot.yaml
|
- import: (data)/crawlers/openai-searchbot.yaml
|
||||||
|
- import: (data)/crawlers/perplexitybot.yaml
|
||||||
- import: (data)/clients/openai-chatgpt-user.yaml
|
- import: (data)/clients/openai-chatgpt-user.yaml
|
||||||
- import: (data)/clients/mistral-mistralai-user.yaml
|
- import: (data)/clients/mistral-mistralai-user.yaml
|
||||||
|
- import: (data)/clients/perplexity-user.yaml
|
||||||
|
|
|
||||||
|
|
@ -2,5 +2,7 @@
|
||||||
- import: (data)/bots/ai-catchall.yaml
|
- import: (data)/bots/ai-catchall.yaml
|
||||||
- import: (data)/crawlers/openai-searchbot.yaml
|
- import: (data)/crawlers/openai-searchbot.yaml
|
||||||
- import: (data)/crawlers/openai-gptbot.yaml
|
- import: (data)/crawlers/openai-gptbot.yaml
|
||||||
|
- import: (data)/crawlers/perplexitybot.yaml
|
||||||
- import: (data)/clients/openai-chatgpt-user.yaml
|
- import: (data)/clients/openai-chatgpt-user.yaml
|
||||||
- import: (data)/clients/mistral-mistralai-user.yaml
|
- import: (data)/clients/mistral-mistralai-user.yaml
|
||||||
|
- import: (data)/clients/perplexity-user.yaml
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue