From 57c0b2b22ca4e41be26c31233183a05334ee3039 Mon Sep 17 00:00:00 2001
From: Timon de Groot
Date: Fri, 16 Jan 2026 01:57:31 +0100
Subject: [PATCH] Add IP mapped Perplexity user agents (#1393)

Perplexity has some proper documentation available for their crawlers,
with published IP addresses: https://docs.perplexity.ai/guides/bots.

Signed-off-by: Timon de Groot
---
 data/clients/ai.yaml               |  2 +-
 data/clients/perplexity-user.yaml  | 12 ++++++++++++
 data/crawlers/ai-search.yaml       |  2 +-
 data/crawlers/perplexitybot.yaml   | 16 ++++++++++++++++
 data/meta/ai-block-moderate.yaml   |  4 +++-
 data/meta/ai-block-permissive.yaml |  4 +++-
 6 files changed, 36 insertions(+), 4 deletions(-)
 create mode 100644 data/clients/perplexity-user.yaml
 create mode 100644 data/crawlers/perplexitybot.yaml

diff --git a/data/clients/ai.yaml b/data/clients/ai.yaml
index 0ec7022..7453134 100644
--- a/data/clients/ai.yaml
+++ b/data/clients/ai.yaml
@@ -4,5 +4,5 @@
 # - Claude-User: No published IP allowlist
 - name: "ai-clients"
   user_agent_regex: >-
-    ChatGPT-User|Claude-User|MistralAI-User
+    ChatGPT-User|Claude-User|MistralAI-User|Perplexity-User
   action: DENY
diff --git a/data/clients/perplexity-user.yaml b/data/clients/perplexity-user.yaml
new file mode 100644
index 0000000..13f64fd
--- /dev/null
+++ b/data/clients/perplexity-user.yaml
@@ -0,0 +1,12 @@
+# Acts on behalf of user requests
+# https://docs.perplexity.ai/guides/bots
+- name: perplexity-user
+  user_agent_regex: Perplexity-User/.+; \+https\://perplexity\.ai/perplexity-user
+  action: ALLOW
+  # https://www.perplexity.com/perplexity-user.json
+  remote_addresses: [
+    "44.208.221.197/32",
+    "34.193.163.52/32",
+    "18.97.21.0/30",
+    "18.97.43.80/29",
+  ]
diff --git a/data/crawlers/ai-search.yaml b/data/crawlers/ai-search.yaml
index 91855bf..bf1bdd2 100644
--- a/data/crawlers/ai-search.yaml
+++ b/data/crawlers/ai-search.yaml
@@ -4,5 +4,5 @@
 # - Claude-SearchBot: No published IP allowlist
 - name: "ai-crawlers-search"
   user_agent_regex: >-
-    OAI-SearchBot|Claude-SearchBot
+    OAI-SearchBot|Claude-SearchBot|PerplexityBot
   action: DENY
diff --git a/data/crawlers/perplexitybot.yaml b/data/crawlers/perplexitybot.yaml
new file mode 100644
index 0000000..d217162
--- /dev/null
+++ b/data/crawlers/perplexitybot.yaml
@@ -0,0 +1,16 @@
+# Indexing for search, does not collect training data
+# https://docs.perplexity.ai/guides/bots
+- name: perplexitybot
+  user_agent_regex: PerplexityBot/.+; \+https\://perplexity\.ai/perplexitybot
+  action: ALLOW
+  # https://www.perplexity.com/perplexitybot.json
+  remote_addresses: [
+    "107.20.236.150/32",
+    "3.224.62.45/32",
+    "18.210.92.235/32",
+    "3.222.232.239/32",
+    "3.211.124.183/32",
+    "3.231.139.107/32",
+    "18.97.1.228/30",
+    "18.97.9.96/29",
+  ]
diff --git a/data/meta/ai-block-moderate.yaml b/data/meta/ai-block-moderate.yaml
index 3fb5fb9..904d962 100644
--- a/data/meta/ai-block-moderate.yaml
+++ b/data/meta/ai-block-moderate.yaml
@@ -3,5 +3,7 @@
 - import: (data)/bots/ai-catchall.yaml
 - import: (data)/crawlers/ai-training.yaml
 - import: (data)/crawlers/openai-searchbot.yaml
+- import: (data)/crawlers/perplexitybot.yaml
 - import: (data)/clients/openai-chatgpt-user.yaml
-- import: (data)/clients/mistral-mistralai-user.yaml
\ No newline at end of file
+- import: (data)/clients/mistral-mistralai-user.yaml
+- import: (data)/clients/perplexity-user.yaml
diff --git a/data/meta/ai-block-permissive.yaml b/data/meta/ai-block-permissive.yaml
index 09a3446..d3fd562 100644
--- a/data/meta/ai-block-permissive.yaml
+++ b/data/meta/ai-block-permissive.yaml
@@ -2,5 +2,7 @@
 - import: (data)/bots/ai-catchall.yaml
 - import: (data)/crawlers/openai-searchbot.yaml
 - import: (data)/crawlers/openai-gptbot.yaml
+- import: (data)/crawlers/perplexitybot.yaml
 - import: (data)/clients/openai-chatgpt-user.yaml
-- import: (data)/clients/mistral-mistralai-user.yaml
\ No newline at end of file
+- import: (data)/clients/mistral-mistralai-user.yaml
+- import: (data)/clients/perplexity-user.yaml