diff --git a/.github/workflows/smoke-tests.yml b/.github/workflows/smoke-tests.yml index 7b6eeae..b0b0cad 100644 --- a/.github/workflows/smoke-tests.yml +++ b/.github/workflows/smoke-tests.yml @@ -23,6 +23,7 @@ jobs: - i18n - palemoon/amd64 #- palemoon/i386 + - robots_txt runs-on: ubuntu-latest steps: - name: Checkout code diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index 39d09d0..d8f3896 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 +- Fix the `SERVE_ROBOTS_TXT` setting, which stopped serving `robots.txt` after the double slash fix broke it. + ## v1.23.0: Lyse Hext - Add default tencent cloud DENY rule. diff --git a/lib/http.go b/lib/http.go index 60e51d4..053470f 100644 --- a/lib/http.go +++ b/lib/http.go @@ -345,6 +345,15 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } + // Forward robots.txt requests to mux when ServeRobotsTXT is enabled + if s.opts.ServeRobotsTXT { + path := strings.TrimPrefix(r.URL.Path, anubis.BasePrefix) + if path == "/robots.txt" || path == "/.well-known/robots.txt" { + s.mux.ServeHTTP(w, r) + return + } + } + s.maybeReverseProxyOrPage(w, r) } diff --git a/test/robots_txt/anubis.yaml b/test/robots_txt/anubis.yaml new file mode 100644 index 0000000..9880a86 --- /dev/null +++ b/test/robots_txt/anubis.yaml @@ -0,0 +1,8 @@ +bots: + - name: challenge + user_agent_regex: CHALLENGE + action: CHALLENGE + +status_codes: + CHALLENGE: 200 + DENY: 403 diff --git a/test/robots_txt/test.mjs b/test/robots_txt/test.mjs new file mode 100644 index 0000000..ce62b9f --- /dev/null +++ b/test/robots_txt/test.mjs @@ -0,0 +1,27 @@ +async function getRobotsTxt() { + return fetch("http://localhost:8923/robots.txt", { + headers: { + "Accept-Language": "en", + "User-Agent": "Mozilla/5.0", + } + }) + .then(resp => { + if (resp.status !== 200) { + throw new Error(`wanted status 200, got status: ${resp.status}`); + } + return resp; + }) + 
.then(resp => resp.text()); +} + +(async () => { + const page = await getRobotsTxt(); + + if (page.includes(`<html`)) { + console.log(page) + throw new Error("serve robots.txt smoke test failed"); + } + + console.log("serve-robots-txt serves robots.txt"); + process.exit(0); +})(); \ No newline at end of file diff --git a/test/robots_txt/test.sh b/test/robots_txt/test.sh new file mode 100755 index 0000000..327a15a --- /dev/null +++ b/test/robots_txt/test.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -euo pipefail + +function cleanup() { + pkill -P $$ +} + +trap cleanup EXIT SIGINT + +# Build static assets +(cd ../.. && npm ci && npm run assets) + +go tool anubis --help 2>/dev/null || : + +go run ../cmd/unixhttpd & + +go tool anubis \ + --policy-fname ./anubis.yaml \ + --use-remote-address \ + --serve-robots-txt \ + --target=unix://$(pwd)/unixhttpd.sock & + +backoff-retry node ./test.mjs diff --git a/test/robots_txt/var/.gitignore b/test/robots_txt/var/.gitignore new file mode 100644 index 0000000..c96a04f --- /dev/null +++ b/test/robots_txt/var/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore \ No newline at end of file