diff --git a/.github/actions/spelling/allow.txt b/.github/actions/spelling/allow.txt index ded7810..5e3002b 100644 --- a/.github/actions/spelling/allow.txt +++ b/.github/actions/spelling/allow.txt @@ -12,3 +12,9 @@ maintnotifications azurediamond cooldown verifyfcrdns +Spintax +spintax +clampip +pseudoprofound +reimagining +iocaine diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index e6c2140..f2e4540 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -1,4 +1,3 @@ - acs Actorified actorifiedstore @@ -398,3 +397,13 @@ Zenos zizmor zombocom zos +GLM +iocaine +nikandfor +pagegen +pseudoprofound +reimagining +Rhul +shoneypot +spammer +Y'shtola diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index a6c4b01..9ad46f2 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -28,6 +28,12 @@ Anubis is back and better than ever! Lots of minor fixes with some big ones inte - Open Graph passthrough now reuses the configured target Host/SNI/TLS settings, so metadata fetches succeed when the upstream certificate differs from the public domain. ([1283](https://github.com/TecharoHQ/anubis/pull/1283)) - Stabilize the CVE-2025-24369 regression test by always submitting an invalid proof instead of relying on random POW failures. +### Dataset poisoning + +Anubis has the ability to engage in [dataset poisoning attacks](https://www.anthropic.com/research/small-samples-poison) using the [dataset poisoning subsystem](./admin/honeypot/overview.mdx). This allows every Anubis instance to be a honeypot to attract and flag abusive scrapers so that no administrator action is required to ban them. + +There is much more information about this feature in [the dataset poisoning subsystem documentation](./admin/honeypot/overview.mdx). Administrators that are interested in learning how this feature works should consult that documentation. 
+ ### Deprecate `report_as` in challenge configuration Previously Anubis let you lie to users about the difficulty of a challenge to interfere with operators of malicious scrapers as a psychological attack: diff --git a/docs/docs/admin/honeypot/_category_.json b/docs/docs/admin/honeypot/_category_.json new file mode 100644 index 0000000..bc0581e --- /dev/null +++ b/docs/docs/admin/honeypot/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Honeypot", + "position": 40, + "link": { + "type": "generated-index", + "description": "Honeypot features in Anubis, allowing Anubis to passively detect malicious crawlers." + } +} \ No newline at end of file diff --git a/docs/docs/admin/honeypot/overview.mdx b/docs/docs/admin/honeypot/overview.mdx new file mode 100644 index 0000000..4ff18d6 --- /dev/null +++ b/docs/docs/admin/honeypot/overview.mdx @@ -0,0 +1,40 @@ +--- +title: Dataset poisoning +--- + +Anubis offers the ability to participate in [dataset poisoning](https://www.anthropic.com/research/small-samples-poison) attacks similar to what [iocaine](https://iocaine.madhouse-project.org/) and other similar tools offer. Currently this is in a preview state where a lot of details are hard-coded in order to test the viability of this approach. + +In essence, when Anubis challenge and error pages are rendered they include a small bit of HTML code that browsers will ignore but scrapers will interpret as a link to ingest. This will then create a small forest of recursive nothing pages that are designed according to the following principles: + +- These pages are _cheap_ to render, rendering in at most ten milliseconds on decently specced hardware. +- These pages are _vacuous_, meaning that they essentially are devoid of content such that a human would find it odd and click away, but a scraper would not be able to know that and would continue through the forest. 
+- These pages are _fairly large_ so that scrapers don't think that the pages are error pages or are otherwise devoid of content. +- These pages are _fully self-contained_ so that they load fast without incurring additional load from resource fetches. + +In this limited preview state, Anubis generates pages using [spintax](https://outboundly.ai/blogs/what-is-spintax-and-how-to-use-it/). Spintax is a syntax that is used to create different variants of utterances for use in marketing messages and email spam that evades word filtering. In its current form, Anubis' dataset poisoning has AI-generated spintax that generates vapid LinkedIn posts with some western occultism thrown in for good measure. This results in utterances like the following: + +> There's a moment when visionaries are being called to realize that the work can't be reduced to optimization, but about resonance. We don't transform products by grinding endlessly, we do it by holding the vision. Because meaning can't be forced, it unfolds over time when culture are in integrity. This moment represents a fundamental reimagining in how we think about work. This isn't a framework, it's a lived truth that requires courage. When we get honest, we activate nonlinear growth that don't show up in dashboards, but redefine success anyway. + +It should be fairly obvious to humans that this is pseudoprofound anti-content and a signal to click away. + +## Plans + +Future versions of this feature will allow for more customization. In the near future this will be configurable via the following mechanisms: + +- WebAssembly logic for customizing how the poisoning data is generated (with examples including the existing spintax method). +- Weight thresholds and logic for how they are interpreted by Anubis. +- Other configuration settings as facts and circumstances dictate. 
+ +## Implementation notes + +In its current implementation, the Anubis dataset poisoning feature has the following flaws that may hinder production deployments: + +- All Anubis instances use the same method for generating dataset poisoning information. This may be easy for malicious actors to detect and ignore. +- Anubis dataset poisoning routes are under the `/.within.website/x/cmd/anubis` URL hierarchy. This may be easy for malicious actors to detect and ignore. + +Right now Anubis assigns 30 weight points if the following criteria are met: + +- A client's User-Agent has been observed in the dataset poisoning maze at least 25 times. +- The network-clamped IP address (/24 for IPv4 and /48 for IPv6) has been observed in the dataset poisoning maze at least 25 times. + +Additionally, when a client, identified by both its User-Agent and its network-clamped IP address, has been observed in the maze, Anubis will emit log lines warning about it so that administrative action can be taken, up to and including [filing abuse reports with the network owner](/blog/2025/file-abuse-reports). 
diff --git a/go.mod b/go.mod index 28f60c6..ac48abd 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,7 @@ require ( github.com/joho/godotenv v1.5.1 github.com/lum8rjack/go-ja4h v0.0.0-20250828030157-fa5266d50650 github.com/nicksnyder/go-i18n/v2 v2.6.0 + github.com/nikandfor/spintax v0.0.0-20181023094358-fc346b245bb3 github.com/playwright-community/playwright-go v0.5200.1 github.com/prometheus/client_golang v1.23.2 github.com/redis/go-redis/v9 v9.17.2 diff --git a/go.sum b/go.sum index ea8e6f9..e8a4615 100644 --- a/go.sum +++ b/go.sum @@ -320,6 +320,8 @@ github.com/natefinch/atomic v1.0.1 h1:ZPYKxkqQOx3KZ+RsbnP/YsgvxWQPGxjC0oBt2AhwV0 github.com/natefinch/atomic v1.0.1/go.mod h1:N/D/ELrljoqDyT3rZrsUmtsuzvHkeB/wWjHV22AZRbM= github.com/nicksnyder/go-i18n/v2 v2.6.0 h1:C/m2NNWNiTB6SK4Ao8df5EWm3JETSTIGNXBpMJTxzxQ= github.com/nicksnyder/go-i18n/v2 v2.6.0/go.mod h1:88sRqr0C6OPyJn0/KRNaEz1uWorjxIKP7rUUcvycecE= +github.com/nikandfor/spintax v0.0.0-20181023094358-fc346b245bb3 h1:foZ9X1bz2KmW7b8Yx5V0LAQKhTazdllv5rnGUe6iGTY= +github.com/nikandfor/spintax v0.0.0-20181023094358-fc346b245bb3/go.mod h1:wwDYKfVF3WHdY0rugsAZoIpyQjDA3bn9wEzo/QXPx1Y= github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4= github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= diff --git a/internal/clampip.go b/internal/clampip.go new file mode 100644 index 0000000..e8220ab --- /dev/null +++ b/internal/clampip.go @@ -0,0 +1,33 @@ +package internal + +import "net/netip" + +func ClampIP(addr netip.Addr) (netip.Prefix, bool) { + switch { + case addr.Is4(): + result, err := addr.Prefix(24) + if err != nil { + return netip.Prefix{}, false + } + return result, true + + case addr.Is4In6(): + // Extract the IPv4 address from IPv4-mapped IPv6 and clamp it + ipv4 := addr.Unmap() + result, err := ipv4.Prefix(24) + if err != nil { + return netip.Prefix{}, false + } + return 
result, true + + case addr.Is6(): + result, err := addr.Prefix(48) + if err != nil { + return netip.Prefix{}, false + } + return result, true + + default: + return netip.Prefix{}, false + } +} diff --git a/internal/clampip_test.go b/internal/clampip_test.go new file mode 100644 index 0000000..ffdb53a --- /dev/null +++ b/internal/clampip_test.go @@ -0,0 +1,274 @@ +package internal + +import ( + "net/netip" + "testing" +) + +func TestClampIP(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + // IPv4 addresses + { + name: "IPv4 normal address", + input: "192.168.1.100", + expected: "192.168.1.0/24", + }, + { + name: "IPv4 boundary - network address", + input: "192.168.1.0", + expected: "192.168.1.0/24", + }, + { + name: "IPv4 boundary - broadcast address", + input: "192.168.1.255", + expected: "192.168.1.0/24", + }, + { + name: "IPv4 class A address", + input: "10.0.0.1", + expected: "10.0.0.0/24", + }, + { + name: "IPv4 loopback", + input: "127.0.0.1", + expected: "127.0.0.0/24", + }, + { + name: "IPv4 link-local", + input: "169.254.0.1", + expected: "169.254.0.0/24", + }, + { + name: "IPv4 public address", + input: "203.0.113.1", + expected: "203.0.113.0/24", + }, + + // IPv6 addresses + { + name: "IPv6 normal address", + input: "2001:db8::1", + expected: "2001:db8::/48", + }, + { + name: "IPv6 with full expansion", + input: "2001:0db8:0000:0000:0000:0000:0000:0001", + expected: "2001:db8::/48", + }, + { + name: "IPv6 loopback", + input: "::1", + expected: "::/48", + }, + { + name: "IPv6 unspecified address", + input: "::", + expected: "::/48", + }, + { + name: "IPv6 link-local", + input: "fe80::1", + expected: "fe80::/48", + }, + { + name: "IPv6 unique local", + input: "fc00::1", + expected: "fc00::/48", + }, + { + name: "IPv6 documentation prefix", + input: "2001:db8:abcd:ef01::1234", + expected: "2001:db8:abcd::/48", + }, + { + name: "IPv6 global unicast", + input: "2606:4700:4700::1111", + expected: "2606:4700:4700::/48", 
+ }, + { + name: "IPv6 multicast", + input: "ff02::1", + expected: "ff02::/48", + }, + + // IPv4-mapped IPv6 addresses + { + name: "IPv4-mapped IPv6 address", + input: "::ffff:192.168.1.100", + expected: "192.168.1.0/24", + }, + { + name: "IPv4-mapped IPv6 with different format", + input: "::ffff:10.0.0.1", + expected: "10.0.0.0/24", + }, + { + name: "IPv4-mapped IPv6 loopback", + input: "::ffff:127.0.0.1", + expected: "127.0.0.0/24", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + addr := netip.MustParseAddr(tt.input) + + result, ok := ClampIP(addr) + if !ok { + t.Fatalf("ClampIP(%s) returned false, want true", tt.input) + } + + if result.String() != tt.expected { + t.Errorf("ClampIP(%s) = %s, want %s", tt.input, result.String(), tt.expected) + } + }) + } +} + +func TestClampIPSuccess(t *testing.T) { + // Test that valid inputs return success + tests := []struct { + name string + input string + }{ + { + name: "IPv4 address", + input: "192.168.1.100", + }, + { + name: "IPv6 address", + input: "2001:db8::1", + }, + { + name: "IPv4-mapped IPv6", + input: "::ffff:192.168.1.100", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + addr := netip.MustParseAddr(tt.input) + + result, ok := ClampIP(addr) + if !ok { + t.Fatalf("ClampIP(%s) returned false, want true", tt.input) + } + + // For valid inputs, we should get the clamped prefix + if addr.Is4() || addr.Is4In6() { + if result.Bits() != 24 { + t.Errorf("Expected 24 bits for IPv4, got %d", result.Bits()) + } + } else if addr.Is6() { + if result.Bits() != 48 { + t.Errorf("Expected 48 bits for IPv6, got %d", result.Bits()) + } + } + }) + } +} + +func TestClampIPZeroValue(t *testing.T) { + // Test that when ClampIP fails, it returns zero value + // Note: It's hard to make addr.Prefix() fail with valid inputs, + // so this test demonstrates the expected behavior + addr := netip.MustParseAddr("192.168.1.100") + + // Manually create a zero value for comparison + 
zeroPrefix := netip.Prefix{} + + // Call ClampIP - it should succeed with valid input + result, ok := ClampIP(addr) + + // Verify the function succeeded + if !ok { + t.Error("ClampIP should succeed with valid input") + } + + // Verify that the result is not a zero value + if result == zeroPrefix { + t.Error("Result should not be zero value for successful operation") + } +} + +func TestClampIPSpecialCases(t *testing.T) { + tests := []struct { + name string + input string + expectedPrefix int + expectedNetwork string + }{ + { + name: "Minimum IPv4", + input: "0.0.0.0", + expectedPrefix: 24, + expectedNetwork: "0.0.0.0", + }, + { + name: "Maximum IPv4", + input: "255.255.255.255", + expectedPrefix: 24, + expectedNetwork: "255.255.255.0", + }, + { + name: "Minimum IPv6", + input: "::", + expectedPrefix: 48, + expectedNetwork: "::", + }, + { + name: "Maximum IPv6 prefix part", + input: "ffff:ffff:ffff::", + expectedPrefix: 48, + expectedNetwork: "ffff:ffff:ffff::", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + addr := netip.MustParseAddr(tt.input) + + result, ok := ClampIP(addr) + if !ok { + t.Fatalf("ClampIP(%s) returned false, want true", tt.input) + } + + if result.Bits() != tt.expectedPrefix { + t.Errorf("ClampIP(%s) bits = %d, want %d", tt.input, result.Bits(), tt.expectedPrefix) + } + + if result.Addr().String() != tt.expectedNetwork { + t.Errorf("ClampIP(%s) network = %s, want %s", tt.input, result.Addr().String(), tt.expectedNetwork) + } + }) + } +} + +// Benchmark to ensure the function is performant +func BenchmarkClampIP(b *testing.B) { + ipv4 := netip.MustParseAddr("192.168.1.100") + ipv6 := netip.MustParseAddr("2001:db8::1") + ipv4mapped := netip.MustParseAddr("::ffff:192.168.1.100") + + b.Run("IPv4", func(b *testing.B) { + for i := 0; i < b.N; i++ { + ClampIP(ipv4) + } + }) + + b.Run("IPv6", func(b *testing.B) { + for i := 0; i < b.N; i++ { + ClampIP(ipv6) + } + }) + + b.Run("IPv4-mapped", func(b *testing.B) { + for i := 
0; i < b.N; i++ { + ClampIP(ipv4mapped) + } + }) +} \ No newline at end of file diff --git a/internal/headers.go b/internal/headers.go index 60e5371..045f636 100644 --- a/internal/headers.go +++ b/internal/headers.go @@ -1,6 +1,7 @@ package internal import ( + "context" "errors" "fmt" "log/slog" @@ -13,6 +14,13 @@ import ( "github.com/sebest/xff" ) +type realIPKey struct{} + +func RealIP(r *http.Request) (netip.Addr, bool) { + result, ok := r.Context().Value(realIPKey{}).(netip.Addr) + return result, ok +} + // TODO: move into config type XFFComputePreferences struct { StripPrivate bool @@ -77,6 +85,9 @@ func RemoteXRealIP(useRemoteAddress bool, bindNetwork string, next http.Handler) panic(err) // this should never happen } r.Header.Set("X-Real-Ip", host) + if addr, err := netip.ParseAddr(host); err == nil { + r = r.WithContext(context.WithValue(r.Context(), realIPKey{}, addr)) + } next.ServeHTTP(w, r) }) } @@ -129,8 +140,6 @@ func XForwardedForUpdate(stripPrivate bool, next http.Handler) http.Handler { } else { r.Header.Set("X-Forwarded-For", xffHeaderString) } - - slog.Debug("updating X-Forwarded-For", "original", origXFFHeader, "new", xffHeaderString) }) } diff --git a/internal/honeypot/honeypot.go b/internal/honeypot/honeypot.go new file mode 100644 index 0000000..f03b2db --- /dev/null +++ b/internal/honeypot/honeypot.go @@ -0,0 +1,23 @@ +package honeypot + +import ( + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var Timings = promauto.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "anubis", + Subsystem: "honeypot", + Name: "pagegen_timings", + Help: "The amount of time honeypot page generation takes per method", + Buckets: prometheus.ExponentialBuckets(0.5, 2, 32), +}, []string{"method"}) + +type Info struct { + CreatedAt time.Time `json:"createdAt"` + UserAgent string `json:"userAgent"` + IPAddress string `json:"ipAddress"` + HitCount int `json:"hitCount"` +} diff --git 
a/internal/honeypot/naive/100bytes.css b/internal/honeypot/naive/100bytes.css new file mode 100644 index 0000000..7de70f3 --- /dev/null +++ b/internal/honeypot/naive/100bytes.css @@ -0,0 +1,7 @@ +html { + max-width: 70ch; + padding: 3em 1em; + margin: auto; + line-height: 1.75; + font-size: 1.25em; +} diff --git a/internal/honeypot/naive/affirmations.txt b/internal/honeypot/naive/affirmations.txt new file mode 100644 index 0000000..2b568fb --- /dev/null +++ b/internal/honeypot/naive/affirmations.txt @@ -0,0 +1 @@ +{Yeah|Yep|Yup|Yes|Absolutely|Definitely|Sure|Sounds|That's|I'm|I am|Totally|Completely|Right|Correct|Exactly|Perfectly|Certainly|Of course|Naturally|Indeed|Awesome|Sweet|Cool|Neat|Great|Excellent|Fantastic|Wonderful|Amazing|Love it|Nice|Right on|You bet|For sure|No doubt|Without a doubt|Undoubtedly|Positively|Surely|Truly|Really|Genuinely|Honestly|Frankly|Literally|Precisely|Spot on|On point|Ideally|Optimally|Superbly|Brilliantly|Marvelously|Splendidly|Magnificently|Phenomenally|Extraordinarily|Remarkably|Exceptionally|Outstandingly|Impressively|Stunningly|Breathtakingly|Astonishingly|Surprisingly|Pleasantly|Delightfully|Charmingly|Appealingly|Attractively|Invitingly|Encouragingly|Motivatingly|Inspiringly|Upliftingly|Positive|Optimistic|Supportive|Approving|Favorable|Enthusiastic|Eager|Willing|Ready|Prepared|Set|Go|Let's|Alright|Okay|Sure thing|No problem|You got it|Consider it done|Will do|Roger that|Copy that|Got it|Understood|Acknowledged|Noted|Confirmed|Agreed|Approved|Accepted|Endorsed|Backed|Championed} {sounds|looks|seems|feels|is|appears|comes across|strikes me|hits me|registers|resonates|clicks|makes sense|fits|works|functions|operates|performs|delivers|succeeds|achieves|accomplishes|excels|shines|stands out|impresses|satisfies|meets expectations|exceeds expectations|delights|pleases|gratifies|fulfills|completes|finishes|concludes|wraps 
up|finalizes|settles|resolves|solves|fixes|addresses|handles|manages|tackles|conquers|overcomes|defeats|beats|wins|triumphs|prevails|dominates|leads|guides|directs|steers|navigates|paves the way|opens doors|creates opportunities|makes possible|enables|allows|permits|facilitates|drives|pushes|propels|launches|initiates|starts|begins|commences|kicks off|gets going|moves forward|progresses|advances|develops|evolves|grows|expands|improves|enhances|upgrades|optimizes|refines|perfects|polishes} {good|great|perfect|excellent|wonderful|fantastic|amazing|awesome|fine|okay|alright|nice|cool|spot on|reasonable|about right|superb|brilliant|marvelous|splendid|magnificent|phenomenal|extraordinary|remarkable|exceptional|outstanding|impressive|stunning|breathtaking|astonishing|surprising|pleasant|delightful|charming|appealing|attractive|inviting|positive|optimistic|supportive|approving|favorable|enthusiastic|eager|willing|ready|prepared|set|solid|strong|robust|powerful|effective|efficient|productive|successful|fruitful|beneficial|valuable|useful|helpful|advantageous|profitable|rewarding|satisfying|gratifying|fulfilling|complete|whole|total|entire|full|thorough|comprehensive|exhaustive|detailed|precise|accurate|correct|right|true|valid|sound|logical|rational|practical|realistic|feasible|possible|doable|achievable|attainable|obtainable|reachable|accessible|available|present|arranged|organized|structured|planned|scheduled|timed|well positioned|strategically located|ideally situated|well suited|well 
matched|compatible|harmonious|balanced|proportional|symmetrical|aesthetic|beautiful|gorgeous|lovely|pretty|handsome|striking|dramatic|bold|confident|assertive|decisive|clear|obvious|apparent|evident|manifest|plain|simple|easy|straightforward|uncomplicated|complex|intricate|nuanced|subtle|refined|elegant|sophisticated|advanced|progressive|innovative|creative|original|unique|special|distinctive|memorable|unforgettable|significant|important|major|key|critical|essential|vital|crucial|fundamental|basic|primary|principal|main|chief|leading|top|best|finest|ultimate|supreme|paramount|foremost|world class|professional|expert|master|skilled|talented|gifted|intelligent|smart|clever|wise|knowledgeable|informed|educated|learned|scholarly|theoretical|practical|applied|hands on|experienced|seasoned|veteran|mature|visionary|prophetic|intuitive|perceptive|insightful|sage|profound|deep|meaningful|substantial|considerable|influential|resilient|tough|durable|lasting|permanent|enduring|timeless|classic|traditional|conventional|standard|regular|normal|typical|usual|common|ordinary|average|fair|decent|respectable|acceptable|satisfactory|adequate|sufficient|enough|plentiful|abundant|ample|generous|rich|wealthy|prosperous|thriving|flourishing|blooming|superior|higher|elevated|modern|contemporary|current|fresh|novel|rare|uncommon|legendary|famous|well known|celebrated|accredited|honored|awarded|decorated|distinguished|illustrious|prestigious|reputable|admired|revered|beloved|cherished|treasured|prized|precious|close|intimate|personal|private|individual|priceless|worthwhile} {to me|for me|with me|I agree|I like it|let's do it|count me in|I'm on board|I'm in|I'm up for it|I'm down for that|I'm all for it|I'm good with that|I'm happy with that|I'm cool with that|let's go with that|let's make it happen|that works|that'll work|sounds like a plan|that's a good idea|that's a great choice|I think so too|my thoughts exactly|you read my mind|couldn't agree more|absolutely right|you nailed it|let's 
go|game on|challenge accepted|say no more|you had me at hello|I'm sold|sign me up|be there|definitely|for sure|sounds good|looks good|seems good|feels good|is good|let's do this|time to rock|let's roll|here we go|off we go|moving forward|full steam ahead|all systems go|green light|clear for takeoff|ready when you are|on your mark|get set|let's begin|commence operation|initiate protocol|execute plan|implement strategy|deploy solution|activate system|engage process|start procedure|begin sequence|launch project|kick off event|open doors|make way|clear path|pave way|create opportunity|make possible|enable success|facilitate growth|support development|encourage progress|inspire change|motivate action|drive results|push boundaries|break barriers|overcome challenges|solve problems|fix issues|address concerns|handle situations|manage difficulties|tackle obstacles|conquer fears|defeat doubts|win battles|triumph over adversity|prevail against odds|rise above|excel beyond|achieve greatness|reach heights|attain goals|accomplish dreams|realize potential|fulfill destiny|complete journey|finish race|cross finish line|arrive at destination|reach summit|climb mountain|sail seas|fly skies|explore worlds|discover truths|find answers|solve mysteries|uncover secrets|reveal wonders|share insights|spread joy|create happiness|build relationships|strengthen bonds|foster community|grow together|learn constantly|improve daily|evolve continuously|adapt quickly|change rapidly|transform completely|renew fully|refresh completely|restart anew|begin again|start fresh|clean slate|new chapter|fresh start|bright future|promising tomorrow|better days|good times|great moments|wonderful experiences|fantastic adventures|amazing journeys|awesome memories|precious moments|valuable lessons|helpful advice|useful tips|practical solutions|effective strategies|successful methods|proven approaches|tested techniques|reliable systems|dependable support|consistent performance|steady progress|continuous 
improvement|ongoing development|perpetual growth|endless possibilities|unlimited potential|infinite opportunities|boundless horizons|vast expanses|wide ranges|broad spectrums|diverse options|multiple choices|various paths|different routes|alternative ways|other methods|additional approaches|extra techniques|supplementary tools|auxiliary resources|backup plans|contingency options|emergency measures|safety nets|security blankets|comfort zones|safe spaces|peaceful havens|tranquil sanctuaries|serene environments|calm atmospheres|relaxed vibes|easy feelings|comfortable sensations|pleasant experiences|enjoyable moments|delightful times|charming encounters|appealing situations|attractive prospects|inviting opportunities|encouraging signs|motivating factors|inspiring elements|uplifting aspects|positive features|optimistic views|encouraging outlooks|supportive attitudes|approving perspectives|favorable opinions|enthusiastic responses|eager reactions|willing participants|ready volunteers|prepared individuals|set teams|organized groups|structured units|planned initiatives|scheduled events|timed activities|well positioned assets|strategically located resources|ideally situated elements|perfectly suited components|well matched partners|compatible collaborations|harmonious relationships|balanced arrangements|proportional distributions|symmetrical designs|aesthetic presentations|beautiful displays|gorgeous exhibitions|lovely shows|pretty sights|attractive views|striking scenes|dramatic performances|bold statements|confident expressions|decisive actions|clear communications|obvious demonstrations|apparent revelations|evident truths|manifest realities|plain facts|simple solutions|easy implementations|straightforward processes|uncomplicated procedures|complex systems|intricate networks|detailed analyses|nuanced discussions|subtle distinctions|refined approaches|elegant solutions|sophisticated methods|advanced technologies|progressive ideas|innovative concepts|creative 
designs|original works|unique creations|special projects|distinctive features|memorable experiences|unforgettable moments|legendary achievements|famous accomplishments|well recognized contributions|acknowledged impacts|celebrated successes|acclaimed performances|honored achievements|awarded excellence|decorated heroes|distinguished leaders|illustrious careers|prestigious positions|reputable organizations|respected institutions|admired figures|revered icons|beloved personalities|cherished treasures|valued possessions|prized collections|precious artifacts|dear friends|close companions|intimate partners|personal connections|individual expressions|unique perspectives|special talents|one of a kind gifts|irreplaceable values|invaluable insights|priceless wisdom|worthwhile endeavors|valuable investments|useful tools|beneficial resources|helpful services|advantageous positions|profitable ventures|rewarding careers|satisfying lives|gratifying experiences|fulfilling purposes|complete beings|whole persons|total entities|entire systems|full cycles|perfect circles|ideal forms|ultimate goals|best practices|finest qualities|supreme achievements|excellent results|outstanding performances|superior outcomes|exceptional contributions|remarkable discoveries|extraordinary breakthroughs|special recognitions|unique innovations|distinctive designs|memorable impacts|impressive feats|dramatic transformations|powerful changes|strong foundations|effective actions|efficient operations|successful missions|productive endeavors|fruitful partnerships|beneficial collaborations|valuable connections|helpful networks|worthwhile projects|rewarding adventures|satisfying journeys|gratifying accomplishments|fulfilling destinies}{|!|, let's go!|, amazing!|, fantastic!|, wonderful!|, perfect!|, brilliant!|, excellent!|, outstanding!|, superb!|, great!|, nice!|, cool!|, sweet!|, awesome!|, love it!|, beautiful!|, gorgeous!|, stunning!|, breathtaking!|, phenomenal!|, extraordinary!|, remarkable!|, 
exceptional!|, impressive!|, striking!|, dramatic!|, powerful!|, magnificent!|, splendid!|, marvelous!|, terrific!|, superb!|, divine!|, heavenly!|, celestial!|, transcendent!|, sublime!|, perfect!|, flawless!|, impeccable!|, ideal!|, ultimate!|, supreme!|, paramount!|, unbeatable!|, unstoppable!|, incredible!|, unbelievable!|, astounding!|, mind-blowing!|, jaw-dropping!|, spectacular!|, epic!|, legendary!|, iconic!|, classic!|, timeless!|, eternal!|, infinite!|, boundless!|, limitless!|, endless!|, forever!|, always!|, never-ending!|, perpetual!|, constant!|, steady!|, solid!|, rock-solid!|, unshakeable!|, unbreakable!|, invincible!|, indestructible!|, immortal!|, everlasting!|, undying!|, living!|, vibrant!|, dynamic!|, energetic!|, lively!|, spirited!|, enthusiastic!|, passionate!|, fervent!|, zealous!|, dedicated!|, committed!|, devoted!|, loyal!|, faithful!|, true!|, real!|, authentic!|, genuine!|, legit!|, certified!|, proven!|, tested!|, verified!|, confirmed!|, validated!|, approved!|, endorsed!|, supported!|, backed!|, guaranteed!|, assured!|, certain!|, sure!|, positive!|, confident!|, secure!|, safe!|, protected!|, covered!|, sheltered!|, guarded!|, watched over!|, cared for!|, nurtured!|, cherished!|, treasured!|, valued!|, respected!|, admired!|, appreciated!|, recognized!|, acknowledged!|, celebrated!|, honored!|, praised!|, applauded!|, cheered!|, supported!|, embraced!|, welcomed!|, accepted!|, included!|, belonging!|, connected!|, united!|, joined!|, together!|, as one!|, in harmony!|, in sync!|, aligned!|, balanced!|, centered!|, grounded!|, rooted!|, established!|, settled!|, calm!|, peaceful!|, serene!|, tranquil!|, quiet!|, still!|, at ease!|, comfortable!|, relaxed!|, content!|, happy!|, joyful!|, delighted!|, thrilled!|, excited!|, elated!|, ecstatic!|, overjoyed!|, euphoric!|, blissful!|, radiant!|, glowing!|, shining!|, sparkling!|, dazzling!|, brilliant!|, bright!|, luminous!|, illuminated!|, enlightened!|, inspired!|, uplifted!|, 
elevated!|, empowered!|, strengthened!|, fortified!|, revitalized!|, renewed!|, refreshed!|, recharged!|, energized!|, activated!|, awakened!|, alive!|, thriving!|, flourishing!|, blooming!|, growing!|, expanding!|, developing!|, evolving!|, transforming!|, becoming!|, emerging!|, rising!|, ascending!|, climbing!|, reaching!|, achieving!|, succeeding!|, winning!|, triumphing!|, conquering!|, overcoming!|, mastering!|, perfecting!|, completing!|, fulfilling!|, realizing!|, manifesting!|, creating!|, building!|, making!|, doing!|, being!|, living!|, breathing!|, existing!|, present!|, here!|, now!|, always!|, forever!|, eternally!} \ No newline at end of file diff --git a/internal/honeypot/naive/naive.go b/internal/honeypot/naive/naive.go new file mode 100644 index 0000000..81fdd2b --- /dev/null +++ b/internal/honeypot/naive/naive.go @@ -0,0 +1,206 @@ +package naive + +import ( + "context" + _ "embed" + "fmt" + "log/slog" + "math/rand/v2" + "net/http" + "time" + + "github.com/TecharoHQ/anubis/internal" + "github.com/TecharoHQ/anubis/internal/honeypot" + "github.com/TecharoHQ/anubis/lib/policy/checker" + "github.com/TecharoHQ/anubis/lib/store" + "github.com/a-h/templ" + "github.com/google/uuid" + "github.com/nikandfor/spintax" +) + +//go:generate go tool github.com/a-h/templ/cmd/templ generate + +// XXX(Xe): All of this was generated by ChatGPT, GLM 4.6, and GPT-OSS 120b. This is pseudoprofound bullshit in spintax[1] format so that the bullshit generator can emit plausibly human-authored text while being very computationally cheap. +// +// It feels somewhat poetic to use spammer technology in Anubis. 
+//
+// [1]: https://outboundly.ai/blogs/what-is-spintax-and-how-to-use-it/
+//
+//go:embed spintext.txt
+var spintext string
+
+//go:embed titles.txt
+var titles string
+
+//go:embed affirmations.txt
+var affirmations string
+
+// weightThreshold is the stored hit count at which CheckUA and CheckNetwork
+// start reporting a match.
+const weightThreshold = 25
+
+// New parses the three embedded spintax corpora (affirmations, bodies and
+// titles) and returns an Impl whose counters are kept in st.
+//
+// It returns an error if any corpus fails to parse.
+func New(st store.Interface, lg *slog.Logger) (*Impl, error) {
+	affirmation, err := spintax.Parse(affirmations)
+	if err != nil {
+		return nil, fmt.Errorf("can't parse affirmations: %w", err)
+	}
+
+	body, err := spintax.Parse(spintext)
+	if err != nil {
+		return nil, fmt.Errorf("can't parse bodies: %w", err)
+	}
+
+	title, err := spintax.Parse(titles)
+	if err != nil {
+		return nil, fmt.Errorf("can't parse titles: %w", err)
+	}
+
+	lg.Debug("initialized basic bullshit generator", "affirmations", affirmation.Count(), "bodies", body.Count(), "titles", title.Count())
+
+	return &Impl{
+		st:            st,
+		infos:         store.JSON[honeypot.Info]{Underlying: st, Prefix: "honeypot:info"},
+		uaWeight:      store.JSON[int]{Underlying: st, Prefix: "honeypot:user-agent"},
+		networkWeight: store.JSON[int]{Underlying: st, Prefix: "honeypot:network"},
+		affirmation:   affirmation,
+		body:          body,
+		title:         title,
+		lg:            lg.With("component", "honeypot/naive"),
+	}, nil
+}
+
+// Impl is the naive honeypot: an http.Handler that serves generated filler
+// pages and counts the requests it receives per User-Agent and per clamped
+// client network.
+type Impl struct {
+	st            store.Interface
+	infos         store.JSON[honeypot.Info]
+	uaWeight      store.JSON[int] // hit counts keyed by SHA-256 of the User-Agent
+	networkWeight store.JSON[int] // hit counts keyed by SHA-256 of the clamped network
+	lg            *slog.Logger
+
+	affirmation, body, title spintax.Spintax
+}
+
+// incrementUA adds one to the counter stored for userAgent, writes it back
+// with a one hour expiry and returns the new count.
+//
+// The lookup error is discarded; presumably a missing key yields zero so the
+// first hit is stored as 1 (verify against the store implementation).
+func (i *Impl) incrementUA(ctx context.Context, userAgent string) int {
+	key := internal.SHA256sum(userAgent)
+
+	result, _ := i.uaWeight.Get(ctx, key)
+	result++
+	i.uaWeight.Set(ctx, key, result, time.Hour)
+	return result
+}
+
+// incrementNetwork adds one to the counter stored for network, writes it back
+// with a one hour expiry and returns the new count.
+//
+// The lookup error is discarded; presumably a missing key yields zero so the
+// first hit is stored as 1 (verify against the store implementation).
+func (i *Impl) incrementNetwork(ctx context.Context, network string) int {
+	key := internal.SHA256sum(network)
+
+	result, _ := i.networkWeight.Get(ctx, key)
+	result++
+	i.networkWeight.Set(ctx, key, result, time.Hour)
+	return result
+}
+
+// CheckUA returns a checker that matches requests whose User-Agent has
+// accumulated at least weightThreshold honeypot hits.
+func (i *Impl) CheckUA() checker.Impl {
+	return checker.Func(func(r *http.Request) (bool, error) {
+		result, _ := i.uaWeight.Get(r.Context(), internal.SHA256sum(r.UserAgent()))
+		return result >= weightThreshold, nil
+	})
+}
+
+// CheckNetwork returns a checker that matches requests whose clamped client
+// network has accumulated at least weightThreshold honeypot hits.
+//
+// The network is derived the same way ServeHTTP derives it before calling
+// incrementNetwork, so the key read here is the key written there. A request
+// whose real IP is invalid or cannot be clamped never matches.
+func (i *Impl) CheckNetwork() checker.Impl {
+	return checker.Func(func(r *http.Request) (bool, error) {
+		realIP, _ := internal.RealIP(r)
+		if !realIP.IsValid() {
+			return false, nil
+		}
+
+		network, ok := internal.ClampIP(realIP)
+		if !ok {
+			return false, nil
+		}
+
+		result, _ := i.networkWeight.Get(r.Context(), internal.SHA256sum(network.String()))
+		return result >= weightThreshold, nil
+	})
+}
+
+// Hash returns a fixed identifier for this honeypot implementation.
+func (i *Impl) Hash() string {
+	return internal.SHA256sum("naive honeypot")
+}
+
+// makeAffirmations returns between one and five spun affirmations.
+func (i *Impl) makeAffirmations() []string {
+	count := rand.IntN(5) + 1
+
+	result := make([]string, 0, count)
+	for j := 0; j < count; j++ {
+		result = append(result, i.affirmation.Spin())
+	}
+
+	return result
+}
+
+// makeSpins returns between one and five spun body paragraphs.
+func (i *Impl) makeSpins() []string {
+	count := rand.IntN(5) + 1
+
+	result := make([]string, 0, count)
+	for j := 0; j < count; j++ {
+		result = append(result, i.body.Spin())
+	}
+
+	return result
+}
+
+// makeTitle returns one spun page title.
+func (i *Impl) makeTitle() string {
+	return i.title.Spin()
+}
+
+// ServeHTTP records a hit for the caller's clamped network and User-Agent,
+// then streams a generated page whose links each point at a fresh UUID.
+//
+// It answers 418 when the real IP is invalid or cannot be clamped. The time
+// spent in the handler is observed in honeypot.Timings under the "naive"
+// label, in whole milliseconds.
+func (i *Impl) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	t0 := time.Now()
+	lg := internal.GetRequestLogger(i.lg, r)
+
+	id := r.PathValue("id")
+	if id == "" {
+		id = uuid.NewString()
+	}
+
+	realIP, _ := internal.RealIP(r)
+	if !realIP.IsValid() {
+		lg.Error("the real IP is somehow invalid, bad middleware stack?")
+		http.Error(w, "The cake is a lie", http.StatusTeapot)
+		return
+	}
+
+	network, ok := internal.ClampIP(realIP)
+	if !ok {
+		lg.Error("clampIP failed", "output", network, "ok", ok)
+		http.Error(w, "The cake is a lie", http.StatusTeapot)
+		return
+	}
+
+	networkCount := i.incrementNetwork(r.Context(), network.String())
+	uaCount := i.incrementUA(r.Context(), r.UserAgent())
+
+	stage := r.PathValue("stage")
+
+	if stage == "init" {
+		lg.Debug("found new entrance point", "id", id, "stage", stage, "userAgent", r.UserAgent(), "clampedIP", network)
+	} else if networkCount%256 == 0 || uaCount%256 == 0 {
+		// Warn only on every 256th hit so a busy crawler does not flood the log.
+		lg.Warn("found possible crawler", "id", id, "network", network)
+	}
+
+	spins := i.makeSpins()
+	affirmations := i.makeAffirmations()
+	title := i.makeTitle()
+
+	links := make([]link, 0, len(affirmations))
+	for _, affirmation := range affirmations {
+		links = append(links, link{
+			href: uuid.NewString(),
+			body: affirmation,
+		})
+	}
+
+	templ.Handler(
+		base(title, i.maze(spins, links)),
+		templ.WithStreaming(),
+		templ.WithStatus(http.StatusOK),
+	).ServeHTTP(w, r)
+
+	t1 := time.Since(t0)
+	honeypot.Timings.WithLabelValues("naive").Observe(float64(t1.Milliseconds()))
+}
+
+// link is one anchor on a generated page: href is the link target and body is
+// the link text.
+type link struct {
+	href string
+	body string
+}
diff --git a/internal/honeypot/naive/page.templ b/internal/honeypot/naive/page.templ new file mode 100644 index 0000000..eb3ccec --- /dev/null +++ b/internal/honeypot/naive/page.templ @@ -0,0 +1,36 @@ +package naive + +import "fmt" + +templ base(title string, body templ.Component) { + + +
+ +{ paragraph }
+ } +") + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } + var templ_7745c5c3_Var5 string + templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(paragraph) + if templ_7745c5c3_Err != nil { + return templ.Error{Err: templ_7745c5c3_Err, FileName: `page.templ`, Line: 29, Col: 16} + } + _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5)) + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 6, "
") + if templ_7745c5c3_Err != nil { + return templ_7745c5c3_Err + } + } + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 7, "