feat: first implementation of honeypot logic (#1342)

* feat: first implementation of honeypot logic

This is a bit of an experiment, stick with me.

The core idea here is that badly written crawlers are just that: badly
written. They look for anything that contains `<a href="whatever" />`
tags and will blindly follow those values to recurse. This takes advantage
of that by hiding a link in a `<script>` tag like this:

```html
<script type="ignore"><a href="/bots-only">Don't click</a></script>
```

Browsers will ignore it because they have no handler for the "ignore"
script type.

This current draft is very unoptimized (it takes like 7 seconds to
generate a page on my tower), however switching spintax libraries will
make this much faster.

The hope is to make this pluggable with WebAssembly such that we force
administrators to choose a storage method. We have to crawl before we
can walk.

The AI involvement in this commit is limited to the spintax in
affirmations.txt, spintext.txt, and titles.txt. This generates a bunch
of "pseudoprofound bullshit" like the following:

> This Restoration to Balance & Alignment
>
> There's a moment when creators are being called to realize that the work
> can't be reduced to results, but about energy. We don't innovate products
> by pushing harder, we do it by holding the vision. Because momentum can't
> be forced, it unfolds over time when culture are moving in the same
> direction. We're being invited into a paradigm shift in how we think
> about innovation. [...]

This is intended to "look" like normal article text. As this is a first
draft, this sucks and will be improved upon.

Assisted-by: GLM 4.6, ChatGPT, GPT-OSS 120b
Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(honeypot/naive): optimize hilariously

Signed-off-by: Xe Iaso <me@xeiaso.net>

* feat(honeypot/naive): attempt to automatically filter out based on crawling

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(lib): use mazeGen instead of bsGen

Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs: add honeypot docs

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore(test): go mod tidy

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: fix spelling metadata

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: spelling

Signed-off-by: Xe Iaso <me@xeiaso.net>

---------

Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
Xe Iaso 2025-12-16 04:14:29 -05:00 committed by GitHub
parent cb91145352
commit 122e4bc072
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 968 additions and 84 deletions

View file

@ -0,0 +1,23 @@
package honeypot
import (
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
// Timings is a histogram of honeypot page generation time, exported as
// anubis_honeypot_pagegen_timings and partitioned by the "method" label.
//
// The naive generator in this commit records its observations in
// milliseconds under the label value "naive".
var Timings = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "anubis",
Subsystem: "honeypot",
Name: "pagegen_timings",
Help: "The amount of time honeypot page generation takes per method",
// 32 buckets starting at 0.5 and doubling each step.
Buckets: prometheus.ExponentialBuckets(0.5, 2, 32),
}, []string{"method"})
// Info is the JSON-serializable record describing one honeypot entry.
//
// NOTE(review): the field meanings below are inferred from the field names
// only; nothing in this part of the change populates an Info. Confirm
// against the code that writes it.
type Info struct {
// CreatedAt is presumably when the entry was first recorded.
CreatedAt time.Time `json:"createdAt"`
// UserAgent is presumably the User-Agent header of the client.
UserAgent string `json:"userAgent"`
// IPAddress is presumably the client address in string form.
IPAddress string `json:"ipAddress"`
// HitCount is presumably the number of requests seen for this entry.
HitCount int `json:"hitCount"`
}

View file

@ -0,0 +1,7 @@
/* Page layout: a single centered column capped at 70 characters wide, with
   enlarged text and generous line spacing. */
html {
max-width: 70ch;
padding: 3em 1em;
margin: auto;
line-height: 1.75;
font-size: 1.25em;
}

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,206 @@
package naive
import (
"context"
_ "embed"
"fmt"
"log/slog"
"math/rand/v2"
"net/http"
"time"
"github.com/TecharoHQ/anubis/internal"
"github.com/TecharoHQ/anubis/internal/honeypot"
"github.com/TecharoHQ/anubis/lib/policy/checker"
"github.com/TecharoHQ/anubis/lib/store"
"github.com/a-h/templ"
"github.com/google/uuid"
"github.com/nikandfor/spintax"
)
//go:generate go tool github.com/a-h/templ/cmd/templ generate
// XXX(Xe): All of this was generated by ChatGPT, GLM 4.6, and GPT-OSS 120b. This is pseudoprofound bullshit in spintax[1] format so that the bullshit generator can emit plausibly human-authored text while being very computationally cheap.
//
// It feels somewhat poetic to use spammer technology in Anubis.
//
// [1]: https://outboundly.ai/blogs/what-is-spintax-and-how-to-use-it/
//
// spintext is the embedded spintax source for page body paragraphs. New
// parses it once at construction time.
//
//go:embed spintext.txt
var spintext string
// titles is the embedded spintax source for page titles. New parses it once
// at construction time.
//
//go:embed titles.txt
var titles string
// affirmations is the embedded spintax source for the short phrases used as
// link text. New parses it once at construction time.
//
//go:embed affirmations.txt
var affirmations string
// New builds the naive honeypot page generator on top of st.
//
// The three embedded spintax sources (affirmations, bodies, titles) are
// parsed up front, in that order; the first one that fails to parse aborts
// construction with a wrapped error. The returned Impl logs through lg with
// a "component" attribute of "honeypot/naive".
func New(st store.Interface, lg *slog.Logger) (*Impl, error) {
	affirmationGen, err := spintax.Parse(affirmations)
	if err != nil {
		return nil, fmt.Errorf("can't parse affirmations: %w", err)
	}

	bodyGen, err := spintax.Parse(spintext)
	if err != nil {
		return nil, fmt.Errorf("can't parse bodies: %w", err)
	}

	titleGen, err := spintax.Parse(titles)
	if err != nil {
		return nil, fmt.Errorf("can't parse titles: %w", err)
	}

	// Report how many distinct outputs each generator can produce.
	lg.Debug(
		"initialized basic bullshit generator",
		"affirmations", affirmationGen.Count(),
		"bodies", bodyGen.Count(),
		"titles", titleGen.Count(),
	)

	impl := &Impl{
		st: st,

		// Typed JSON views over the same store, separated by key prefix.
		infos:         store.JSON[honeypot.Info]{Underlying: st, Prefix: "honeypot:info"},
		uaWeight:      store.JSON[int]{Underlying: st, Prefix: "honeypot:user-agent"},
		networkWeight: store.JSON[int]{Underlying: st, Prefix: "honeypot:network"},

		affirmation: affirmationGen,
		body:        bodyGen,
		title:       titleGen,

		lg: lg.With("component", "honeypot/naive"),
	}
	return impl, nil
}
// Impl is the naive honeypot: an http.Handler that serves generated filler
// pages full of links back into itself, and that counts hits per User-Agent
// and per client network so those counts can be used by policy checkers.
//
// Construct it with New; the zero value has no store, logger, or parsed
// spintax.
type Impl struct {
// st is the backing store the typed views below are layered on.
st store.Interface
// infos is a JSON view of st under the "honeypot:info" prefix.
infos store.JSON[honeypot.Info]
// uaWeight holds hit counts keyed by the SHA-256 sum of the User-Agent.
uaWeight store.JSON[int]
// networkWeight holds hit counts keyed by the SHA-256 sum of the clamped
// client network.
networkWeight store.JSON[int]
// lg is the component-tagged logger.
lg *slog.Logger
// Parsed spintax generators for link text, body paragraphs, and titles.
affirmation, body, title spintax.Spintax
}
// incrementUA bumps the hit counter stored for userAgent and returns the new
// count.
//
// The counter is keyed by the SHA-256 sum of the User-Agent string and is
// rewritten with a one hour expiry on every hit. Get and Set are separate
// calls, so concurrent hits for the same User-Agent may lose increments.
func (i *Impl) incrementUA(ctx context.Context, userAgent string) int {
	key := internal.SHA256sum(userAgent)

	// The lookup error is ignored on purpose: counting simply continues from
	// whatever value Get handed back.
	count, _ := i.uaWeight.Get(ctx, key)
	count++

	// Counting is best effort, so a failed write is reported but does not
	// fail the request.
	if err := i.uaWeight.Set(ctx, key, count, time.Hour); err != nil {
		i.lg.Warn("can't store user agent weight", "err", err)
	}

	return count
}
// incrementNetwork bumps the hit counter stored for network and returns the
// new count.
//
// The counter is keyed by the SHA-256 sum of the network string and is
// rewritten with a one hour expiry on every hit. Get and Set are separate
// calls, so concurrent hits from the same network may lose increments.
func (i *Impl) incrementNetwork(ctx context.Context, network string) int {
	key := internal.SHA256sum(network)

	// The lookup error is ignored on purpose: counting simply continues from
	// whatever value Get handed back.
	count, _ := i.networkWeight.Get(ctx, key)
	count++

	// Counting is best effort, so a failed write is reported but does not
	// fail the request.
	if err := i.networkWeight.Set(ctx, key, count, time.Hour); err != nil {
		i.lg.Warn("can't store network weight", "err", err)
	}

	return count
}
// CheckUA returns a checker that matches requests whose User-Agent has a
// stored honeypot hit count of 25 or more.
//
// The lookup error is discarded and the checker itself never returns an
// error.
func (i *Impl) CheckUA() checker.Impl {
	lookup := func(r *http.Request) (bool, error) {
		key := internal.SHA256sum(r.UserAgent())
		weight, _ := i.uaWeight.Get(r.Context(), key)
		return weight >= 25, nil
	}

	return checker.Func(lookup)
}
// CheckNetwork returns a checker that matches requests whose clamped client
// network has a stored honeypot hit count of 25 or more.
//
// The key is derived exactly the way ServeHTTP derives it when counting:
// the real client IP is clamped to its network and the SHA-256 sum of that
// network's string form is looked up in networkWeight. Requests whose client
// IP cannot be determined or clamped never match. The lookup error is
// discarded and the checker itself never returns an error.
func (i *Impl) CheckNetwork() checker.Impl {
	return checker.Func(func(r *http.Request) (bool, error) {
		realIP, _ := internal.RealIP(r)
		if !realIP.IsValid() {
			// Without a client address there is no network to look up.
			return false, nil
		}

		network, ok := internal.ClampIP(realIP)
		if !ok {
			return false, nil
		}

		weight, _ := i.networkWeight.Get(r.Context(), internal.SHA256sum(network.String()))
		return weight >= 25, nil
	})
}
// Hash returns the SHA-256 sum of the fixed string "naive honeypot". The
// value does not depend on any state of i.
func (i *Impl) Hash() string {
	const identity = "naive honeypot"
	return internal.SHA256sum(identity)
}
// makeAffirmations spins between one and five affirmation phrases, the
// number being chosen at random on each call.
func (i *Impl) makeAffirmations() []string {
	phrases := make([]string, rand.IntN(5)+1)
	for idx := range phrases {
		phrases[idx] = i.affirmation.Spin()
	}
	return phrases
}
// makeSpins spins between one and five body paragraphs, the number being
// chosen at random on each call.
func (i *Impl) makeSpins() []string {
	paragraphs := make([]string, rand.IntN(5)+1)
	for idx := range paragraphs {
		paragraphs[idx] = i.body.Spin()
	}
	return paragraphs
}
// makeTitle spins a single page title from the title generator.
func (i *Impl) makeTitle() string {
	heading := i.title.Spin()
	return heading
}
// ServeHTTP serves one generated honeypot page and records the hit.
//
// Every request increments the counters for the client's clamped network and
// for its User-Agent. The page consists of a spun title, one to five spun
// paragraphs, and one to five links whose targets are fresh UUIDs relative to
// the current path. Requests whose client IP is missing or cannot be clamped
// to a network are answered with 418 and are not counted.
//
// Page generation time is observed on honeypot.Timings under the "naive"
// label, in milliseconds with sub-millisecond precision.
func (i *Impl) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	t0 := time.Now()
	lg := internal.GetRequestLogger(i.lg, r)

	// The id is only used to correlate log lines; invent one when the route
	// did not carry it.
	id := r.PathValue("id")
	if id == "" {
		id = uuid.NewString()
	}

	realIP, _ := internal.RealIP(r)
	if !realIP.IsValid() {
		lg.Error("the real IP is somehow invalid, bad middleware stack?")
		http.Error(w, "The cake is a lie", http.StatusTeapot)
		return
	}

	network, ok := internal.ClampIP(realIP)
	if !ok {
		lg.Error("clampIP failed", "output", network, "ok", ok)
		http.Error(w, "The cake is a lie", http.StatusTeapot)
		return
	}

	networkCount := i.incrementNetwork(r.Context(), network.String())
	uaCount := i.incrementUA(r.Context(), r.UserAgent())

	stage := r.PathValue("stage")
	switch {
	case stage == "init":
		lg.Debug("found new entrance point", "id", id, "stage", stage, "userAgent", r.UserAgent(), "clampedIP", network)
	case networkCount%256 == 0, uaCount%256 == 0:
		// Warn once per 256 hits rather than on every request. Either counter
		// can trip this, so both identities are logged.
		lg.Warn("found possible crawler", "id", id, "network", network, "userAgent", r.UserAgent())
	}

	spins := i.makeSpins()
	affirmations := i.makeAffirmations()
	title := i.makeTitle()

	links := make([]link, 0, len(affirmations))
	for _, affirmation := range affirmations {
		links = append(links, link{
			href: uuid.NewString(),
			body: affirmation,
		})
	}

	templ.Handler(
		base(title, i.maze(spins, links)),
		templ.WithStreaming(),
		templ.WithStatus(http.StatusOK),
	).ServeHTTP(w, r)

	// Convert through float64 so fast pages are not truncated to 0 ms; the
	// histogram's smallest bucket is 0.5.
	elapsed := time.Since(t0)
	honeypot.Timings.WithLabelValues("naive").Observe(float64(elapsed) / float64(time.Millisecond))
}
// link is one anchor rendered into the maze page.
type link struct {
// href is the link target; the template renders it relative to the current
// path as "./<href>".
href string
// body is the visible link text.
body string
}

View file

@ -0,0 +1,36 @@
package naive
import "fmt"
// base renders the HTML document shell: an inline stylesheet, the title in
// both <title> and <h1>, and then the body component.
templ base(title string, body templ.Component) {
<!DOCTYPE html>
<html>
<head>
<style>
html {
max-width: 70ch;
padding: 3em 1em;
margin: auto;
line-height: 1.75;
font-size: 1.25em;
}
</style>
<title>{ title }</title>
</head>
<body>
<h1>{ title }</h1>
@body
</body>
</html>
}
// maze renders each body string as a paragraph, followed by a list with one
// anchor per link. Each anchor points at "./<href>", i.e. relative to the
// current path.
templ (i Impl) maze(body []string, links []link) {
for _, paragraph := range body {
<p>{ paragraph }</p>
}
<ul>
for _, link := range links {
<li><a href={ templ.SafeURL(fmt.Sprintf("./%s", link.href)) }>{ link.body }</a></li>
}
</ul>
}

160
internal/honeypot/naive/page_templ.go generated Normal file
View file

@ -0,0 +1,160 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
package naive
//lint:file-ignore SA4006 This context is only used if a nested component is present.
import "github.com/a-h/templ"
import templruntime "github.com/a-h/templ/runtime"
import "fmt"
// base is the templ-generated renderer for the "base" component in
// page.templ; change the template and regenerate rather than editing this
// function.
//
// It writes the document head with an inline stylesheet, the escaped title
// in both <title> and <h1>, then renders body and closes the document.
func base(title string, body templ.Component) templ.Component {
return templruntime.GeneratedTemplate(func(templ_7745c5c3_Input templruntime.GeneratedComponentInput) (templ_7745c5c3_Err error) {
templ_7745c5c3_W, ctx := templ_7745c5c3_Input.Writer, templ_7745c5c3_Input.Context
if templ_7745c5c3_CtxErr := ctx.Err(); templ_7745c5c3_CtxErr != nil {
return templ_7745c5c3_CtxErr
}
templ_7745c5c3_Buffer, templ_7745c5c3_IsBuffer := templruntime.GetBuffer(templ_7745c5c3_W)
if !templ_7745c5c3_IsBuffer {
defer func() {
templ_7745c5c3_BufErr := templruntime.ReleaseBuffer(templ_7745c5c3_Buffer)
if templ_7745c5c3_Err == nil {
templ_7745c5c3_Err = templ_7745c5c3_BufErr
}
}()
}
ctx = templ.InitializeContext(ctx)
templ_7745c5c3_Var1 := templ.GetChildren(ctx)
if templ_7745c5c3_Var1 == nil {
templ_7745c5c3_Var1 = templ.NopComponent
}
ctx = templ.ClearChildren(ctx)
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 1, "<!doctype html><html><head><style>\n html {\n max-width: 70ch;\n padding: 3em 1em;\n margin: auto;\n line-height: 1.75;\n font-size: 1.25em;\n }\n </style><title>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var2 string
templ_7745c5c3_Var2, templ_7745c5c3_Err = templ.JoinStringErrs(title)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `page.templ`, Line: 18, Col: 17}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var2))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 2, "</title></head><body><h1>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var3 string
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(title)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `page.templ`, Line: 21, Col: 14}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 3, "</h1>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = body.Render(ctx, templ_7745c5c3_Buffer)
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 4, "</body></html>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
return nil
})
}
// maze is the templ-generated renderer for the "maze" component in
// page.templ; change the template and regenerate rather than editing this
// function.
//
// It writes every body string as an escaped <p>, then a <ul> holding one
// <li><a> per link, with the href set to "./" followed by link.href and the
// escaped link.body as the anchor text.
func (i Impl) maze(body []string, links []link) templ.Component {
return templruntime.GeneratedTemplate(func(templ_7745c5c3_Input templruntime.GeneratedComponentInput) (templ_7745c5c3_Err error) {
templ_7745c5c3_W, ctx := templ_7745c5c3_Input.Writer, templ_7745c5c3_Input.Context
if templ_7745c5c3_CtxErr := ctx.Err(); templ_7745c5c3_CtxErr != nil {
return templ_7745c5c3_CtxErr
}
templ_7745c5c3_Buffer, templ_7745c5c3_IsBuffer := templruntime.GetBuffer(templ_7745c5c3_W)
if !templ_7745c5c3_IsBuffer {
defer func() {
templ_7745c5c3_BufErr := templruntime.ReleaseBuffer(templ_7745c5c3_Buffer)
if templ_7745c5c3_Err == nil {
templ_7745c5c3_Err = templ_7745c5c3_BufErr
}
}()
}
ctx = templ.InitializeContext(ctx)
templ_7745c5c3_Var4 := templ.GetChildren(ctx)
if templ_7745c5c3_Var4 == nil {
templ_7745c5c3_Var4 = templ.NopComponent
}
ctx = templ.ClearChildren(ctx)
for _, paragraph := range body {
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 5, "<p>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var5 string
templ_7745c5c3_Var5, templ_7745c5c3_Err = templ.JoinStringErrs(paragraph)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `page.templ`, Line: 29, Col: 16}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var5))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 6, "</p>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 7, "<ul>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
for _, link := range links {
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 8, "<li><a href=\"")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var6 templ.SafeURL
templ_7745c5c3_Var6, templ_7745c5c3_Err = templ.JoinURLErrs(templ.SafeURL(fmt.Sprintf("./%s", link.href)))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `page.templ`, Line: 33, Col: 62}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var6))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 9, "\">")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var7 string
templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs(link.body)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `page.templ`, Line: 33, Col: 76}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 10, "</a></li>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 11, "</ul>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
return nil
})
}
var _ = templruntime.GeneratedTemplate

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long