* feat: first implementation of honeypot logic This is a bit of an experiment, stick with me. The core idea here is that badly written crawlers are that: badly written. They look for anything that contains `<a href="whatever" />` tags and will blindly use those values to recurse. This takes advantage of that by hiding a link in a `<script>` tag like this: ```html <script type="ignore"><a href="/bots-only">Don't click</a></script> ``` Browsers will ignore it because they have no handler for the "ignore" script type. This current draft is very unoptimized (it takes like 7 seconds to generate a page on my tower), however switching spintax libraries will make this much faster. The hope is to make this pluggable with WebAssembly such that we force administrators to choose a storage method. First we crawl before we walk. The AI involvement in this commit is limited to the spintax in affirmations.txt, spintext.txt, and titles.txt. This generates a bunch of "pseudoprofound bullshit" like the following: > This Restoration to Balance & Alignment > > There's a moment when creators are being called to realize that the work > can't be reduced to results, but about energy. We don't innovate products > by pushing harder, we do it by holding the vision. Because momentum can't > be forced, it unfolds over time when culture are moving in the same > direction. We're being invited into a paradigm shift in how we think > about innovation. [...] This is intended to "look" like normal article text. As this is a first draft, this sucks and will be improved upon. 
Assisted-by: GLM 4.6, ChatGPT, GPT-OSS 120b Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(honeypot/naive): optimize hilariously Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(honeypot/naive): attempt to automatically filter out based on crawling Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(lib): use mazeGen instead of bsGen Signed-off-by: Xe Iaso <me@xeiaso.net> * docs: add honeypot docs Signed-off-by: Xe Iaso <me@xeiaso.net> * chore(test): go mod tidy Signed-off-by: Xe Iaso <me@xeiaso.net> * chore: fix spelling metadata Signed-off-by: Xe Iaso <me@xeiaso.net> * chore: spelling Signed-off-by: Xe Iaso <me@xeiaso.net> --------- Signed-off-by: Xe Iaso <me@xeiaso.net>
206 lines
5.1 KiB
Go
206 lines
5.1 KiB
Go
package naive
|
|
|
|
import (
|
|
"context"
|
|
_ "embed"
|
|
"fmt"
|
|
"log/slog"
|
|
"math/rand/v2"
|
|
"net/http"
|
|
"time"
|
|
|
|
"github.com/TecharoHQ/anubis/internal"
|
|
"github.com/TecharoHQ/anubis/internal/honeypot"
|
|
"github.com/TecharoHQ/anubis/lib/policy/checker"
|
|
"github.com/TecharoHQ/anubis/lib/store"
|
|
"github.com/a-h/templ"
|
|
"github.com/google/uuid"
|
|
"github.com/nikandfor/spintax"
|
|
)
|
|
|
|
//go:generate go tool github.com/a-h/templ/cmd/templ generate
|
|
|
|
// XXX(Xe): All of this was generated by ChatGPT, GLM 4.6, and GPT-OSS 120b. This is pseudoprofound bullshit in spintax[1] format so that the bullshit generator can emit plausibly human-authored text while being very computationally cheap.
|
|
//
|
|
// It feels somewhat poetic to use spammer technology in Anubis.
|
|
//
|
|
// [1]: https://outboundly.ai/blogs/what-is-spintax-and-how-to-use-it/
|
|
//
|
|
//go:embed spintext.txt
|
|
var spintext string
|
|
|
|
//go:embed titles.txt
|
|
var titles string
|
|
|
|
//go:embed affirmations.txt
|
|
var affirmations string
|
|
|
|
func New(st store.Interface, lg *slog.Logger) (*Impl, error) {
|
|
affirmation, err := spintax.Parse(affirmations)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("can't parse affirmations: %w", err)
|
|
}
|
|
|
|
body, err := spintax.Parse(spintext)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("can't parse bodies: %w", err)
|
|
}
|
|
|
|
title, err := spintax.Parse(titles)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("can't parse titles: %w", err)
|
|
}
|
|
|
|
lg.Debug("initialized basic bullshit generator", "affirmations", affirmation.Count(), "bodies", body.Count(), "titles", title.Count())
|
|
|
|
return &Impl{
|
|
st: st,
|
|
infos: store.JSON[honeypot.Info]{Underlying: st, Prefix: "honeypot:info"},
|
|
uaWeight: store.JSON[int]{Underlying: st, Prefix: "honeypot:user-agent"},
|
|
networkWeight: store.JSON[int]{Underlying: st, Prefix: "honeypot:network"},
|
|
affirmation: affirmation,
|
|
body: body,
|
|
title: title,
|
|
lg: lg.With("component", "honeypot/naive"),
|
|
}, nil
|
|
}
|
|
|
|
type Impl struct {
|
|
st store.Interface
|
|
infos store.JSON[honeypot.Info]
|
|
uaWeight store.JSON[int]
|
|
networkWeight store.JSON[int]
|
|
lg *slog.Logger
|
|
|
|
affirmation, body, title spintax.Spintax
|
|
}
|
|
|
|
func (i *Impl) incrementUA(ctx context.Context, userAgent string) int {
|
|
result, _ := i.uaWeight.Get(ctx, internal.SHA256sum(userAgent))
|
|
result++
|
|
i.uaWeight.Set(ctx, internal.SHA256sum(userAgent), result, time.Hour)
|
|
return result
|
|
}
|
|
|
|
func (i *Impl) incrementNetwork(ctx context.Context, network string) int {
|
|
result, _ := i.networkWeight.Get(ctx, internal.SHA256sum(network))
|
|
result++
|
|
i.networkWeight.Set(ctx, internal.SHA256sum(network), result, time.Hour)
|
|
return result
|
|
}
|
|
|
|
func (i *Impl) CheckUA() checker.Impl {
|
|
return checker.Func(func(r *http.Request) (bool, error) {
|
|
result, _ := i.uaWeight.Get(r.Context(), internal.SHA256sum(r.UserAgent()))
|
|
if result >= 25 {
|
|
return true, nil
|
|
}
|
|
|
|
return false, nil
|
|
})
|
|
}
|
|
|
|
func (i *Impl) CheckNetwork() checker.Impl {
|
|
return checker.Func(func(r *http.Request) (bool, error) {
|
|
result, _ := i.uaWeight.Get(r.Context(), internal.SHA256sum(r.UserAgent()))
|
|
if result >= 25 {
|
|
return true, nil
|
|
}
|
|
|
|
return false, nil
|
|
})
|
|
}
|
|
|
|
func (i *Impl) Hash() string {
|
|
return internal.SHA256sum("naive honeypot")
|
|
}
|
|
|
|
func (i *Impl) makeAffirmations() []string {
|
|
count := rand.IntN(5) + 1
|
|
|
|
var result []string
|
|
for j := 0; j < count; j++ {
|
|
result = append(result, i.affirmation.Spin())
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
func (i *Impl) makeSpins() []string {
|
|
count := rand.IntN(5) + 1
|
|
|
|
var result []string
|
|
for j := 0; j < count; j++ {
|
|
result = append(result, i.body.Spin())
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
func (i *Impl) makeTitle() string {
|
|
return i.title.Spin()
|
|
}
|
|
|
|
func (i *Impl) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|
t0 := time.Now()
|
|
lg := internal.GetRequestLogger(i.lg, r)
|
|
|
|
id := r.PathValue("id")
|
|
if id == "" {
|
|
id = uuid.NewString()
|
|
}
|
|
|
|
realIP, _ := internal.RealIP(r)
|
|
if !realIP.IsValid() {
|
|
lg.Error("the real IP is somehow invalid, bad middleware stack?")
|
|
http.Error(w, "The cake is a lie", http.StatusTeapot)
|
|
return
|
|
}
|
|
|
|
network, ok := internal.ClampIP(realIP)
|
|
if !ok {
|
|
lg.Error("clampIP failed", "output", network, "ok", ok)
|
|
http.Error(w, "The cake is a lie", http.StatusTeapot)
|
|
return
|
|
}
|
|
|
|
networkCount := i.incrementNetwork(r.Context(), network.String())
|
|
uaCount := i.incrementUA(r.Context(), r.UserAgent())
|
|
|
|
stage := r.PathValue("stage")
|
|
|
|
if stage == "init" {
|
|
lg.Debug("found new entrance point", "id", id, "stage", stage, "userAgent", r.UserAgent(), "clampedIP", network)
|
|
} else {
|
|
switch {
|
|
case networkCount%256 == 0, uaCount%256 == 0:
|
|
lg.Warn("found possible crawler", "id", id, "network", network)
|
|
}
|
|
}
|
|
|
|
spins := i.makeSpins()
|
|
affirmations := i.makeAffirmations()
|
|
title := i.makeTitle()
|
|
|
|
var links []link
|
|
for _, affirmation := range affirmations {
|
|
links = append(links, link{
|
|
href: uuid.NewString(),
|
|
body: affirmation,
|
|
})
|
|
}
|
|
|
|
templ.Handler(
|
|
base(title, i.maze(spins, links)),
|
|
templ.WithStreaming(),
|
|
templ.WithStatus(http.StatusOK),
|
|
).ServeHTTP(w, r)
|
|
|
|
t1 := time.Since(t0)
|
|
honeypot.Timings.WithLabelValues("naive").Observe(float64(t1.Milliseconds()))
|
|
}
|
|
|
|
// link is one generated hyperlink: href is the link target and body is the
// visible link text.
type link struct {
	href, body string
}
|