feat: first implementation of honeypot logic (#1342)
* feat: first implementation of honeypot logic This is a bit of an experiment, stick with me. The core idea here is that badly written crawlers are that: badly written. They look for anything that contains `<a href="whatever" />` tags and will blindly use those values to recurse. This takes advantage of that by hiding a link in a `<script>` tag like this: ```html <script type="ignore"><a href="/bots-only">Don't click</a></script> ``` Browsers will ignore it because they have no handler for the "ignore" script type. This current draft is very unoptimized (it takes like 7 seconds to generate a page on my tower), however switching spintax libraries will make this much faster. The hope is to make this pluggable with WebAssembly such that we force administrators to choose a storage method. First we crawl before we walk. The AI involvement in this commit is limited to the spintax in affirmations.txt, spintext.txt, and titles.txt. This generates a bunch of "pseudoprofound bullshit" like the following: > This Restoration to Balance & Alignment > > There's a moment when creators are being called to realize that the work > can't be reduced to results, but about energy. We don't innovate products > by pushing harder, we do it by holding the vision. Because momentum can't > be forced, it unfolds over time when culture are moving in the same > direction. We're being invited into a paradigm shift in how we think > about innovation. [...] This is intended to "look" like normal article text. As this is a first draft, this sucks and will be improved upon. 
Assisted-by: GLM 4.6, ChatGPT, GPT-OSS 120b Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(honeypot/naive): optimize hilariously Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(honeypot/naive): attempt to automatically filter out based on crawling Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(lib): use mazeGen instead of bsGen Signed-off-by: Xe Iaso <me@xeiaso.net> * docs: add honeypot docs Signed-off-by: Xe Iaso <me@xeiaso.net> * chore(test): go mod tidy Signed-off-by: Xe Iaso <me@xeiaso.net> * chore: fix spelling metadata Signed-off-by: Xe Iaso <me@xeiaso.net> * chore: spelling Signed-off-by: Xe Iaso <me@xeiaso.net> --------- Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
parent
cb91145352
commit
122e4bc072
25 changed files with 968 additions and 84 deletions
206
internal/honeypot/naive/naive.go
Normal file
206
internal/honeypot/naive/naive.go
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
package naive
|
||||
|
||||
import (
|
||||
"context"
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"math/rand/v2"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/internal"
|
||||
"github.com/TecharoHQ/anubis/internal/honeypot"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/checker"
|
||||
"github.com/TecharoHQ/anubis/lib/store"
|
||||
"github.com/a-h/templ"
|
||||
"github.com/google/uuid"
|
||||
"github.com/nikandfor/spintax"
|
||||
)
|
||||
|
||||
//go:generate go tool github.com/a-h/templ/cmd/templ generate
|
||||
|
||||
// XXX(Xe): All of this was generated by ChatGPT, GLM 4.6, and GPT-OSS 120b. This is pseudoprofound bullshit in spintax[1] format so that the bullshit generator can emit plausibly human-authored text while being very computationally cheap.
|
||||
//
|
||||
// It feels somewhat poetic to use spammer technology in Anubis.
|
||||
//
|
||||
// [1]: https://outboundly.ai/blogs/what-is-spintax-and-how-to-use-it/
|
||||
//
|
||||
//go:embed spintext.txt
|
||||
var spintext string
|
||||
|
||||
//go:embed titles.txt
|
||||
var titles string
|
||||
|
||||
//go:embed affirmations.txt
|
||||
var affirmations string
|
||||
|
||||
func New(st store.Interface, lg *slog.Logger) (*Impl, error) {
|
||||
affirmation, err := spintax.Parse(affirmations)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't parse affirmations: %w", err)
|
||||
}
|
||||
|
||||
body, err := spintax.Parse(spintext)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't parse bodies: %w", err)
|
||||
}
|
||||
|
||||
title, err := spintax.Parse(titles)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't parse titles: %w", err)
|
||||
}
|
||||
|
||||
lg.Debug("initialized basic bullshit generator", "affirmations", affirmation.Count(), "bodies", body.Count(), "titles", title.Count())
|
||||
|
||||
return &Impl{
|
||||
st: st,
|
||||
infos: store.JSON[honeypot.Info]{Underlying: st, Prefix: "honeypot:info"},
|
||||
uaWeight: store.JSON[int]{Underlying: st, Prefix: "honeypot:user-agent"},
|
||||
networkWeight: store.JSON[int]{Underlying: st, Prefix: "honeypot:network"},
|
||||
affirmation: affirmation,
|
||||
body: body,
|
||||
title: title,
|
||||
lg: lg.With("component", "honeypot/naive"),
|
||||
}, nil
|
||||
}
|
||||
|
||||
type Impl struct {
|
||||
st store.Interface
|
||||
infos store.JSON[honeypot.Info]
|
||||
uaWeight store.JSON[int]
|
||||
networkWeight store.JSON[int]
|
||||
lg *slog.Logger
|
||||
|
||||
affirmation, body, title spintax.Spintax
|
||||
}
|
||||
|
||||
func (i *Impl) incrementUA(ctx context.Context, userAgent string) int {
|
||||
result, _ := i.uaWeight.Get(ctx, internal.SHA256sum(userAgent))
|
||||
result++
|
||||
i.uaWeight.Set(ctx, internal.SHA256sum(userAgent), result, time.Hour)
|
||||
return result
|
||||
}
|
||||
|
||||
func (i *Impl) incrementNetwork(ctx context.Context, network string) int {
|
||||
result, _ := i.networkWeight.Get(ctx, internal.SHA256sum(network))
|
||||
result++
|
||||
i.networkWeight.Set(ctx, internal.SHA256sum(network), result, time.Hour)
|
||||
return result
|
||||
}
|
||||
|
||||
func (i *Impl) CheckUA() checker.Impl {
|
||||
return checker.Func(func(r *http.Request) (bool, error) {
|
||||
result, _ := i.uaWeight.Get(r.Context(), internal.SHA256sum(r.UserAgent()))
|
||||
if result >= 25 {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false, nil
|
||||
})
|
||||
}
|
||||
|
||||
func (i *Impl) CheckNetwork() checker.Impl {
|
||||
return checker.Func(func(r *http.Request) (bool, error) {
|
||||
result, _ := i.uaWeight.Get(r.Context(), internal.SHA256sum(r.UserAgent()))
|
||||
if result >= 25 {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false, nil
|
||||
})
|
||||
}
|
||||
|
||||
func (i *Impl) Hash() string {
|
||||
return internal.SHA256sum("naive honeypot")
|
||||
}
|
||||
|
||||
func (i *Impl) makeAffirmations() []string {
|
||||
count := rand.IntN(5) + 1
|
||||
|
||||
var result []string
|
||||
for j := 0; j < count; j++ {
|
||||
result = append(result, i.affirmation.Spin())
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (i *Impl) makeSpins() []string {
|
||||
count := rand.IntN(5) + 1
|
||||
|
||||
var result []string
|
||||
for j := 0; j < count; j++ {
|
||||
result = append(result, i.body.Spin())
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (i *Impl) makeTitle() string {
|
||||
return i.title.Spin()
|
||||
}
|
||||
|
||||
func (i *Impl) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
t0 := time.Now()
|
||||
lg := internal.GetRequestLogger(i.lg, r)
|
||||
|
||||
id := r.PathValue("id")
|
||||
if id == "" {
|
||||
id = uuid.NewString()
|
||||
}
|
||||
|
||||
realIP, _ := internal.RealIP(r)
|
||||
if !realIP.IsValid() {
|
||||
lg.Error("the real IP is somehow invalid, bad middleware stack?")
|
||||
http.Error(w, "The cake is a lie", http.StatusTeapot)
|
||||
return
|
||||
}
|
||||
|
||||
network, ok := internal.ClampIP(realIP)
|
||||
if !ok {
|
||||
lg.Error("clampIP failed", "output", network, "ok", ok)
|
||||
http.Error(w, "The cake is a lie", http.StatusTeapot)
|
||||
return
|
||||
}
|
||||
|
||||
networkCount := i.incrementNetwork(r.Context(), network.String())
|
||||
uaCount := i.incrementUA(r.Context(), r.UserAgent())
|
||||
|
||||
stage := r.PathValue("stage")
|
||||
|
||||
if stage == "init" {
|
||||
lg.Debug("found new entrance point", "id", id, "stage", stage, "userAgent", r.UserAgent(), "clampedIP", network)
|
||||
} else {
|
||||
switch {
|
||||
case networkCount%256 == 0, uaCount%256 == 0:
|
||||
lg.Warn("found possible crawler", "id", id, "network", network)
|
||||
}
|
||||
}
|
||||
|
||||
spins := i.makeSpins()
|
||||
affirmations := i.makeAffirmations()
|
||||
title := i.makeTitle()
|
||||
|
||||
var links []link
|
||||
for _, affirmation := range affirmations {
|
||||
links = append(links, link{
|
||||
href: uuid.NewString(),
|
||||
body: affirmation,
|
||||
})
|
||||
}
|
||||
|
||||
templ.Handler(
|
||||
base(title, i.maze(spins, links)),
|
||||
templ.WithStreaming(),
|
||||
templ.WithStatus(http.StatusOK),
|
||||
).ServeHTTP(w, r)
|
||||
|
||||
t1 := time.Since(t0)
|
||||
honeypot.Timings.WithLabelValues("naive").Observe(float64(t1.Milliseconds()))
|
||||
}
|
||||
|
||||
// link is one generated hyperlink handed to the page template: href is the
// link target and body is the visible link text.
type link struct {
	href, body string
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue