feat: add default OpenGraph tags to configuration file (#694)

* feat(config): opengraph passthrough configuration

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore(ogtags): use config.OpenGraph for configuration

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: wire up ogtags config in most of the app

Signed-off-by: Xe Iaso <me@xeiaso.net>

* feat(ogtags): return default tags if they are supplied

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: make OpenGraph legal so we have some sanity in reviewing

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: spelling

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(lib): use OpenGraph.Enabled

Signed-off-by: Xe Iaso <me@xeiaso.net>

* test(lib): load default config file if one is not specified in spawnAnubis

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore(config): fix ST1005

Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs: document open graph defaults and its new home in the policy file

Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs(installation): point to weight threshold new home

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore: rename default to override

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore(default-config): add off-by-default opengraph settings to bot policy file

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(anubis): make build

Signed-off-by: Xe Iaso <me@xeiaso.net>

* test(lib): fix build

Signed-off-by: Xe Iaso <me@xeiaso.net>

---------

Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
Xe Iaso 2025-06-19 18:00:44 -04:00 committed by GitHub
parent 7aa732c700
commit 4948036f39
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 416 additions and 78 deletions

View file

@ -13,6 +13,10 @@ func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]st
return nil, errors.New("nil URL provided, cannot fetch OG tags")
}
if len(c.ogOverride) != 0 {
return c.ogOverride, nil
}
target := c.getTarget(url)
cacheKey := c.generateCacheKey(target, originalHost)

View file

@ -7,10 +7,49 @@ import (
"reflect"
"testing"
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
)
func TestCacheReturnsDefault(t *testing.T) {
want := map[string]string{
"og:title": "Foo bar",
"og:description": "The best website ever made!!!1!",
}
cache := NewOGTagCache("", config.OpenGraph{
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
Override: want,
})
u, err := url.Parse("https://anubis.techaro.lol")
if err != nil {
t.Fatal(err)
}
result, err := cache.GetOGTags(u, "anubis.techaro.lol")
if err != nil {
t.Fatal(err)
}
for k, v := range want {
t.Run(k, func(t *testing.T) {
if got := result[k]; got != v {
t.Logf("want: tags[%q] = %q", k, v)
t.Logf("got: tags[%q] = %q", k, got)
t.Error("invalid result from function")
}
})
}
}
func TestCheckCache(t *testing.T) {
cache := NewOGTagCache("http://example.com", true, time.Minute, false)
cache := NewOGTagCache("http://example.com", config.OpenGraph{
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
// Set up test data
urlStr := "http://example.com/page"
@ -69,7 +108,11 @@ func TestGetOGTags(t *testing.T) {
defer ts.Close()
// Create an instance of OGTagCache with a short TTL for testing
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, false)
cache := NewOGTagCache(ts.URL, config.OpenGraph{
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
// Parse the test server URL
parsedURL, err := url.Parse(ts.URL)
@ -216,7 +259,11 @@ func TestGetOGTagsWithHostConsideration(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
loadCount = 0 // Reset load count for each test case
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, tc.ogCacheConsiderHost)
cache := NewOGTagCache(ts.URL, config.OpenGraph{
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: tc.ogCacheConsiderHost,
})
for i, req := range tc.requests {
ogTags, err := cache.GetOGTags(parsedURL, req.host)

View file

@ -10,6 +10,7 @@ import (
"testing"
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
"golang.org/x/net/html"
)
@ -80,7 +81,11 @@ func TestFetchHTMLDocument(t *testing.T) {
}))
defer ts.Close()
cache := NewOGTagCache("", true, time.Minute, false)
cache := NewOGTagCache("", config.OpenGraph{
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
doc, err := cache.fetchHTMLDocument(ts.URL, "anything")
if tt.expectError {
@ -107,7 +112,11 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) {
t.Skip("test requires theoretical network egress")
}
cache := NewOGTagCache("", true, time.Minute, false)
cache := NewOGTagCache("", config.OpenGraph{
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
doc, err := cache.fetchHTMLDocument("http://invalid.url.that.doesnt.exist.example", "anything")

View file

@ -6,6 +6,8 @@ import (
"net/url"
"testing"
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
)
func TestIntegrationGetOGTags(t *testing.T) {
@ -104,7 +106,11 @@ func TestIntegrationGetOGTags(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
// Create cache instance
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, false)
cache := NewOGTagCache(ts.URL, config.OpenGraph{
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
// Create URL for test
testURL, _ := url.Parse(ts.URL)

View file

@ -6,6 +6,7 @@ import (
"strings"
"testing"
"github.com/TecharoHQ/anubis/lib/policy/config"
"golang.org/x/net/html"
)
@ -29,7 +30,7 @@ func BenchmarkGetTarget(b *testing.B) {
for _, tt := range tests {
b.Run(tt.name, func(b *testing.B) {
cache := NewOGTagCache(tt.target, false, 0, false)
cache := NewOGTagCache(tt.target, config.OpenGraph{})
urls := make([]*url.URL, len(tt.paths))
for i, path := range tt.paths {
u, _ := url.Parse(path)
@ -65,7 +66,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
</head><body><div><p>Content</p></div></body></html>`,
}
cache := NewOGTagCache("http://example.com", false, 0, false)
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
docs := make([]*html.Node, len(htmlSamples))
for i, sample := range htmlSamples {
@ -83,7 +84,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
// Memory usage test
func TestMemoryUsage(t *testing.T) {
cache := NewOGTagCache("http://example.com", false, 0, false)
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
// Force GC and wait for it to complete
runtime.GC()

View file

@ -10,6 +10,7 @@ import (
"time"
"github.com/TecharoHQ/anubis/decaymap"
"github.com/TecharoHQ/anubis/lib/policy/config"
)
const (
@ -32,9 +33,10 @@ type OGTagCache struct {
ogTimeToLive time.Duration
ogCacheConsiderHost bool
ogPassthrough bool
ogOverride map[string]string
}
func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration, ogTagsConsiderHost bool) *OGTagCache {
func NewOGTagCache(target string, conf config.OpenGraph) *OGTagCache {
// Predefined approved tags and prefixes
defaultApprovedTags := []string{"description", "keywords", "author"}
defaultApprovedPrefixes := []string{"og:", "twitter:", "fediverse:"}
@ -77,9 +79,10 @@ func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration
return &OGTagCache{
cache: decaymap.New[string, map[string]string](),
targetURL: parsedTargetURL,
ogPassthrough: ogPassthrough,
ogTimeToLive: ogTimeToLive,
ogCacheConsiderHost: ogTagsConsiderHost,
ogPassthrough: conf.Enabled,
ogTimeToLive: conf.TimeToLive,
ogCacheConsiderHost: conf.ConsiderHost,
ogOverride: conf.Override,
approvedTags: defaultApprovedTags,
approvedPrefixes: defaultApprovedPrefixes,
client: client,

View file

@ -6,6 +6,7 @@ import (
"testing"
"unicode/utf8"
"github.com/TecharoHQ/anubis/lib/policy/config"
"golang.org/x/net/html"
)
@ -45,7 +46,7 @@ func FuzzGetTarget(f *testing.F) {
}
// Create cache - should not panic
cache := NewOGTagCache(target, false, 0, false)
cache := NewOGTagCache(target, config.OpenGraph{})
// Create URL
u := &url.URL{
@ -129,7 +130,7 @@ func FuzzExtractOGTags(f *testing.F) {
return
}
cache := NewOGTagCache("http://example.com", false, 0, false)
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
// Should not panic
tags := cache.extractOGTags(doc)
@ -185,7 +186,7 @@ func FuzzGetTargetRoundTrip(f *testing.F) {
t.Skip()
}
cache := NewOGTagCache(target, false, 0, false)
cache := NewOGTagCache(target, config.OpenGraph{})
u := &url.URL{Path: path, RawQuery: query}
result := cache.getTarget(u)
@ -242,7 +243,7 @@ func FuzzExtractMetaTagInfo(f *testing.F) {
},
}
cache := NewOGTagCache("http://example.com", false, 0, false)
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
// Should not panic
property, content := cache.extractMetaTagInfo(node)
@ -295,7 +296,7 @@ func BenchmarkFuzzedGetTarget(b *testing.B) {
for _, input := range inputs {
b.Run(input.name, func(b *testing.B) {
cache := NewOGTagCache(input.target, false, 0, false)
cache := NewOGTagCache(input.target, config.OpenGraph{})
u := &url.URL{Path: input.path, RawQuery: input.query}
b.ResetTimer()

View file

@ -13,6 +13,8 @@ import (
"strings"
"testing"
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
)
func TestNewOGTagCache(t *testing.T) {
@ -38,7 +40,11 @@ func TestNewOGTagCache(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cache := NewOGTagCache(tt.target, tt.ogPassthrough, tt.ogTimeToLive, false)
cache := NewOGTagCache(tt.target, config.OpenGraph{
Enabled: tt.ogPassthrough,
TimeToLive: tt.ogTimeToLive,
ConsiderHost: false,
})
if cache == nil {
t.Fatal("expected non-nil cache, got nil")
@ -74,7 +80,11 @@ func TestNewOGTagCache_UnixSocket(t *testing.T) {
socketPath := filepath.Join(tempDir, "test.sock")
target := "unix://" + socketPath
cache := NewOGTagCache(target, true, 5*time.Minute, false)
cache := NewOGTagCache(target, config.OpenGraph{
Enabled: true,
TimeToLive: 5 * time.Minute,
ConsiderHost: false,
})
if cache == nil {
t.Fatal("expected non-nil cache, got nil")
@ -155,7 +165,11 @@ func TestGetTarget(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cache := NewOGTagCache(tt.target, false, time.Minute, false)
cache := NewOGTagCache(tt.target, config.OpenGraph{
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
u := &url.URL{
Path: tt.path,
@ -175,7 +189,9 @@ func TestGetTarget(t *testing.T) {
func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
tempDir := t.TempDir()
socketPath := filepath.Join(tempDir, "anubis-test.sock")
// XXX(Xe): if this is named longer, macOS fails with `bind: invalid argument`
// because the unix socket path is too long. I love computers.
socketPath := filepath.Join(tempDir, "t")
// Ensure the socket does not exist initially
_ = os.Remove(socketPath)
@ -222,7 +238,11 @@ func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
// Create cache instance pointing to the Unix socket
targetURL := "unix://" + socketPath
cache := NewOGTagCache(targetURL, true, 1*time.Minute, false)
cache := NewOGTagCache(targetURL, config.OpenGraph{
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
// Create a dummy URL for the request (path and query matter)
testReqURL, _ := url.Parse("/some/page?query=1")

View file

@ -6,13 +6,18 @@ import (
"testing"
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
"golang.org/x/net/html"
)
// TestExtractOGTags updated with correct expectations based on filtering logic
func TestExtractOGTags(t *testing.T) {
// Use a cache instance that reflects the default approved lists
testCache := NewOGTagCache("", false, time.Minute, false)
testCache := NewOGTagCache("", config.OpenGraph{
Enabled: false,
ConsiderHost: false,
TimeToLive: time.Minute,
})
// Manually set approved tags/prefixes based on the user request for clarity
testCache.approvedTags = []string{"description"}
testCache.approvedPrefixes = []string{"og:"}
@ -189,7 +194,11 @@ func TestIsOGMetaTag(t *testing.T) {
func TestExtractMetaTagInfo(t *testing.T) {
// Use a cache instance that reflects the default approved lists
testCache := NewOGTagCache("", false, time.Minute, false)
testCache := NewOGTagCache("", config.OpenGraph{
Enabled: false,
ConsiderHost: false,
TimeToLive: time.Minute,
})
testCache.approvedTags = []string{"description"}
testCache.approvedPrefixes = []string{"og:"}