feat: add default OpenGraph tags to configuration file (#694)
* feat(config): opengraph passthrough configuration
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* chore(ogtags): use config.OpenGraph for configuration
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* chore: wire up ogtags config in most of the app
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* feat(ogtags): return default tags if they are supplied
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* chore: make OpenGraph legal so we have some sanity in reviewing
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* chore: spelling
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* fix(lib): use OpenGraph.Enabled
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* test(lib): load default config file if one is not specified in spawnAnubis
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* chore(config): fix ST1005
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* docs: document open graph defaults and its new home in the policy file
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* docs(installation): point to weight threshold new home
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* chore: rename default to override
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* chore(default-config): add off-by-default opengraph settings to bot policy file
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* fix(anubis): make build
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* test(lib): fix build
  Signed-off-by: Xe Iaso <me@xeiaso.net>
---------
Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
parent
7aa732c700
commit
4948036f39
25 changed files with 416 additions and 78 deletions
|
|
@ -13,6 +13,10 @@ func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]st
|
|||
return nil, errors.New("nil URL provided, cannot fetch OG tags")
|
||||
}
|
||||
|
||||
if len(c.ogOverride) != 0 {
|
||||
return c.ogOverride, nil
|
||||
}
|
||||
|
||||
target := c.getTarget(url)
|
||||
cacheKey := c.generateCacheKey(target, originalHost)
|
||||
|
||||
|
|
|
|||
|
|
@ -7,10 +7,49 @@ import (
|
|||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
)
|
||||
|
||||
// TestCacheReturnsDefault builds an OGTagCache whose config carries an
// Override map and checks that GetOGTags returns every key/value pair from
// that map. The cache is created with an empty target string.
func TestCacheReturnsDefault(t *testing.T) {
|
||||
// Tags supplied as the override; these are also the expected result.
want := map[string]string{
|
||||
"og:title": "Foo bar",
|
||||
"og:description": "The best website ever made!!!1!",
|
||||
}
|
||||
cache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
Override: want,
|
||||
})
|
||||
|
||||
u, err := url.Parse("https://anubis.techaro.lol")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
result, err := cache.GetOGTags(u, "anubis.techaro.lol")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// One subtest per expected tag, named after the tag key. Only keys present
// in want are compared; extra keys in result are not checked.
for k, v := range want {
|
||||
t.Run(k, func(t *testing.T) {
|
||||
if got := result[k]; got != v {
|
||||
t.Logf("want: tags[%q] = %q", k, v)
|
||||
t.Logf("got: tags[%q] = %q", k, got)
|
||||
t.Error("invalid result from function")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckCache(t *testing.T) {
|
||||
cache := NewOGTagCache("http://example.com", true, time.Minute, false)
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
// Set up test data
|
||||
urlStr := "http://example.com/page"
|
||||
|
|
@ -69,7 +108,11 @@ func TestGetOGTags(t *testing.T) {
|
|||
defer ts.Close()
|
||||
|
||||
// Create an instance of OGTagCache with a short TTL for testing
|
||||
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, false)
|
||||
cache := NewOGTagCache(ts.URL, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
// Parse the test server URL
|
||||
parsedURL, err := url.Parse(ts.URL)
|
||||
|
|
@ -216,7 +259,11 @@ func TestGetOGTagsWithHostConsideration(t *testing.T) {
|
|||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
loadCount = 0 // Reset load count for each test case
|
||||
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, tc.ogCacheConsiderHost)
|
||||
cache := NewOGTagCache(ts.URL, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: tc.ogCacheConsiderHost,
|
||||
})
|
||||
|
||||
for i, req := range tc.requests {
|
||||
ogTags, err := cache.GetOGTags(parsedURL, req.host)
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
|
|
@ -80,7 +81,11 @@ func TestFetchHTMLDocument(t *testing.T) {
|
|||
}))
|
||||
defer ts.Close()
|
||||
|
||||
cache := NewOGTagCache("", true, time.Minute, false)
|
||||
cache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
doc, err := cache.fetchHTMLDocument(ts.URL, "anything")
|
||||
|
||||
if tt.expectError {
|
||||
|
|
@ -107,7 +112,11 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) {
|
|||
t.Skip("test requires theoretical network egress")
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("", true, time.Minute, false)
|
||||
cache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
doc, err := cache.fetchHTMLDocument("http://invalid.url.that.doesnt.exist.example", "anything")
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ import (
|
|||
"net/url"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
)
|
||||
|
||||
func TestIntegrationGetOGTags(t *testing.T) {
|
||||
|
|
@ -104,7 +106,11 @@ func TestIntegrationGetOGTags(t *testing.T) {
|
|||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Create cache instance
|
||||
cache := NewOGTagCache(ts.URL, true, 1*time.Minute, false)
|
||||
cache := NewOGTagCache(ts.URL, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
// Create URL for test
|
||||
testURL, _ := url.Parse(ts.URL)
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import (
|
|||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
|
|
@ -29,7 +30,7 @@ func BenchmarkGetTarget(b *testing.B) {
|
|||
|
||||
for _, tt := range tests {
|
||||
b.Run(tt.name, func(b *testing.B) {
|
||||
cache := NewOGTagCache(tt.target, false, 0, false)
|
||||
cache := NewOGTagCache(tt.target, config.OpenGraph{})
|
||||
urls := make([]*url.URL, len(tt.paths))
|
||||
for i, path := range tt.paths {
|
||||
u, _ := url.Parse(path)
|
||||
|
|
@ -65,7 +66,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
|
|||
</head><body><div><p>Content</p></div></body></html>`,
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("http://example.com", false, 0, false)
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
docs := make([]*html.Node, len(htmlSamples))
|
||||
|
||||
for i, sample := range htmlSamples {
|
||||
|
|
@ -83,7 +84,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
|
|||
|
||||
// Memory usage test
|
||||
func TestMemoryUsage(t *testing.T) {
|
||||
cache := NewOGTagCache("http://example.com", false, 0, false)
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
|
||||
// Force GC and wait for it to complete
|
||||
runtime.GC()
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/decaymap"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
)
|
||||
|
||||
const (
|
||||
|
|
@ -32,9 +33,10 @@ type OGTagCache struct {
|
|||
ogTimeToLive time.Duration
|
||||
ogCacheConsiderHost bool
|
||||
ogPassthrough bool
|
||||
ogOverride map[string]string
|
||||
}
|
||||
|
||||
func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration, ogTagsConsiderHost bool) *OGTagCache {
|
||||
func NewOGTagCache(target string, conf config.OpenGraph) *OGTagCache {
|
||||
// Predefined approved tags and prefixes
|
||||
defaultApprovedTags := []string{"description", "keywords", "author"}
|
||||
defaultApprovedPrefixes := []string{"og:", "twitter:", "fediverse:"}
|
||||
|
|
@ -77,9 +79,10 @@ func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration
|
|||
return &OGTagCache{
|
||||
cache: decaymap.New[string, map[string]string](),
|
||||
targetURL: parsedTargetURL,
|
||||
ogPassthrough: ogPassthrough,
|
||||
ogTimeToLive: ogTimeToLive,
|
||||
ogCacheConsiderHost: ogTagsConsiderHost,
|
||||
ogPassthrough: conf.Enabled,
|
||||
ogTimeToLive: conf.TimeToLive,
|
||||
ogCacheConsiderHost: conf.ConsiderHost,
|
||||
ogOverride: conf.Override,
|
||||
approvedTags: defaultApprovedTags,
|
||||
approvedPrefixes: defaultApprovedPrefixes,
|
||||
client: client,
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import (
|
|||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
|
|
@ -45,7 +46,7 @@ func FuzzGetTarget(f *testing.F) {
|
|||
}
|
||||
|
||||
// Create cache - should not panic
|
||||
cache := NewOGTagCache(target, false, 0, false)
|
||||
cache := NewOGTagCache(target, config.OpenGraph{})
|
||||
|
||||
// Create URL
|
||||
u := &url.URL{
|
||||
|
|
@ -129,7 +130,7 @@ func FuzzExtractOGTags(f *testing.F) {
|
|||
return
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("http://example.com", false, 0, false)
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
|
||||
// Should not panic
|
||||
tags := cache.extractOGTags(doc)
|
||||
|
|
@ -185,7 +186,7 @@ func FuzzGetTargetRoundTrip(f *testing.F) {
|
|||
t.Skip()
|
||||
}
|
||||
|
||||
cache := NewOGTagCache(target, false, 0, false)
|
||||
cache := NewOGTagCache(target, config.OpenGraph{})
|
||||
u := &url.URL{Path: path, RawQuery: query}
|
||||
|
||||
result := cache.getTarget(u)
|
||||
|
|
@ -242,7 +243,7 @@ func FuzzExtractMetaTagInfo(f *testing.F) {
|
|||
},
|
||||
}
|
||||
|
||||
cache := NewOGTagCache("http://example.com", false, 0, false)
|
||||
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
|
||||
|
||||
// Should not panic
|
||||
property, content := cache.extractMetaTagInfo(node)
|
||||
|
|
@ -295,7 +296,7 @@ func BenchmarkFuzzedGetTarget(b *testing.B) {
|
|||
|
||||
for _, input := range inputs {
|
||||
b.Run(input.name, func(b *testing.B) {
|
||||
cache := NewOGTagCache(input.target, false, 0, false)
|
||||
cache := NewOGTagCache(input.target, config.OpenGraph{})
|
||||
u := &url.URL{Path: input.path, RawQuery: input.query}
|
||||
|
||||
b.ResetTimer()
|
||||
|
|
|
|||
|
|
@ -13,6 +13,8 @@ import (
|
|||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
)
|
||||
|
||||
func TestNewOGTagCache(t *testing.T) {
|
||||
|
|
@ -38,7 +40,11 @@ func TestNewOGTagCache(t *testing.T) {
|
|||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
cache := NewOGTagCache(tt.target, tt.ogPassthrough, tt.ogTimeToLive, false)
|
||||
cache := NewOGTagCache(tt.target, config.OpenGraph{
|
||||
Enabled: tt.ogPassthrough,
|
||||
TimeToLive: tt.ogTimeToLive,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
if cache == nil {
|
||||
t.Fatal("expected non-nil cache, got nil")
|
||||
|
|
@ -74,7 +80,11 @@ func TestNewOGTagCache_UnixSocket(t *testing.T) {
|
|||
socketPath := filepath.Join(tempDir, "test.sock")
|
||||
target := "unix://" + socketPath
|
||||
|
||||
cache := NewOGTagCache(target, true, 5*time.Minute, false)
|
||||
cache := NewOGTagCache(target, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: 5 * time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
if cache == nil {
|
||||
t.Fatal("expected non-nil cache, got nil")
|
||||
|
|
@ -155,7 +165,11 @@ func TestGetTarget(t *testing.T) {
|
|||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
cache := NewOGTagCache(tt.target, false, time.Minute, false)
|
||||
cache := NewOGTagCache(tt.target, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
u := &url.URL{
|
||||
Path: tt.path,
|
||||
|
|
@ -175,7 +189,9 @@ func TestGetTarget(t *testing.T) {
|
|||
func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
socketPath := filepath.Join(tempDir, "anubis-test.sock")
|
||||
// XXX(Xe): if this is named longer, macOS fails with `bind: invalid argument`
|
||||
// because the unix socket path is too long. I love computers.
|
||||
socketPath := filepath.Join(tempDir, "t")
|
||||
|
||||
// Ensure the socket does not exist initially
|
||||
_ = os.Remove(socketPath)
|
||||
|
|
@ -222,7 +238,11 @@ func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
|
|||
|
||||
// Create cache instance pointing to the Unix socket
|
||||
targetURL := "unix://" + socketPath
|
||||
cache := NewOGTagCache(targetURL, true, 1*time.Minute, false)
|
||||
cache := NewOGTagCache(targetURL, config.OpenGraph{
|
||||
Enabled: true,
|
||||
TimeToLive: time.Minute,
|
||||
ConsiderHost: false,
|
||||
})
|
||||
|
||||
// Create a dummy URL for the request (path and query matter)
|
||||
testReqURL, _ := url.Parse("/some/page?query=1")
|
||||
|
|
|
|||
|
|
@ -6,13 +6,18 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// TestExtractOGTags updated with correct expectations based on filtering logic
|
||||
func TestExtractOGTags(t *testing.T) {
|
||||
// Use a cache instance that reflects the default approved lists
|
||||
testCache := NewOGTagCache("", false, time.Minute, false)
|
||||
testCache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: false,
|
||||
ConsiderHost: false,
|
||||
TimeToLive: time.Minute,
|
||||
})
|
||||
// Manually set approved tags/prefixes based on the user request for clarity
|
||||
testCache.approvedTags = []string{"description"}
|
||||
testCache.approvedPrefixes = []string{"og:"}
|
||||
|
|
@ -189,7 +194,11 @@ func TestIsOGMetaTag(t *testing.T) {
|
|||
|
||||
func TestExtractMetaTagInfo(t *testing.T) {
|
||||
// Use a cache instance that reflects the default approved lists
|
||||
testCache := NewOGTagCache("", false, time.Minute, false)
|
||||
testCache := NewOGTagCache("", config.OpenGraph{
|
||||
Enabled: false,
|
||||
ConsiderHost: false,
|
||||
TimeToLive: time.Minute,
|
||||
})
|
||||
testCache.approvedTags = []string{"description"}
|
||||
testCache.approvedPrefixes = []string{"og:"}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue