fix: make ogtags and dnsbl use the Store instead of memory (#760)

Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
Xe Iaso 2025-07-05 20:17:46 +00:00 committed by GitHub
parent e870ede120
commit 7d0c58d1a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 134 additions and 86 deletions

View file

@ -1,6 +1,7 @@
package ogtags
import (
"context"
"errors"
"log/slog"
"net/url"
@ -8,7 +9,7 @@ import (
)
// GetOGTags is the main function that retrieves Open Graph tags for a URL
func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]string, error) {
func (c *OGTagCache) GetOGTags(ctx context.Context, url *url.URL, originalHost string) (map[string]string, error) {
if url == nil {
return nil, errors.New("nil URL provided, cannot fetch OG tags")
}
@ -21,12 +22,12 @@ func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]st
cacheKey := c.generateCacheKey(target, originalHost)
// Check cache first
if cachedTags := c.checkCache(cacheKey); cachedTags != nil {
if cachedTags := c.checkCache(ctx, cacheKey); cachedTags != nil {
return cachedTags, nil
}
// Fetch HTML content, passing the original host
doc, err := c.fetchHTMLDocumentWithCache(target, originalHost, cacheKey)
doc, err := c.fetchHTMLDocumentWithCache(ctx, target, originalHost, cacheKey)
if errors.Is(err, syscall.ECONNREFUSED) {
slog.Debug("Connection refused, returning empty tags")
return nil, nil
@ -42,7 +43,7 @@ func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]st
ogTags := c.extractOGTags(doc)
// Store in cache
c.cache.Set(cacheKey, ogTags, c.ogTimeToLive)
c.cache.Set(ctx, cacheKey, ogTags, c.ogTimeToLive)
return ogTags, nil
}
@ -59,8 +60,8 @@ func (c *OGTagCache) generateCacheKey(target string, originalHost string) string
}
// checkCache checks if we have the tags cached and returns them if so
func (c *OGTagCache) checkCache(cacheKey string) map[string]string {
if cachedTags, ok := c.cache.Get(cacheKey); ok {
func (c *OGTagCache) checkCache(ctx context.Context, cacheKey string) map[string]string {
if cachedTags, err := c.cache.Get(ctx, cacheKey); err == nil {
slog.Debug("cache hit", "tags", cachedTags)
return cachedTags
}

View file

@ -9,6 +9,7 @@ import (
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/memory"
)
func TestCacheReturnsDefault(t *testing.T) {
@ -21,14 +22,14 @@ func TestCacheReturnsDefault(t *testing.T) {
TimeToLive: time.Minute,
ConsiderHost: false,
Override: want,
})
}, memory.New(t.Context()))
u, err := url.Parse("https://anubis.techaro.lol")
if err != nil {
t.Fatal(err)
}
result, err := cache.GetOGTags(u, "anubis.techaro.lol")
result, err := cache.GetOGTags(t.Context(), u, "anubis.techaro.lol")
if err != nil {
t.Fatal(err)
}
@ -49,7 +50,7 @@ func TestCheckCache(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
}, memory.New(t.Context()))
// Set up test data
urlStr := "http://example.com/page"
@ -60,16 +61,16 @@ func TestCheckCache(t *testing.T) {
cacheKey := cache.generateCacheKey(urlStr, "example.com")
// Test cache miss
tags := cache.checkCache(cacheKey)
tags := cache.checkCache(t.Context(), cacheKey)
if tags != nil {
t.Errorf("expected nil tags on cache miss, got %v", tags)
}
// Manually add to cache
cache.cache.Set(cacheKey, expectedTags, time.Minute)
cache.cache.Set(t.Context(), cacheKey, expectedTags, time.Minute)
// Test cache hit
tags = cache.checkCache(cacheKey)
tags = cache.checkCache(t.Context(), cacheKey)
if tags == nil {
t.Fatal("expected non-nil tags on cache hit, got nil")
}
@ -112,7 +113,7 @@ func TestGetOGTags(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
}, memory.New(t.Context()))
// Parse the test server URL
parsedURL, err := url.Parse(ts.URL)
@ -122,7 +123,7 @@ func TestGetOGTags(t *testing.T) {
// Test fetching OG tags from the test server
// Pass the host from the parsed test server URL
ogTags, err := cache.GetOGTags(parsedURL, parsedURL.Host)
ogTags, err := cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host)
if err != nil {
t.Fatalf("failed to get OG tags: %v", err)
}
@ -142,14 +143,14 @@ func TestGetOGTags(t *testing.T) {
// Test fetching OG tags from the cache
// Pass the host from the parsed test server URL
ogTags, err = cache.GetOGTags(parsedURL, parsedURL.Host)
ogTags, err = cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host)
if err != nil {
t.Fatalf("failed to get OG tags from cache: %v", err)
}
// Test fetching OG tags from the cache (3rd time)
// Pass the host from the parsed test server URL
newOgTags, err := cache.GetOGTags(parsedURL, parsedURL.Host)
newOgTags, err := cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host)
if err != nil {
t.Fatalf("failed to get OG tags from cache: %v", err)
}
@ -263,10 +264,10 @@ func TestGetOGTagsWithHostConsideration(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: tc.ogCacheConsiderHost,
})
}, memory.New(t.Context()))
for i, req := range tc.requests {
ogTags, err := cache.GetOGTags(parsedURL, req.host)
ogTags, err := cache.GetOGTags(t.Context(), parsedURL, req.host)
if err != nil {
t.Errorf("Request %d (host: %s): unexpected error: %v", i+1, req.host, err)
continue // Skip further checks for this request if error occurred

View file

@ -20,8 +20,8 @@ var (
// fetchHTMLDocumentWithCache fetches the HTML document from the given URL string,
// preserving the original host header.
func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost string, cacheKey string) (*html.Node, error) {
req, err := http.NewRequestWithContext(context.Background(), "GET", urlStr, nil)
func (c *OGTagCache) fetchHTMLDocumentWithCache(ctx context.Context, urlStr string, originalHost string, cacheKey string) (*html.Node, error) {
req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil)
if err != nil {
return nil, fmt.Errorf("failed to create http request: %w", err)
}
@ -41,7 +41,7 @@ func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost stri
var netErr net.Error
if errors.As(err, &netErr) && netErr.Timeout() {
slog.Debug("og: request timed out", "url", urlStr)
c.cache.Set(cacheKey, emptyMap, c.ogTimeToLive/2) // Cache empty result for half the TTL to not spam the server
c.cache.Set(ctx, cacheKey, emptyMap, c.ogTimeToLive/2) // Cache empty result for half the TTL to not spam the server
}
return nil, fmt.Errorf("http get failed: %w", err)
}
@ -56,7 +56,7 @@ func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost stri
if resp.StatusCode != http.StatusOK {
slog.Debug("og: received non-OK status code", "url", urlStr, "status", resp.StatusCode)
c.cache.Set(cacheKey, emptyMap, c.ogTimeToLive) // Cache empty result for non-successful status codes
c.cache.Set(ctx, cacheKey, emptyMap, c.ogTimeToLive) // Cache empty result for non-successful status codes
return nil, fmt.Errorf("%w: page not found", ErrOgHandled)
}

View file

@ -1,6 +1,7 @@
package ogtags
import (
"context"
"fmt"
"io"
"net/http"
@ -11,6 +12,7 @@ import (
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/memory"
"golang.org/x/net/html"
)
@ -85,8 +87,8 @@ func TestFetchHTMLDocument(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
doc, err := cache.fetchHTMLDocument(ts.URL, "anything")
}, memory.New(t.Context()))
doc, err := cache.fetchHTMLDocument(t.Context(), ts.URL, "anything")
if tt.expectError {
if err == nil {
@ -116,9 +118,9 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
}, memory.New(t.Context()))
doc, err := cache.fetchHTMLDocument("http://invalid.url.that.doesnt.exist.example", "anything")
doc, err := cache.fetchHTMLDocument(t.Context(), "http://invalid.url.that.doesnt.exist.example", "anything")
if err == nil {
t.Error("expected error for invalid URL, got nil")
@ -130,7 +132,7 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) {
}
// fetchHTMLDocument allows you to call fetchHTMLDocumentWithCache without a duplicate generateCacheKey call.
//
// It derives the cache key from urlStr and originalHost via generateCacheKey
// and forwards its arguments, plus that key, to fetchHTMLDocumentWithCache,
// returning that call's result unchanged. This listing shows the signature
// and the return statement both without and with the context parameter.
func (c *OGTagCache) fetchHTMLDocument(urlStr string, originalHost string) (*html.Node, error) {
func (c *OGTagCache) fetchHTMLDocument(ctx context.Context, urlStr string, originalHost string) (*html.Node, error) {
cacheKey := c.generateCacheKey(urlStr, originalHost)
return c.fetchHTMLDocumentWithCache(urlStr, originalHost, cacheKey)
return c.fetchHTMLDocumentWithCache(ctx, urlStr, originalHost, cacheKey)
}

View file

@ -8,6 +8,7 @@ import (
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/memory"
)
func TestIntegrationGetOGTags(t *testing.T) {
@ -110,7 +111,7 @@ func TestIntegrationGetOGTags(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
}, memory.New(t.Context()))
// Create URL for test
testURL, _ := url.Parse(ts.URL)
@ -119,7 +120,7 @@ func TestIntegrationGetOGTags(t *testing.T) {
// Get OG tags
// Pass the host from the test URL
ogTags, err := cache.GetOGTags(testURL, testURL.Host)
ogTags, err := cache.GetOGTags(t.Context(), testURL, testURL.Host)
// Check error expectation
if tc.expectError {
@ -147,7 +148,7 @@ func TestIntegrationGetOGTags(t *testing.T) {
// Test cache retrieval
// Pass the host from the test URL
cachedOGTags, err := cache.GetOGTags(testURL, testURL.Host)
cachedOGTags, err := cache.GetOGTags(t.Context(), testURL, testURL.Host)
if err != nil {
t.Fatalf("failed to get OG tags from cache: %v", err)
}

View file

@ -7,6 +7,7 @@ import (
"testing"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/memory"
"golang.org/x/net/html"
)
@ -30,7 +31,7 @@ func BenchmarkGetTarget(b *testing.B) {
for _, tt := range tests {
b.Run(tt.name, func(b *testing.B) {
cache := NewOGTagCache(tt.target, config.OpenGraph{})
cache := NewOGTagCache(tt.target, config.OpenGraph{}, memory.New(b.Context()))
urls := make([]*url.URL, len(tt.paths))
for i, path := range tt.paths {
u, _ := url.Parse(path)
@ -66,7 +67,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
</head><body><div><p>Content</p></div></body></html>`,
}
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(b.Context()))
docs := make([]*html.Node, len(htmlSamples))
for i, sample := range htmlSamples {
@ -84,7 +85,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
// Memory usage test
func TestMemoryUsage(t *testing.T) {
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(t.Context()))
// Force GC and wait for it to complete
runtime.GC()

View file

@ -9,8 +9,8 @@ import (
"strings"
"time"
"github.com/TecharoHQ/anubis/decaymap"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store"
)
const (
@ -22,7 +22,7 @@ const (
)
type OGTagCache struct {
cache *decaymap.Impl[string, map[string]string]
cache store.JSON[map[string]string]
targetURL *url.URL
client *http.Client
@ -36,7 +36,7 @@ type OGTagCache struct {
ogOverride map[string]string
}
func NewOGTagCache(target string, conf config.OpenGraph) *OGTagCache {
func NewOGTagCache(target string, conf config.OpenGraph, backend store.Interface) *OGTagCache {
// Predefined approved tags and prefixes
defaultApprovedTags := []string{"description", "keywords", "author"}
defaultApprovedPrefixes := []string{"og:", "twitter:", "fediverse:"}
@ -77,7 +77,10 @@ func NewOGTagCache(target string, conf config.OpenGraph) *OGTagCache {
}
return &OGTagCache{
cache: decaymap.New[string, map[string]string](),
cache: store.JSON[map[string]string]{
Underlying: backend,
Prefix: "ogtags:",
},
targetURL: parsedTargetURL,
ogPassthrough: conf.Enabled,
ogTimeToLive: conf.TimeToLive,
@ -124,9 +127,3 @@ func (c *OGTagCache) getTarget(u *url.URL) string {
return sb.String()
}
// Cleanup forwards to the Cleanup method of the underlying cache. The call is
// skipped when c.cache is nil, so invoking it on an OGTagCache without a cache
// is a no-op.
func (c *OGTagCache) Cleanup() {
if c.cache != nil {
c.cache.Cleanup()
}
}

View file

@ -1,12 +1,14 @@
package ogtags
import (
"context"
"net/url"
"strings"
"testing"
"unicode/utf8"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/memory"
"golang.org/x/net/html"
)
@ -46,7 +48,7 @@ func FuzzGetTarget(f *testing.F) {
}
// Create cache - should not panic
cache := NewOGTagCache(target, config.OpenGraph{})
cache := NewOGTagCache(target, config.OpenGraph{}, memory.New(context.Background()))
// Create URL
u := &url.URL{
@ -130,7 +132,7 @@ func FuzzExtractOGTags(f *testing.F) {
return
}
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(context.Background()))
// Should not panic
tags := cache.extractOGTags(doc)
@ -186,7 +188,7 @@ func FuzzGetTargetRoundTrip(f *testing.F) {
t.Skip()
}
cache := NewOGTagCache(target, config.OpenGraph{})
cache := NewOGTagCache(target, config.OpenGraph{}, memory.New(context.Background()))
u := &url.URL{Path: path, RawQuery: query}
result := cache.getTarget(u)
@ -243,7 +245,7 @@ func FuzzExtractMetaTagInfo(f *testing.F) {
},
}
cache := NewOGTagCache("http://example.com", config.OpenGraph{})
cache := NewOGTagCache("http://example.com", config.OpenGraph{}, memory.New(context.Background()))
// Should not panic
property, content := cache.extractMetaTagInfo(node)
@ -296,7 +298,7 @@ func BenchmarkFuzzedGetTarget(b *testing.B) {
for _, input := range inputs {
b.Run(input.name, func(b *testing.B) {
cache := NewOGTagCache(input.target, config.OpenGraph{})
cache := NewOGTagCache(input.target, config.OpenGraph{}, memory.New(context.Background()))
u := &url.URL{Path: input.path, RawQuery: input.query}
b.ResetTimer()

View file

@ -15,6 +15,7 @@ import (
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/memory"
)
func TestNewOGTagCache(t *testing.T) {
@ -44,7 +45,7 @@ func TestNewOGTagCache(t *testing.T) {
Enabled: tt.ogPassthrough,
TimeToLive: tt.ogTimeToLive,
ConsiderHost: false,
})
}, memory.New(t.Context()))
if cache == nil {
t.Fatal("expected non-nil cache, got nil")
@ -84,7 +85,7 @@ func TestNewOGTagCache_UnixSocket(t *testing.T) {
Enabled: true,
TimeToLive: 5 * time.Minute,
ConsiderHost: false,
})
}, memory.New(t.Context()))
if cache == nil {
t.Fatal("expected non-nil cache, got nil")
@ -169,7 +170,7 @@ func TestGetTarget(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
}, memory.New(t.Context()))
u := &url.URL{
Path: tt.path,
@ -242,14 +243,14 @@ func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
Enabled: true,
TimeToLive: time.Minute,
ConsiderHost: false,
})
}, memory.New(t.Context()))
// Create a dummy URL for the request (path and query matter)
testReqURL, _ := url.Parse("/some/page?query=1")
// Get OG tags
// Pass an empty string for host, as it's irrelevant for unix sockets
ogTags, err := cache.GetOGTags(testReqURL, "")
ogTags, err := cache.GetOGTags(t.Context(), testReqURL, "")
if err != nil {
t.Fatalf("GetOGTags failed for unix socket: %v", err)
@ -265,7 +266,7 @@ func TestIntegrationGetOGTags_UnixSocket(t *testing.T) {
// Test cache retrieval (should hit cache)
// Pass an empty string for host
cachedTags, err := cache.GetOGTags(testReqURL, "")
cachedTags, err := cache.GetOGTags(t.Context(), testReqURL, "")
if err != nil {
t.Fatalf("GetOGTags (cache hit) failed for unix socket: %v", err)
}

View file

@ -7,6 +7,7 @@ import (
"time"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/store/memory"
"golang.org/x/net/html"
)
@ -17,7 +18,7 @@ func TestExtractOGTags(t *testing.T) {
Enabled: false,
ConsiderHost: false,
TimeToLive: time.Minute,
})
}, memory.New(t.Context()))
// Manually set approved tags/prefixes based on the user request for clarity
testCache.approvedTags = []string{"description"}
testCache.approvedPrefixes = []string{"og:"}
@ -198,7 +199,7 @@ func TestExtractMetaTagInfo(t *testing.T) {
Enabled: false,
ConsiderHost: false,
TimeToLive: time.Minute,
})
}, memory.New(t.Context()))
testCache.approvedTags = []string{"description"}
testCache.approvedPrefixes = []string{"og:"}