Skip to content

Commit 67101f0

Browse files
committed
add an LRU cache to the regexp decoder
Often, the regexp decoder runs repeatedly on the same input. A common example is matching a cgroup path in a chain like so: ``` - name: cgroup - name: regexp regexps: - ^.*(system.slice).*$ ``` In these cases, the overhead of regexp matching can often be noticeable. Since the input does not vary too often, we can add a cache here to skip the overhead of regex matching. This is implemented as an LRU cache to prevent the cache from growing infinitely, especially in cases where we have many short-lived cgroups. The previous example can be written as ``` - name: cgroup - name: regexp lru_cache_size: 1024 regexps: - ^.*(system.slice).*$ ``` Signed-off-by: Daniel Dao <dqminh@cloudflare.com>
1 parent 8f471b0 commit 67101f0

File tree

7 files changed

+52
-7
lines changed

7 files changed

+52
-7
lines changed

.vscode/config-schema.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,10 @@ definitions:
145145
type: object
146146
allow_unknown:
147147
type: boolean
148+
lru_cache_size:
149+
type: number
148150
regexps:
149151
type: array
150152
items:
151153
type: string
154+

config/config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ type Decoder struct {
7373
StaticMap map[string]string `yaml:"static_map"`
7474
Regexps []string `yaml:"regexps"`
7575
AllowUnknown bool `yaml:"allow_unknown"`
76+
LruCacheSize int `yaml:"lru_cache_size"`
7677
}
7778

7879
// HistogramBucketType is an enum to define how to interpret histogram

decoder/regexp.go

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,32 @@ import (
66
"regexp"
77

88
"github.com/cloudflare/ebpf_exporter/v2/config"
9+
lru "github.com/hashicorp/golang-lru/v2"
910
)
1011

1112
// Regexp is a decoder that only allows inputs matching regexp
1213
type Regexp struct {
13-
cache map[string]*regexp.Regexp
14+
cache map[string]*regexp.Regexp
15+
outputCache *lru.Cache[string, []byte]
1416
}
1517

1618
// Decode only allows inputs matching regexp
1719
func (r *Regexp) Decode(in []byte, conf config.Decoder) ([]byte, error) {
1820
if conf.Regexps == nil {
1921
return nil, errors.New("no regexps defined in config")
2022
}
23+
inputStr := string(in)
2124

2225
if r.cache == nil {
2326
r.cache = map[string]*regexp.Regexp{}
2427
}
28+
if conf.LruCacheSize > 0 && r.outputCache == nil {
29+
outputCache, err := lru.New[string, []byte](conf.LruCacheSize)
30+
if err != nil {
31+
return nil, err
32+
}
33+
r.outputCache = outputCache
34+
}
2535

2636
for _, expr := range conf.Regexps {
2737
if _, ok := r.cache[expr]; !ok {
@@ -33,16 +43,27 @@ func (r *Regexp) Decode(in []byte, conf config.Decoder) ([]byte, error) {
3343
r.cache[expr] = compiled
3444
}
3545

46+
if r.outputCache != nil {
47+
if v, ok := r.outputCache.Get(inputStr); ok {
48+
return v, nil
49+
}
50+
}
51+
3652
matches := r.cache[expr].FindSubmatch(in)
3753

38-
// First sub-match if present
54+
var output []byte
3955
if len(matches) == 2 {
40-
return matches[1], nil
56+
// First sub-match if present
57+
output = matches[1]
58+
} else if len(matches) == 1 {
59+
// General match
60+
output = matches[0]
4161
}
42-
43-
// General match
44-
if len(matches) == 1 {
45-
return matches[0], nil
62+
if output != nil {
63+
if r.outputCache != nil {
64+
r.outputCache.Add(inputStr, output)
65+
}
66+
return output, nil
4667
}
4768
}
4869

decoder/regexp_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,19 @@ func TestRegexpDecoder(t *testing.T) {
5454
}
5555
}
5656
}
57+
58+
func TestRegexpDecoderWithLru(t *testing.T) {
59+
d := &Regexp{}
60+
input := []byte("systemd-bananad")
61+
expectedOutput := []byte("systemd")
62+
out, err := d.Decode(input, config.Decoder{Regexps: []string{"^(systemd).*$", "^syslog-ng$"}, LruCacheSize: 100})
63+
if err != nil {
64+
t.Errorf("Error decoding %s: %v", input, err)
65+
}
66+
if !bytes.Equal(out, expectedOutput) {
67+
t.Errorf("Expected %s, got %s", expectedOutput, out)
68+
}
69+
if d.outputCache.Len() != 1 || d.outputCache.Keys()[0] != string(input) {
70+
t.Errorf("Error decoding %s: unexpected lru keys %v", input, d.outputCache.Keys())
71+
}
72+
}

examples/regexp.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ metrics:
88
decoders:
99
- name: string
1010
- name: regexp
11+
lru_cache_size: 100
1112
regexps:
1213
- ^(kswapd).*$ # anything matching kswapd prefix, mapping to one kswapd bucket
1314
- ^(systemd.*)$ # anything matching systemd prefix, mapping to one systemd bucket

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ require (
3030
github.com/go-logr/stdr v1.2.2 // indirect
3131
github.com/google/uuid v1.6.0 // indirect
3232
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 // indirect
33+
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
3334
github.com/klauspost/compress v1.17.11 // indirect
3435
github.com/mitchellh/go-homedir v1.1.0 // indirect
3536
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
3030
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
3131
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 h1:e9Rjr40Z98/clHv5Yg79Is0NtosR5LXRvdr7o/6NwbA=
3232
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1/go.mod h1:tIxuGz/9mpox++sgp9fJjHO0+q1X9/UOWd798aAm22M=
33+
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
34+
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
3335
github.com/iovisor/gobpf v0.2.0 h1:34xkQxft+35GagXBk3n23eqhm0v7q0ejeVirb8sqEOQ=
3436
github.com/iovisor/gobpf v0.2.0/go.mod h1:WSY9Jj5RhdgC3ci1QaacvbFdQ8cbrEjrpiZbLHLt2s4=
3537
github.com/jaypipes/pcidb v1.0.1 h1:WB2zh27T3nwg8AE8ei81sNRb9yWBii3JGNJtT7K9Oic=

0 commit comments

Comments
 (0)