Skip to content

Commit 2128a0c

Browse files
committed
feat(stats): overhaul stats service interface, implement os stats cache
1 parent fdcdca4 commit 2128a0c

File tree

4 files changed

+237
-1279
lines changed

4 files changed

+237
-1279
lines changed

stats/cache.go

Lines changed: 176 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,79 +3,227 @@ package stats
33
import (
44
"context"
55
"encoding/base32"
6+
"encoding/json"
7+
"errors"
68
"fmt"
7-
"io"
9+
"io/ioutil"
810
"os"
911
"path/filepath"
12+
"sync"
13+
14+
"github.com/qri-io/dataset"
1015
)
1116

1217
// Sentinel errors returned by Cache implementations. They carry no formatted
// values, so they are built with errors.New; compare against them with
// errors.Is.
var (
	// ErrCacheMiss indicates a requested path isn't in the cache
	ErrCacheMiss = errors.New("stats: cache miss")
	// ErrNoCache indicates there is no cache
	ErrNoCache = errors.New("stats: no cache")
	// ErrCacheCorrupt indicates a faulty stats cache
	ErrCacheCorrupt = errors.New("stats: cache is corrupt")
)
1825

19-
// Cache is a store of JSON-formated stats data, keyed by path
26+
// Cache is a store of stats components
2027
// Consumers of a cache must not rely on the cache for persistence
2128
// Implementations are expected to maintain their own size bounding
2229
// semantics internally
2330
// Cache implementations must be safe for concurrent use, and must be
2431
// nil-callable
2532
type Cache interface {
26-
// Put places stats in the cache, keyed by path
27-
PutJSON(ctx context.Context, path string, r io.Reader) error
28-
// JSON gets cached byte data for a path
29-
JSON(ctx context.Context, path string) (r io.Reader, err error)
33+
// PutStats places a stats object in the Cache, doing so will expire all caches with a lower
34+
// modTime, use a modTime of zero when no modTime is known
35+
PutStats(ctx context.Context, key string, modTime int, sa *dataset.Stats) error
36+
// ModTime gets the cached modTime for the given key
37+
// It returns ErrCacheMiss if the key does not exist
38+
ModTime(ctx context.Context, key string) (modTime int, err error)
39+
// Stats gets the stats component for a given key
40+
Stats(ctx context.Context, key string) (sa *dataset.Stats, err error)
41+
}
42+
43+
// nilCache is a stand in for not having a cache
44+
// PutStats returns ErrNoCache, ModTime and Stats return ErrCacheMiss
45+
type nilCache bool
46+
47+
var _ Cache = (*nilCache)(nil)
48+
49+
// PutStats stores nothing and always returns ErrNoCache
50+
func (nilCache) PutStats(ctx context.Context, key string, modTime int, sa *dataset.Stats) error {
51+
return ErrNoCache
52+
}
53+
54+
// ModTime always returns a modTime of -1 and ErrCacheMiss
55+
func (nilCache) ModTime(ctx context.Context, key string) (modTime int, err error) {
56+
return -1, ErrCacheMiss
57+
}
58+
59+
// Stats always returns a nil stats component and ErrCacheMiss
60+
func (nilCache) Stats(ctx context.Context, key string) (sa *dataset.Stats, err error) {
61+
return nil, ErrCacheMiss
3062
}
3163

3264
// osCache is a stats cache stored in a directory on the local operating system
3365
type osCache struct {
3466
// root is the directory that component files and the info file are written to
root string
3567
// maxSize is the upper bound on the summed sizes recorded in info, measured
// in bytes of JSON-encoded component data
maxSize uint64
68+
69+
// info records the size and modTime of each cached component by cache key
info *cacheInfo
70+
// infoLk is held while info is purged, read from disk, or written to disk
infoLk sync.Mutex
3671
}
3772

3873
var _ Cache = (*osCache)(nil)
3974

4075
// NewOSCache creates a cache in a local direcory
41-
func NewOSCache(rootDir string, maxSize uint64) Cache {
42-
if err := os.MkdirAll(rootDir, os.ModePerm); err != nil {
43-
// log.Errorf("stat: %s", args ...interface{})
44-
}
45-
return osCache{
76+
func NewOSCache(rootDir string, maxSize uint64) (Cache, error) {
77+
c := &osCache{
4678
root: rootDir,
4779
maxSize: maxSize,
4880
}
81+
82+
err := c.readCacheInfo()
83+
if errors.Is(err, ErrCacheCorrupt) {
84+
log.Warn("your cache of stats data is corrupt, removing all cached data")
85+
err = os.RemoveAll(rootDir)
86+
return c, err
87+
}
88+
89+
if err := os.MkdirAll(rootDir, os.ModePerm); err != nil {
90+
return nil, err
91+
}
92+
93+
return c, err
4994
}
5095

5196
// Put places stats in the cache, keyed by path
52-
func (c osCache) PutJSON(ctx context.Context, path string, r io.Reader) error {
53-
filename := fmt.Sprintf("%s.json", b32Enc.EncodeToString([]byte(path)))
54-
// TODO (b5) - use this
55-
_ = filepath.Join(c.root, filename)
97+
func (c *osCache) PutStats(ctx context.Context, key string, modTime int, sa *dataset.Stats) error {
98+
if modTime < 0 {
99+
modTime = 0
100+
}
56101

57-
return fmt.Errorf("not finished")
102+
key = c.cacheKey(key)
103+
filename := c.componentFilepath(key)
104+
data, err := json.Marshal(sa)
105+
if err != nil {
106+
return err
107+
}
108+
109+
if uint64(len(data)) > c.maxSize {
110+
return fmt.Errorf("stats component size exceeds maximum size of cache")
111+
}
112+
113+
if err := ioutil.WriteFile(filename, data, 0644); err != nil {
114+
return err
115+
}
116+
117+
c.addAndPurgeExcess(key, modTime, uint64(len(data)))
118+
119+
return c.writeCacheInfo()
58120
}
59121

60-
// JSON gets cached byte data for a path
61-
func (c osCache) JSON(ctx context.Context, path string) (r io.Reader, err error) {
122+
func (c *osCache) ModTime(ctx context.Context, key string) (modTime int, err error) {
123+
var exists bool
124+
if modTime, exists = c.info.ModTimes[c.cacheKey(key)]; !exists {
125+
return -1, ErrCacheMiss
126+
}
127+
128+
return modTime, nil
129+
}
130+
131+
// Stats gets cached byte data for a path
132+
func (c *osCache) Stats(ctx context.Context, key string) (sa *dataset.Stats, err error) {
62133
return nil, ErrCacheMiss
63134
}
64135

65136
var b32Enc = base32.NewEncoding("abcdefghijklmnopqrstuvwxyz234567").WithPadding(base32.NoPadding)
66137

67-
// nilCache is a stand in for not having a cache
68-
// it only ever returns ErrNoCache
69-
type nilCache bool
138+
func (c *osCache) componentFilepath(cacheKey string) string {
139+
return filepath.Join(c.root, fmt.Sprintf("%s.json", cacheKey))
140+
}
70141

71-
var _ Cache = (*nilCache)(nil)
142+
func (c *osCache) cacheKey(key string) string {
143+
return b32Enc.EncodeToString([]byte(key))
144+
}
72145

73-
// PutJSON places stats in the cache, keyed by path
74-
func (nilCache) PutJSON(ctx context.Context, path string, r io.Reader) error {
75-
return ErrNoCache
146+
const uintSize = 32 << (^uint(0) >> 32 & 1)
147+
148+
func (c *osCache) addAndPurgeExcess(cacheKey string, modTime int, size uint64) {
149+
c.infoLk.Lock()
150+
defer c.infoLk.Unlock()
151+
c.info.Sizes[cacheKey] = uint64(len(data))
152+
c.info.ModTimes[cacheKey] = modTime
153+
154+
var (
155+
lowestKey string
156+
lowestModTime int
157+
)
158+
159+
for c.info.Size() > c.maxSize {
160+
lowestKey = ""
161+
lowestModTime = 1<<(uintSize-1) - 1
162+
163+
for key, modTime := range c.info.ModTimes {
164+
if modTime < lowestModTime {
165+
lowestKey = key
166+
}
167+
}
168+
if lowestKey == "" {
169+
break
170+
}
171+
if err := os.Remove(c.componentFilepath(lowestKey)); err != nil {
172+
break
173+
}
174+
delete(c.info.Sizes, lowestKey)
175+
delete(c.info.ModTimes, lowestKey)
176+
}
76177
}
77178

78-
// JSON gets cached byte data for a path
79-
func (nilCache) JSON(ctx context.Context, path string) (r io.Reader, err error) {
80-
return nil, ErrNoCache
179+
const osCacheInfoFilename = "info.json"
180+
181+
// cacheInfo is the bookkeeping record of an osCache, it is JSON-encoded to
// and decoded from the info file in the cache root
type cacheInfo struct {
182+
// Sizes maps a cache key to the byte length of its stored component
Sizes map[string]uint64
183+
// ModTimes maps a cache key to the modTime it was stored with
ModTimes map[string]int
184+
}
185+
186+
func (ci cacheInfo) Size() (size uint64) {
187+
for _, s := range ci.Sizes {
188+
size += s
189+
}
190+
return size
191+
}
192+
193+
func (c *osCache) readCacheInfo() error {
194+
c.infoLk.Lock()
195+
defer c.infoLk.Unlock()
196+
197+
name := filepath.Join(c.root, osCacheInfoFilename)
198+
f, err := os.Open(name)
199+
if err != nil {
200+
if os.IsNotExist(err) {
201+
c.info = &cacheInfo{}
202+
return nil
203+
}
204+
return err
205+
}
206+
207+
defer f.Close()
208+
209+
c.info = &cacheInfo{}
210+
if err := json.NewDecoder(f).Decode(c.info); err != nil {
211+
// corrupt cache
212+
return fmt.Errorf("%w decoding stats info: %s", ErrCacheCorrupt, err)
213+
}
214+
215+
return nil
216+
}
217+
218+
func (c *osCache) writeCacheInfo() error {
219+
c.infoLk.Lock()
220+
defer c.infoLk.Unlock()
221+
222+
name := filepath.Join(c.root, osCacheInfoFilename)
223+
data, err := json.Marshal(c.info)
224+
if err != nil {
225+
return err
226+
}
227+
228+
return ioutil.WriteFile(name, data, 0644)
81229
}

0 commit comments

Comments
 (0)