@@ -3,79 +3,227 @@ package stats
33import (
44 "context"
55 "encoding/base32"
6+ "encoding/json"
7+ "errors"
68 "fmt"
7- "io"
9+ "io/ioutil "
810 "os"
911 "path/filepath"
12+ "sync"
13+
14+ "github.com/qri-io/dataset"
1015)
1116
var (
	// ErrCacheMiss indicates a requested path isn't in the cache
	ErrCacheMiss = errors.New("stats: cache miss")
	// ErrNoCache indicates there is no cache
	ErrNoCache = errors.New("stats: no cache")
	// ErrCacheCorrupt indicates a faulty stats cache
	ErrCacheCorrupt = errors.New("stats: cache is corrupt")
)
1825
19- // Cache is a store of JSON-formated stats data, keyed by path
// Cache is a store of stats components
// Consumers of a cache must not rely on the cache for persistence
// Implementations are expected to maintain their own size bounding
// semantics internally
// Cache implementations must be safe for concurrent use, and must be
// nil-callable
type Cache interface {
	// PutStats places a stats object in the cache under key
	// placing a stats object in the Cache will expire all caches with a lower
	// modTime, use a modTime of zero when no modTime is known
	PutStats(ctx context.Context, key string, modTime int, sa *dataset.Stats) error
	// ModTime gets the cached modTime for a given key
	// will return ErrCacheMiss if the key does not exist
	ModTime(ctx context.Context, key string) (modTime int, err error)
	// Stats gets the stats component for a given key
	Stats(ctx context.Context, key string) (sa *dataset.Stats, err error)
}
42+
// nilCache is a stand in for not having a cache
// it stores nothing: PutStats reports ErrNoCache, while ModTime and Stats
// report ErrCacheMiss
type nilCache bool

var _ Cache = (*nilCache)(nil)

// PutStats discards its arguments and always returns ErrNoCache
func (nilCache) PutStats(ctx context.Context, key string, modTime int, sa *dataset.Stats) error {
	return ErrNoCache
}

// ModTime always returns a modTime of -1 and ErrCacheMiss
func (nilCache) ModTime(ctx context.Context, key string) (modTime int, err error) {
	return -1, ErrCacheMiss
}

// Stats always returns a nil stats component and ErrCacheMiss
func (nilCache) Stats(ctx context.Context, key string) (sa *dataset.Stats, err error) {
	return nil, ErrCacheMiss
}
3163
// osCache is a stats cache stored in a directory on the local operating system
type osCache struct {
	// root is the directory holding the cached component files and the
	// cache info file
	root string
	// maxSize is the upper bound, in bytes, that the summed size of all
	// recorded components is purged down to
	maxSize uint64

	// info records the size and modTime of each cached entry, keyed by
	// cache key
	info *cacheInfo
	// infoLk is held by addAndPurgeExcess, readCacheInfo and writeCacheInfo
	// while they work with info
	infoLk sync.Mutex
}

// compile-time check that *osCache implements Cache
var _ Cache = (*osCache)(nil)
3974
4075// NewOSCache creates a cache in a local direcory
41- func NewOSCache (rootDir string , maxSize uint64 ) Cache {
42- if err := os .MkdirAll (rootDir , os .ModePerm ); err != nil {
43- // log.Errorf("stat: %s", args ...interface{})
44- }
45- return osCache {
76+ func NewOSCache (rootDir string , maxSize uint64 ) (Cache , error ) {
77+ c := & osCache {
4678 root : rootDir ,
4779 maxSize : maxSize ,
4880 }
81+
82+ err := c .readCacheInfo ()
83+ if errors .Is (err , ErrCacheCorrupt ) {
84+ log .Warn ("your cache of stats data is corrupt, removing all cached data" )
85+ err = os .RemoveAll (rootDir )
86+ return c , err
87+ }
88+
89+ if err := os .MkdirAll (rootDir , os .ModePerm ); err != nil {
90+ return nil , err
91+ }
92+
93+ return c , err
4994}
5095
5196// Put places stats in the cache, keyed by path
52- func (c osCache ) PutJSON (ctx context.Context , path string , r io. Reader ) error {
53- filename := fmt . Sprintf ( "%s.json" , b32Enc . EncodeToString ([] byte ( path )))
54- // TODO (b5) - use this
55- _ = filepath . Join ( c . root , filename )
97+ func (c * osCache ) PutStats (ctx context.Context , key string , modTime int , sa * dataset. Stats ) error {
98+ if modTime < 0 {
99+ modTime = 0
100+ }
56101
57- return fmt .Errorf ("not finished" )
102+ key = c .cacheKey (key )
103+ filename := c .componentFilepath (key )
104+ data , err := json .Marshal (sa )
105+ if err != nil {
106+ return err
107+ }
108+
109+ if uint64 (len (data )) > c .maxSize {
110+ return fmt .Errorf ("stats component size exceeds maximum size of cache" )
111+ }
112+
113+ if err := ioutil .WriteFile (filename , data , 0644 ); err != nil {
114+ return err
115+ }
116+
117+ c .addAndPurgeExcess (key , modTime , uint64 (len (data )))
118+
119+ return c .writeCacheInfo ()
58120}
59121
60- // JSON gets cached byte data for a path
61- func (c osCache ) JSON (ctx context.Context , path string ) (r io.Reader , err error ) {
122+ func (c * osCache ) ModTime (ctx context.Context , key string ) (modTime int , err error ) {
123+ var exists bool
124+ if modTime , exists = c .info .ModTimes [c .cacheKey (key )]; ! exists {
125+ return - 1 , ErrCacheMiss
126+ }
127+
128+ return modTime , nil
129+ }
130+
131+ // Stats gets cached byte data for a path
132+ func (c * osCache ) Stats (ctx context.Context , key string ) (sa * dataset.Stats , err error ) {
62133 return nil , ErrCacheMiss
63134}
64135
// b32Enc is the unpadded base32 encoding, over a lowercase alphabet, that
// turns cache keys into strings used as file names
var b32Enc = func() *base32.Encoding {
	const alphabet = "abcdefghijklmnopqrstuvwxyz234567"
	return base32.NewEncoding(alphabet).WithPadding(base32.NoPadding)
}()
66137
67- // nilCache is a stand in for not having a cache
68- // it only ever returns ErrNoCache
69- type nilCache bool
138+ func ( c * osCache ) componentFilepath ( cacheKey string ) string {
139+ return filepath . Join ( c . root , fmt . Sprintf ( "%s.json" , cacheKey ))
140+ }
70141
71- var _ Cache = (* nilCache )(nil )
142+ func (c * osCache ) cacheKey (key string ) string {
143+ return b32Enc .EncodeToString ([]byte (key ))
144+ }
72145
73- // PutJSON places stats in the cache, keyed by path
74- func (nilCache ) PutJSON (ctx context.Context , path string , r io.Reader ) error {
75- return ErrNoCache
// uintSize is the width of a uint in bits, 32 or 64 depending on the platform:
// the top bit of an all-ones uint survives a 63-bit shift only when uint is
// 64 bits wide
const uintSize = 32 << (^uint(0) >> 63)
147+
148+ func (c * osCache ) addAndPurgeExcess (cacheKey string , modTime int , size uint64 ) {
149+ c .infoLk .Lock ()
150+ defer c .infoLk .Unlock ()
151+ c .info .Sizes [cacheKey ] = uint64 (len (data ))
152+ c .info .ModTimes [cacheKey ] = modTime
153+
154+ var (
155+ lowestKey string
156+ lowestModTime int
157+ )
158+
159+ for c .info .Size () > c .maxSize {
160+ lowestKey = ""
161+ lowestModTime = 1 << (uintSize - 1 ) - 1
162+
163+ for key , modTime := range c .info .ModTimes {
164+ if modTime < lowestModTime {
165+ lowestKey = key
166+ }
167+ }
168+ if lowestKey == "" {
169+ break
170+ }
171+ if err := os .Remove (c .componentFilepath (lowestKey )); err != nil {
172+ break
173+ }
174+ delete (c .info .Sizes , lowestKey )
175+ delete (c .info .ModTimes , lowestKey )
176+ }
76177}
77178
78- // JSON gets cached byte data for a path
79- func (nilCache ) JSON (ctx context.Context , path string ) (r io.Reader , err error ) {
80- return nil , ErrNoCache
// osCacheInfoFilename is the name of the file, inside the cache root
// directory, that readCacheInfo decodes the cache info from and
// writeCacheInfo writes it to
const osCacheInfoFilename = "info.json"
180+
// cacheInfo is the bookkeeping record of an osCache, stored as JSON in the
// cache info file
type cacheInfo struct {
	// Sizes holds the recorded byte size of each entry, by cache key
	Sizes map[string]uint64
	// ModTimes holds the recorded modTime of each entry, by cache key
	ModTimes map[string]int
}

// Size adds up every recorded entry size; it is zero when nothing is recorded
func (ci cacheInfo) Size() (size uint64) {
	var total uint64
	for key := range ci.Sizes {
		total += ci.Sizes[key]
	}
	return total
}
192+
193+ func (c * osCache ) readCacheInfo () error {
194+ c .infoLk .Lock ()
195+ defer c .infoLk .Unlock ()
196+
197+ name := filepath .Join (c .root , osCacheInfoFilename )
198+ f , err := os .Open (name )
199+ if err != nil {
200+ if os .IsNotExist (err ) {
201+ c .info = & cacheInfo {}
202+ return nil
203+ }
204+ return err
205+ }
206+
207+ defer f .Close ()
208+
209+ c .info = & cacheInfo {}
210+ if err := json .NewDecoder (f ).Decode (c .info ); err != nil {
211+ // corrupt cache
212+ return fmt .Errorf ("%w decoding stats info: %s" , ErrCacheCorrupt , err )
213+ }
214+
215+ return nil
216+ }
217+
218+ func (c * osCache ) writeCacheInfo () error {
219+ c .infoLk .Lock ()
220+ defer c .infoLk .Unlock ()
221+
222+ name := filepath .Join (c .root , osCacheInfoFilename )
223+ data , err := json .Marshal (c .info )
224+ if err != nil {
225+ return err
226+ }
227+
228+ return ioutil .WriteFile (name , data , 0644 )
81229}
0 commit comments