You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

403 lines
9.3 KiB

// Package toot caches files so that re-downloading them is unnecessary.
package toot
import (
"io"
"os"
"fmt"
"sync"
"time"
"errors"
"io/ioutil"
"crypto/sha1"
"encoding/hex"
"encoding/json"
)
// Defaults for the cacher objects.
const (
	// CacheDir is the default directory for the cacher objects. It is
	// appended to the current working directory when files are stored.
	CacheDir string = "/cache/"
	// DefaultCacheSize is the default size of the cache, in entries.
	DefaultCacheSize int = 100
)
// Cacher is the interface to the cacher: it downloads urls, keeps track of
// the resulting cache objects and evicts them again.
//
// NOTE(review): *Cache in this file defines GetReader (returning *os.File)
// but no GetRead method, so it does not appear to satisfy this interface as
// written. Confirm before relying on it.
type Cacher interface {
// Download downloads an url and creates a CacheObject in the Cacher.
Download ( string ) error
// DownloadUncached downloads an url and returns its byte-contents without
// storing them in the Cacher.
DownloadUncached ( string ) ([]byte, error)
// Evict evicts the single element referenced by url from the cacher.
Evict ( string ) error
// EvictOne evicts a single element from the cacher based on some heuristic
// (*Cache picks the evictable entry with the lowest access count).
EvictOne () error
// Get returns a string for the object referenced by url and updates the
// access stats; *Cache returns the local path of the cached file.
Get ( string ) ( string, error )
// GetRead presumably returns a reader for the object referenced by url.
// No implementation is visible in this file, so verify against the callers.
GetRead ( string ) ( *io.Reader, error )
// IsCached returns whether or not a given url is cached.
IsCached ( string ) bool
// SetHot sets an entry as hot, i.e. it will never be evicted by EvictOne.
SetHot ( string ) error
// EmptyCache empties the cache.
EmptyCache () error
// CheckSha1 checks the sha1 of a cache object and evicts the entry if the
// hash does not match.
CheckSha1 ( *CacheObject ) error
// Persist persists the information of the cacher under the given file name.
Persist ( string ) error
}
// CacheObject is the bookkeeping record for one cached url.
//
// The field order matters: Download fills this struct with an unkeyed
// composite literal, so fields must not be reordered or inserted without
// updating that literal.
type CacheObject struct {
// Created is set to the current time when the object is downloaded.
Created time.Time
// LastAccess is refreshed every time the object is fetched through the cache.
LastAccess time.Time
// AccessCount is incremented on every fetch and dampened by NextEpoch; the
// entry with the lowest count is the one EvictOne removes.
AccessCount int
// Epoch is the cache epoch last written to this entry by NextEpoch.
Epoch int
// DontEvict marks the entry as hot; EvictOne skips such entries.
DontEvict bool
// Url is the url the object was downloaded from and its key in the cache map.
Url string
// LocalPath is the path of the downloaded file on disk.
LocalPath string
// Variant lists derived files registered for this object.
Variant []CacheVariant
// Sha1 is the SHA-1 of the downloaded content, used to verify the local file.
Sha1 [20]byte
}
// CacheVariant names one derived file that belongs to a CacheObject.
type CacheVariant struct {
// Name identifies the variant; lookups compare against this value.
Name string
// File is the path of the variant file (makeVariant uses LocalPath + "_" + Name).
File string
}
// Cache holds the cached objects together with the eviction settings.
// All fields are exported, so Persist writes all of them to the JSON file.
type Cache struct {
// Cache maps an url to its cache object.
Cache map[string]*CacheObject
// MaxSize is the number of entries at which a fetch of an uncached url
// evicts one entry first.
MaxSize int
// Epoch is the current epoch counter; NextEpoch increments it.
Epoch int
// Mutex guards modifications of the map and of the cache objects.
// NOTE(review): several methods read the map without taking it.
Mutex sync.Mutex
}
// NewCacher creates a new, empty cacher object whose MaxSize is maxSize and
// whose epoch counter starts at 1.
func NewCacher(maxSize int) *Cache {
	return &Cache{
		Cache:   make(map[string]*CacheObject),
		MaxSize: maxSize,
		Epoch:   1,
	}
}
// NewCacherFromFile reads a cacher meta data file and returns a new cacher
// object built from it.
//
// It always returns a usable cache object: if the file cannot be opened or
// decoded, an empty cache of DefaultCacheSize is returned together with the
// error. Entries whose local file no longer matches the stored hash are
// evicted. MaxSize is raised to the number of loaded entries if necessary.
// The cache folder itself is not emptied.
func NewCacherFromFile(filename string) (*Cache, error) {
	c := NewCacher(DefaultCacheSize)
	f, err := os.Open(filename)
	if err != nil {
		return c, err
	}
	defer f.Close()

	dec := json.NewDecoder(f)
	for dec.More() {
		// Decode into the existing object: a JSON null then leaves it
		// untouched instead of turning c into a nil pointer.
		if err := dec.Decode(c); err != nil {
			// c may be partially filled, so hand back a pristine cache
			// whose map can be written to.
			return NewCacher(DefaultCacheSize), err
		}
	}
	// A document containing "Cache": null resets the map; restore it so
	// later inserts do not panic.
	if c.Cache == nil {
		c.Cache = make(map[string]*CacheObject)
	}

	// evict wrong entries
	for _, v := range c.Cache {
		if err := c.CheckSha1(v); err != nil {
			fmt.Println("sha check:", err)
		}
	}
	if len(c.Cache) > c.MaxSize {
		c.MaxSize = len(c.Cache)
	}
	return c, nil
}
// Debug prints MaxSize and every entry of the cache to standard output.
func (c *Cache) Debug() {
	fmt.Println("MaxSize: ", c.MaxSize)
	for url, entry := range c.Cache {
		fmt.Println(" --> Cache: ", url, ": ", entry)
	}
}
// CheckSha1 reports whether the file at obj.LocalPath still has the SHA-1
// stored in obj.Sha1. It returns false if the file cannot be opened or read.
func (obj *CacheObject) CheckSha1() bool {
	f, err := os.Open(obj.LocalPath)
	if err != nil {
		return false
	}
	// Release the descriptor on every path; the check runs on each fetch.
	defer f.Close()

	// Stream the file through the hash instead of loading it completely.
	h := sha1.New()
	if _, err := io.Copy(h, f); err != nil {
		return false
	}
	var sum [sha1.Size]byte
	copy(sum[:], h.Sum(nil))
	return sum == obj.Sha1
}
// CheckSha1 verifies the hash of obj's local file and evicts the entry stored
// under obj.Url if the hash does not match. It returns an error when obj is
// nil or when the entry had to be evicted, and nil when the hash is correct.
func (c *Cache) CheckSha1(obj *CacheObject) error {
	switch {
	case obj == nil:
		return errors.New("Cannot check, which is not present")
	case !obj.CheckSha1():
		// The result of Evict is not checked.
		c.Evict(obj.Url)
		return errors.New("Hashes wrong :O, Evicted entry")
	default:
		return nil
	}
}
// DownloadUncached downloads url and returns its byte-contents without
// touching the cache. It returns an error if the request fails, if the status
// code is not 200 or if the body cannot be read.
func (c *Cache) DownloadUncached(url string) ([]byte, error) {
	// uses the client directly
	// NOTE(review): lc is declared outside this file, presumably wrapping an
	// HTTP client. Verify.
	resp, err := lc.client.Get(url)
	if err != nil {
		return nil, err
	}
	// Register the close before the status check so the body is also
	// released for non-200 responses.
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return nil, errors.New("Statuscode != 200")
	}
	buffer, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	return buffer, nil
}
// Download fetches url, stores the content in a file named after its SHA-1
// inside CacheDir (relative to the working directory) and registers a fresh
// CacheObject for url in the cache, replacing any previous entry.
func (c *Cache) Download(url string) error {
	content, err := c.DownloadUncached(url)
	if err != nil {
		return err
	}
	digest := sha1.Sum(content)

	cwd, err := os.Getwd()
	if err != nil {
		return err
	}
	target := cwd + CacheDir + hex.EncodeToString(digest[:])
	if err := os.MkdirAll(cwd+CacheDir, os.ModeDir|0777); err != nil {
		return err
	}
	if err := ioutil.WriteFile(target, content, 0644); err != nil {
		return err
	}

	// Only the map update happens under the lock.
	c.Mutex.Lock()
	defer c.Mutex.Unlock()
	c.Cache[url] = &CacheObject{
		Created:     time.Now(),
		LastAccess:  time.Now(),
		AccessCount: 0,
		Epoch:       0,
		DontEvict:   false,
		Url:         url,
		LocalPath:   target,
		Variant:     []CacheVariant{},
		Sha1:        digest,
	}
	return nil
}
// EpochDampener returns the access count an entry keeps when a new epoch
// starts: accessCount halved using integer division.
func (c *Cache) EpochDampener(accessCount int) int {
	const divisor = 2
	return accessCount / divisor
}
// NextEpoch advances the cache to the next epoch. Under the lock it increments
// the epoch counter, stamps every entry with the new epoch and dampens each
// entry's access count via EpochDampener.
func (c *Cache) NextEpoch() {
	c.Mutex.Lock()
	defer c.Mutex.Unlock()

	c.Epoch++
	for _, entry := range c.Cache {
		entry.Epoch = c.Epoch
		entry.AccessCount = c.EpochDampener(entry.AccessCount)
	}
}
// GetReader resolves url through Get and opens the resulting local file for
// reading. The caller is responsible for closing the returned file.
func (c *Cache) GetReader(url string) (*os.File, error) {
	path, err := c.Get(url)
	if err != nil {
		return nil, err
	}
	return os.Open(path)
}
// Get returns the local path of the file cached for url. If the url is not
// cached, or its local file fails the hash check, it is downloaded first.
// Every successful call updates the entry's access stats.
//
// The lookup is delegated to get, which also covers the case that the entry
// is missing after the download; that case is reported as an error instead
// of dereferencing a nil object.
func (c *Cache) Get(url string) (string, error) {
	obj, err := c.get(url)
	if err != nil {
		return "", err
	}
	return obj.LocalPath, nil
}
// get returns the cache object for url. If the url is not cached, or its local
// file fails the hash check, the url is downloaded first; one entry is evicted
// beforehand when the cache holds MaxSize or more entries. On success the
// entry's LastAccess and AccessCount are updated under the lock.
func (c *Cache) get(url string) (*CacheObject, error) {
	cached := c.Cache[url]
	if c.CheckSha1(cached) != nil {
		// Absent or corrupt: treat it as a miss.
		cached = nil
	}
	if cached == nil {
		if c.MaxSize >= 0 && len(c.Cache) >= c.MaxSize {
			// The result of EvictOne is not checked.
			c.EvictOne()
		}
		if err := c.Download(url); err != nil {
			return nil, err
		}
	}

	c.Mutex.Lock()
	defer c.Mutex.Unlock()
	obj := c.Cache[url]
	if obj == nil {
		return obj, errors.New("No object found")
	}
	obj.LastAccess = time.Now()
	obj.AccessCount++
	return obj, nil
}
// isVariant looks up the variant called variant on obj and returns the File of
// the first match, or the empty string if obj has no such variant.
func isVariant(obj *CacheObject, variant string) string {
	for i := range obj.Variant {
		if obj.Variant[i].Name == variant {
			return obj.Variant[i].File
		}
	}
	return ""
}
// GetVariant returns the file registered as variant of the object cached for
// url. It returns the error of the underlying lookup if the object cannot be
// obtained, and an error if the object has no variant of that name.
func (c *Cache) GetVariant(url string, variant string) (string, error) {
	obj, err := c.get(url)
	if err != nil {
		// Propagate the failure instead of reporting ("", nil), which a
		// caller cannot tell apart from a successful lookup.
		return "", err
	}
	x := isVariant(obj, variant)
	if x == "" {
		return "", errors.New("Cannot find this variant")
	}
	return x, nil
}
// makeVariant registers a new variant on obj and returns its file name, which
// is obj.LocalPath followed by "_" and the variant name. It does not check
// whether a variant of that name already exists.
func makeVariant(obj *CacheObject, variant string) string {
	entry := CacheVariant{
		Name: variant,
		File: obj.LocalPath + "_" + variant,
	}
	obj.Variant = append(obj.Variant, entry)
	return entry.File
}
// PutVariantUser registers variant for the object cached for url and returns
// the variant's file name and the object's local path.
//
// If the variant is already registered, its existing file name and the local
// path are returned together with an error, and nothing is added. This keeps
// repeated calls from appending duplicate variant entries.
func (c *Cache) PutVariantUser(url string, variant string) (string, string, error) {
	obj, err := c.get(url)
	if err != nil {
		return "", "", err
	}
	if existing := isVariant(obj, variant); existing != "" {
		return existing, obj.LocalPath, errors.New("Variant already exists")
	}
	return makeVariant(obj, variant), obj.LocalPath, nil
}
// Evict removes the entry for url from the cache map and returns an error if
// there is no such entry. It expects the Mutex-Lock to be taken by the caller!
//
// The file on disk is left in place: deleting it would need to be deferred.
func (c *Cache) Evict(url string) error {
	if c.Cache[url] == nil {
		return errors.New("Cannot evict nil")
	}
	delete(c.Cache, url)
	// We would need to defer deleting the file...
	//err := os.Remove ( obj.LocalPath )
	//if err != nil {
	// return err
	//}
	return nil
}
// EvictOne evicts the entry with the lowest AccessCount among all entries that
// are not marked DontEvict. It takes the lock for the whole operation and
// returns an error if no entry can be evicted.
func (c *Cache) EvictOne() error {
	c.Mutex.Lock()
	defer c.Mutex.Unlock()

	// "" and -1 mean "no candidate found yet".
	victim, lowest := "", -1
	for url, entry := range c.Cache {
		if entry == nil || entry.DontEvict {
			continue
		}
		if lowest == -1 || entry.AccessCount < lowest {
			victim, lowest = url, entry.AccessCount
		}
	}
	if victim == "" {
		return errors.New("Cannot evict, which is not evictable")
	}
	return c.Evict(victim)
}
// IsCached reports whether the cache holds a non-nil entry for url.
func (c *Cache) IsCached(url string) bool {
	return c.Cache[url] != nil
}
// SetHot marks the entry for url as hot by setting DontEvict, so EvictOne
// skips it. It returns an error if url has no entry.
func (c *Cache) SetHot(url string) error {
	entry := c.Cache[url]
	if entry == nil {
		return errors.New("Cannot setHot nil")
	}
	// Only the flag update happens under the lock.
	c.Mutex.Lock()
	entry.DontEvict = true
	c.Mutex.Unlock()
	return nil
}
// EmptyCache drops every entry from the cache and removes the cache directory
// (CacheDir below the current working directory) with all its files. The map
// is reset even if the directory cannot be determined or removed; the error
// of that step is returned.
func (c *Cache) EmptyCache() error {
	c.Mutex.Lock()
	defer c.Mutex.Unlock()

	c.Cache = make(map[string]*CacheObject)
	cwd, err := os.Getwd()
	if err != nil {
		return err
	}
	return os.RemoveAll(cwd + CacheDir)
}
// Persist writes the cache meta data as indented JSON to filename (mode 0644)
// while holding the lock. It also prints the cache to standard output as
// debug information. It returns the marshalling or write error, if any.
func (c *Cache) Persist(filename string) error {
	c.Mutex.Lock()
	defer c.Mutex.Unlock()

	// Printf, not Println: the first argument is a format string.
	fmt.Printf("%+v\n", c)
	output, err := json.MarshalIndent(c, "", " ")
	if err != nil {
		return err
	}
	return ioutil.WriteFile(filename, output, 0644)
}