Add ModTime to cached dataleaks and refresh a cache entry when its file has changed

Signed-off-by: Hadi <112569860+anotherhadi@users.noreply.github.com>
This commit is contained in:
Hadi
2025-09-29 21:31:10 +02:00
parent 3f2699f7e0
commit 6b0bf83c97
2 changed files with 42 additions and 28 deletions

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path/filepath"
"time"
"github.com/charmbracelet/log"
)
@@ -15,12 +16,11 @@ type Dataleak struct {
Columns []string
Length uint64
Size uint64
ModTime time.Time
}
const CACHE_FILENAME = "dataleaks_cache.json"
// TODO: check os.FileInfo.ModTime() to see if the file has changed since last cache update
func Cache(s *Server) error {
if len(s.Settings.Folders) == 0 {
return nil
@@ -44,38 +44,42 @@ func Cache(s *Server) error {
log.Warn("Failed to read dataleaks cache file", "error", err)
}
// Filter out non-existent files
dataleakMap := make(map[string]Dataleak, len(dataleaks))
for _, d := range dataleaks {
dataleakMap[d.Path] = d
}
filteredDataleaks := []Dataleak{}
writeOutput := false
for _, d := range dataleaks {
if _, err := os.Stat(d.Path); err == nil {
filteredDataleaks = append(filteredDataleaks, d)
} else if os.IsNotExist(err) {
log.Info("Removing non-existent file from cache", "path", d.Path)
writeOutput = true
} else {
log.Error("Error checking file existence", "path", d.Path, "error", err)
}
}
dataleaks = filteredDataleaks
// Create a map for quick lookups
dataleakMap := make(map[string]struct{}, len(dataleaks))
for _, d := range dataleaks {
dataleakMap[d.Path] = struct{}{}
}
// Add new files
parquetFiles := getAllParquetFiles(s.Settings.Folders)
for _, p := range parquetFiles {
if _, found := dataleakMap[p]; found {
continue
currentModTime := getModTime(p)
cachedDataleak, found := dataleakMap[p]
if found {
if currentModTime.Equal(cachedDataleak.ModTime) {
filteredDataleaks = append(filteredDataleaks, cachedDataleak)
} else {
log.Info("File modification time changed, re-caching dataleak", "path", p)
writeOutput = true
filteredDataleaks = append(filteredDataleaks, getDataleak(*s, p))
}
delete(dataleakMap, p)
} else {
log.Info("Found new dataleak file, caching", "path", p)
writeOutput = true
filteredDataleaks = append(filteredDataleaks, getDataleak(*s, p))
}
}
for path := range dataleakMap {
log.Info("Removing non-existent file from cache", "path", path)
writeOutput = true
dataleaks = append(dataleaks, getDataleak(*s, p))
}
dataleaks = filteredDataleaks
if writeOutput {
data, err := json.MarshalIndent(dataleaks, "", " ")
if err != nil {
@@ -107,5 +111,6 @@ func getDataleak(s Server, path string) Dataleak {
Columns: getParquetColumns(s, path),
Length: getParquetLength(s, path),
Size: getFileSize(path),
ModTime: getModTime(path),
}
}

View File

@@ -6,6 +6,7 @@ import (
"os"
"path/filepath"
"strings"
"time"
)
func getParquetColumns(s Server, path string) []string {
@@ -82,6 +83,14 @@ func getFileSize(path string) uint64 {
return uint64(info.Size() / (1024 * 1024)) // MB
}
// getModTime reports the last-modification time of the file at path.
//
// The lookup is best-effort: when os.Stat fails for any reason, the zero
// time.Time is returned instead of an error.
func getModTime(path string) time.Time {
	var modified time.Time
	if fi, statErr := os.Stat(path); statErr == nil {
		modified = fi.ModTime()
	}
	return modified
}
func FormatParquetName(path string) string {
_, file := filepath.Split(path)
fileName := strings.TrimSuffix(file, ".parquet")