add modTime & refresh if file changed
Signed-off-by: Hadi <112569860+anotherhadi@users.noreply.github.com>
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/charmbracelet/log"
|
"github.com/charmbracelet/log"
|
||||||
)
|
)
|
||||||
@@ -15,12 +16,11 @@ type Dataleak struct {
|
|||||||
Columns []string
|
Columns []string
|
||||||
Length uint64
|
Length uint64
|
||||||
Size uint64
|
Size uint64
|
||||||
|
ModTime time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
const CACHE_FILENAME = "dataleaks_cache.json"
|
const CACHE_FILENAME = "dataleaks_cache.json"
|
||||||
|
|
||||||
// TODO: check os.FileInfo.ModTime() to see if the file has changed since last cache update
|
|
||||||
|
|
||||||
func Cache(s *Server) error {
|
func Cache(s *Server) error {
|
||||||
if len(s.Settings.Folders) == 0 {
|
if len(s.Settings.Folders) == 0 {
|
||||||
return nil
|
return nil
|
||||||
@@ -44,38 +44,42 @@ func Cache(s *Server) error {
|
|||||||
log.Warn("Failed to read dataleaks cache file", "error", err)
|
log.Warn("Failed to read dataleaks cache file", "error", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter out non-existent files
|
dataleakMap := make(map[string]Dataleak, len(dataleaks))
|
||||||
|
for _, d := range dataleaks {
|
||||||
|
dataleakMap[d.Path] = d
|
||||||
|
}
|
||||||
|
|
||||||
filteredDataleaks := []Dataleak{}
|
filteredDataleaks := []Dataleak{}
|
||||||
writeOutput := false
|
writeOutput := false
|
||||||
for _, d := range dataleaks {
|
|
||||||
if _, err := os.Stat(d.Path); err == nil {
|
|
||||||
filteredDataleaks = append(filteredDataleaks, d)
|
|
||||||
} else if os.IsNotExist(err) {
|
|
||||||
log.Info("Removing non-existent file from cache", "path", d.Path)
|
|
||||||
writeOutput = true
|
|
||||||
} else {
|
|
||||||
log.Error("Error checking file existence", "path", d.Path, "error", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
dataleaks = filteredDataleaks
|
|
||||||
|
|
||||||
// Create a map for quick lookups
|
|
||||||
dataleakMap := make(map[string]struct{}, len(dataleaks))
|
|
||||||
for _, d := range dataleaks {
|
|
||||||
dataleakMap[d.Path] = struct{}{}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add new files
|
|
||||||
parquetFiles := getAllParquetFiles(s.Settings.Folders)
|
parquetFiles := getAllParquetFiles(s.Settings.Folders)
|
||||||
for _, p := range parquetFiles {
|
|
||||||
if _, found := dataleakMap[p]; found {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
writeOutput = true
|
for _, p := range parquetFiles {
|
||||||
dataleaks = append(dataleaks, getDataleak(*s, p))
|
currentModTime := getModTime(p)
|
||||||
|
cachedDataleak, found := dataleakMap[p]
|
||||||
|
|
||||||
|
if found {
|
||||||
|
if currentModTime.Equal(cachedDataleak.ModTime) {
|
||||||
|
filteredDataleaks = append(filteredDataleaks, cachedDataleak)
|
||||||
|
} else {
|
||||||
|
log.Info("File modification time changed, re-caching dataleak", "path", p)
|
||||||
|
writeOutput = true
|
||||||
|
filteredDataleaks = append(filteredDataleaks, getDataleak(*s, p))
|
||||||
|
}
|
||||||
|
delete(dataleakMap, p)
|
||||||
|
} else {
|
||||||
|
log.Info("Found new dataleak file, caching", "path", p)
|
||||||
|
writeOutput = true
|
||||||
|
filteredDataleaks = append(filteredDataleaks, getDataleak(*s, p))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for path := range dataleakMap {
|
||||||
|
log.Info("Removing non-existent file from cache", "path", path)
|
||||||
|
writeOutput = true
|
||||||
|
}
|
||||||
|
|
||||||
|
dataleaks = filteredDataleaks
|
||||||
|
|
||||||
if writeOutput {
|
if writeOutput {
|
||||||
data, err := json.MarshalIndent(dataleaks, "", " ")
|
data, err := json.MarshalIndent(dataleaks, "", " ")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -107,5 +111,6 @@ func getDataleak(s Server, path string) Dataleak {
|
|||||||
Columns: getParquetColumns(s, path),
|
Columns: getParquetColumns(s, path),
|
||||||
Length: getParquetLength(s, path),
|
Length: getParquetLength(s, path),
|
||||||
Size: getFileSize(path),
|
Size: getFileSize(path),
|
||||||
|
ModTime: getModTime(path),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
func getParquetColumns(s Server, path string) []string {
|
func getParquetColumns(s Server, path string) []string {
|
||||||
@@ -82,6 +83,14 @@ func getFileSize(path string) uint64 {
|
|||||||
return uint64(info.Size() / (1024 * 1024)) // MB
|
return uint64(info.Size() / (1024 * 1024)) // MB
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getModTime reports the last-modification time of the file at path.
//
// The lookup is best-effort: when os.Stat returns an error, the zero
// time.Time is returned instead of the error.
func getModTime(path string) time.Time {
	if fi, statErr := os.Stat(path); statErr == nil {
		return fi.ModTime()
	}
	// Stat failed; fall back to the zero value.
	return time.Time{}
}
|
||||||
|
|
||||||
func FormatParquetName(path string) string {
|
func FormatParquetName(path string) string {
|
||||||
_, file := filepath.Split(path)
|
_, file := filepath.Split(path)
|
||||||
fileName := strings.TrimSuffix(file, ".parquet")
|
fileName := strings.TrimSuffix(file, ".parquet")
|
||||||
|
|||||||
Reference in New Issue
Block a user