init
This commit is contained in:
111
back/server/dataleak.go
Normal file
111
back/server/dataleak.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/charmbracelet/log"
|
||||
)
|
||||
|
||||
// Dataleak describes one parquet file exposed by the server, together with
// metadata cached so the file does not need to be re-probed on every start.
type Dataleak struct {
	Path    string   // Absolute path of the parquet file on disk.
	Name    string   // Human-readable name derived from the file name (see FormatParquetName).
	Columns []string // Column names reported by DuckDB for this file.
	Length  uint64   // Number of rows in the file.
	Size    uint64   // File size in whole megabytes (see getFileSize).
}
|
||||
|
||||
const CACHE_FILENAME = "dataleaks_cache.json"
|
||||
|
||||
// TODO: check os.FileInfo.ModTime() to see if the file has changed since last cache update
|
||||
|
||||
func Cache(s *Server) error {
|
||||
if len(s.Settings.Folders) == 0 {
|
||||
return nil
|
||||
}
|
||||
if s.Settings.CacheFolder == "" {
|
||||
s.Settings.CacheFolder = s.Settings.Folders[0]
|
||||
}
|
||||
if err := createDirectoryIfNotExists(s.Settings.CacheFolder); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cacheFile := filepath.Join(s.Settings.CacheFolder, CACHE_FILENAME)
|
||||
dataleaks := []Dataleak{}
|
||||
|
||||
data, err := os.ReadFile(cacheFile)
|
||||
if err == nil {
|
||||
if err := json.Unmarshal(data, &dataleaks); err != nil {
|
||||
log.Warn("Failed to unmarshal dataleaks cache", "error", err)
|
||||
}
|
||||
} else {
|
||||
log.Warn("Failed to read dataleaks cache file", "error", err)
|
||||
}
|
||||
|
||||
// Filter out non-existent files
|
||||
filteredDataleaks := []Dataleak{}
|
||||
writeOutput := false
|
||||
for _, d := range dataleaks {
|
||||
if _, err := os.Stat(d.Path); err == nil {
|
||||
filteredDataleaks = append(filteredDataleaks, d)
|
||||
} else if os.IsNotExist(err) {
|
||||
log.Info("Removing non-existent file from cache", "path", d.Path)
|
||||
writeOutput = true
|
||||
} else {
|
||||
log.Error("Error checking file existence", "path", d.Path, "error", err)
|
||||
}
|
||||
}
|
||||
dataleaks = filteredDataleaks
|
||||
|
||||
// Create a map for quick lookups
|
||||
dataleakMap := make(map[string]struct{}, len(dataleaks))
|
||||
for _, d := range dataleaks {
|
||||
dataleakMap[d.Path] = struct{}{}
|
||||
}
|
||||
|
||||
// Add new files
|
||||
parquetFiles := getAllParquetFiles(s.Settings.Folders)
|
||||
for _, p := range parquetFiles {
|
||||
if _, found := dataleakMap[p]; found {
|
||||
continue
|
||||
}
|
||||
|
||||
writeOutput = true
|
||||
dataleaks = append(dataleaks, getDataleak(*s, p))
|
||||
}
|
||||
|
||||
if writeOutput {
|
||||
data, err := json.MarshalIndent(dataleaks, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("error marshalling cache: %w", err)
|
||||
}
|
||||
if err := os.WriteFile(cacheFile, data, 0644); err != nil {
|
||||
return fmt.Errorf("error writing cache: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
s.Dataleaks = &dataleaks
|
||||
totalDataleaks := uint64(len(dataleaks))
|
||||
totalRows := uint64(0)
|
||||
totalSize := uint64(0)
|
||||
for _, d := range dataleaks {
|
||||
totalRows += d.Length
|
||||
totalSize += d.Size
|
||||
}
|
||||
s.TotalDataleaks = &totalDataleaks
|
||||
s.TotalSize = &totalSize
|
||||
s.TotalRows = &totalRows
|
||||
return nil
|
||||
}
|
||||
|
||||
// getDataleak builds a Dataleak entry for the parquet file at path, querying
// DuckDB (via s.Duckdb) for its columns and row count, and the filesystem for
// its size. Helper failures are non-fatal: the corresponding fields simply
// end up empty or zero.
func getDataleak(s Server, path string) Dataleak {
	return Dataleak{
		Path:    path,
		Name:    FormatParquetName(path),
		Columns: getParquetColumns(s, path),
		Length:  getParquetLength(s, path),
		Size:    getFileSize(path), // whole megabytes
	}
}
|
||||
61
back/server/server.go
Normal file
61
back/server/server.go
Normal file
@@ -0,0 +1,61 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/charmbracelet/log"
|
||||
"github.com/gin-gonic/gin"
|
||||
_ "github.com/marcboeker/go-duckdb"
|
||||
)
|
||||
|
||||
// Server bundles the application state: the environment-derived settings, the
// cached dataleak list with its aggregate totals, the HTTP router, and the
// DuckDB connection used to query parquet files.
type Server struct {
	Settings ServerSettings

	// Dataleaks is the cached parquet-file list; rebuilt by Cache.
	Dataleaks *[]Dataleak

	// Aggregate totals over Dataleaks, recomputed by Cache.
	TotalRows      *uint64
	TotalDataleaks *uint64
	TotalSize      *uint64 // MB

	Router *gin.Engine
	Duckdb *sql.DB
	// Mu guards concurrent access to the cached list and totals above.
	Mu *sync.RWMutex
}
|
||||
|
||||
// NewServer constructs the application server: it loads settings from the
// environment, opens an in-memory DuckDB connection, performs an initial
// dataleak cache load, and starts a background goroutine that refreshes the
// cache every Settings.ReloadDataleaksInterval.
//
// It panics when DuckDB cannot be opened or the initial cache load fails, so
// the process fails fast on startup misconfiguration.
func NewServer() *Server {
	// NOTE(review): all three totals initially alias the single `zero`
	// variable; Cache replaces the pointers wholesale, but writing through
	// any of them would affect all three — confirm no caller does that.
	zero := uint64(0)
	emptyDataleak := []Dataleak{}
	s := &Server{
		Settings:       LoadServerSettings(),
		Mu:             &sync.RWMutex{},
		TotalDataleaks: &zero,
		TotalRows:      &zero,
		TotalSize:      &zero,
		Dataleaks:      &emptyDataleak,
	}

	var err error

	// Empty DSN: in-memory DuckDB instance used for all parquet queries.
	s.Duckdb, err = sql.Open("duckdb", "")
	if err != nil {
		panic(err)
	}

	// The initial cache load must succeed before the server is returned.
	err = Cache(s)
	if err != nil {
		panic(err)
	}
	// Periodic cache refresh. The goroutine runs for the life of the process;
	// there is no stop mechanism (NOTE(review): consider a context or ticker
	// for clean shutdown).
	go func() {
		for {
			time.Sleep(s.Settings.ReloadDataleaksInterval)
			err := Cache(s)
			if err != nil {
				log.Error(err)
			}
		}
	}()

	return s
}
|
||||
129
back/server/settings.go
Normal file
129
back/server/settings.go
Normal file
@@ -0,0 +1,129 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
github_recon_settings "github.com/anotherhadi/github-recon/settings"
|
||||
)
|
||||
|
||||
// ServerSettings holds the runtime configuration, loaded from environment
// variables by LoadServerSettings. Fields tagged `json:"-"` (secrets and
// server-local values) are excluded when the settings are serialized to JSON.
type ServerSettings struct {
	Port               int `json:"-"` // Port to run the server on
	Debug              bool
	Password           string `json:"-"` // Do not expose the password in JSON
	MinimumQueryLength int    // Minimum allowed search-query length
	MaxCacheDuration   time.Duration // Delete a search from the cache after this duration

	// Dataleaks
	Folders     []string // Folders to search in for parquets, recursive
	CacheFolder string   // Where the dataleaks cache JSON lives; defaults to Folders[0] (see Cache)
	BaseColumns []string // Use these columns when column="all"
	Limit       int      // Limit number of rows returned
	ReloadDataleaksInterval time.Duration // Reload dataleaks files from disk every X

	// OSINT Tools
	GithubRecon       bool   // Activate github-recon OSINT tool
	GithubToken       string `json:"-"` // Github token for github-recon
	GithubTokenLoaded bool   // True when a usable (non-empty, non-"null") token was resolved
	GithubDeepMode    bool   // Deep mode for github-recon
}
|
||||
|
||||
// LoadServerSettings reads every setting from the environment, falling back
// to the documented default when a variable is unset or invalid.
//
// The GitHub token uses "null" as a sentinel default meaning "not provided
// via GITHUB_TOKEN"; in that case the token is resolved through
// github-recon's own GetToken lookup, and GithubTokenLoaded records whether a
// usable token ended up available.
func LoadServerSettings() ServerSettings {
	ss := ServerSettings{
		Port:               getEnvPortOrDefault("PORT", 9198),
		Debug:              getEnvBoolOrDefault("DEBUG", false),
		Password:           getEnvStringOrDefault("PASSWORD", ""),
		MinimumQueryLength: getEnvIntOrDefault("MINIMUM_QUERY_LENGTH", 3),
		MaxCacheDuration:   getEnvDurationOrDefault("MAX_CACHE_DURATION", 24*time.Hour),

		// Dataleaks
		Folders:                 getEnvStringListOrDefault("DATALEAKS_FOLDERS", []string{}),
		CacheFolder:             getEnvStringOrDefault("DATALEAKS_CACHE_FOLDER", ""),
		BaseColumns:             getEnvStringListOrDefault("BASE_COLUMNS", []string{"email", "username", "password", "full_name", "phone", "url"}),
		Limit:                   getEnvIntOrDefault("LIMIT", 100),
		ReloadDataleaksInterval: getEnvDurationOrDefault("RELOAD_DATALEAKS_INTERVAL", 20*time.Minute),

		// OSINT Tools
		GithubRecon:    getEnvBoolOrDefault("GITHUB_RECON", true),
		GithubToken:    getEnvStringOrDefault("GITHUB_TOKEN", "null"),
		GithubDeepMode: getEnvBoolOrDefault("GITHUB_DEEP_MODE", false),
	}

	// No token from the environment: ask github-recon for one.
	if ss.GithubToken == "null" || strings.TrimSpace(ss.GithubToken) == "" {
		ss.GithubToken = github_recon_settings.GetToken()
	}

	// GetToken may itself come back empty; only then is the flag left false.
	if ss.GithubToken != "null" && strings.TrimSpace(ss.GithubToken) != "" {
		ss.GithubTokenLoaded = true
	}

	return ss
}
|
||||
|
||||
// getEnvStringOrDefault returns the whitespace-trimmed value of the
// environment variable envKey, or defaultValue when it is unset or blank.
func getEnvStringOrDefault(envKey, defaultValue string) string {
	if trimmed := strings.TrimSpace(os.Getenv(envKey)); trimmed != "" {
		return trimmed
	}
	return defaultValue
}
|
||||
|
||||
// getEnvBoolOrDefault interprets the environment variable envKey as a boolean
// ("true"/"1" and "false"/"0", case-insensitive) and returns defaultValue
// when it is unset, blank, or unrecognized.
func getEnvBoolOrDefault(envKey string, defaultValue bool) bool {
	switch strings.ToLower(strings.TrimSpace(os.Getenv(envKey))) {
	case "true", "1":
		return true
	case "false", "0":
		return false
	default:
		return defaultValue
	}
}
|
||||
|
||||
func getEnvDurationOrDefault(envKey string, defaultValue time.Duration) time.Duration {
|
||||
v := getEnvStringOrDefault(envKey, "")
|
||||
if v == "" {
|
||||
return defaultValue
|
||||
}
|
||||
t, err := time.ParseDuration(v)
|
||||
if err != nil {
|
||||
return defaultValue
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// getEnvStringListOrDefault parses the environment variable envKey as a
// comma-separated list, trimming whitespace around each item and discarding
// empty items (so stray or trailing commas are harmless). It returns
// defaultValue when the variable is unset, blank, or yields no items.
func getEnvStringListOrDefault(envKey string, defaultValue []string) []string {
	value := strings.TrimSpace(os.Getenv(envKey))
	if value == "" {
		return defaultValue
	}
	parts := strings.Split(value, ",")
	l := make([]string, 0, len(parts))
	for _, part := range parts {
		part = strings.TrimSpace(part)
		if part != "" {
			l = append(l, part)
		}
	}
	if len(l) == 0 {
		return defaultValue
	}
	return l
}
|
||||
|
||||
// getEnvIntOrDefault parses the environment variable envKey as a base-10
// integer and returns defaultValue when it is unset, blank, or not a number.
func getEnvIntOrDefault(envKey string, defaultValue int) int {
	raw := strings.TrimSpace(os.Getenv(envKey))
	if raw == "" {
		return defaultValue
	}
	n, convErr := strconv.Atoi(raw)
	if convErr != nil {
		return defaultValue
	}
	return n
}
|
||||
|
||||
func getEnvPortOrDefault(envKey string, defaultValue int) int {
|
||||
p := getEnvIntOrDefault(envKey, defaultValue)
|
||||
if p <= 0 || p >= 65534 {
|
||||
return defaultValue
|
||||
}
|
||||
return p
|
||||
}
|
||||
131
back/server/utils.go
Normal file
131
back/server/utils.go
Normal file
@@ -0,0 +1,131 @@
|
||||
package server
|
||||
|
||||
import (
	"database/sql"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"unicode"
)
|
||||
|
||||
// getParquetColumns returns the column names of the parquet file at path by
// running a DuckDB "DESCRIBE SELECT *" over it. Every failure mode (query
// error, scan error, iteration error, no columns) is swallowed and yields an
// empty — never nil — slice.
//
// NOTE(review): the path is interpolated directly into the SQL string. Paths
// come from the local filesystem walk, but one containing a single quote
// would break the query — confirm this is acceptable.
func getParquetColumns(s Server, path string) []string {
	query := fmt.Sprintf("DESCRIBE SELECT * FROM '%s';", path)

	rows, err := s.Duckdb.Query(query)
	if err != nil {
		return []string{}
	}
	defer rows.Close()

	var columns []string

	for rows.Next() {
		// DESCRIBE yields six columns per row; only the first (the column
		// name) is kept, but all six must be scanned.
		var columnName string
		var columnType string
		var nullable string

		// The last three may be NULL, hence sql.NullString.
		var key sql.NullString
		var defaultValue sql.NullString
		var extra sql.NullString

		if err := rows.Scan(&columnName, &columnType, &nullable, &key, &defaultValue, &extra); err != nil {
			return []string{}
		}
		columns = append(columns, columnName)
	}

	if err = rows.Err(); err != nil {
		return []string{}
	}

	// Normalize nil to an empty slice so callers (and JSON encoding) see [].
	if len(columns) == 0 {
		return []string{}
	}

	return columns
}
|
||||
|
||||
func getParquetLength(s Server, path string) uint64 {
|
||||
query := fmt.Sprintf("SELECT COUNT(*) FROM '%s';", path)
|
||||
|
||||
row := s.Duckdb.QueryRow(query)
|
||||
|
||||
var count uint64
|
||||
if err := row.Scan(&count); err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return count
|
||||
}
|
||||
|
||||
// getAllParquetFiles walks each folder (recursively) and returns the paths of
// every file whose name ends in ".parquet".
//
// It uses filepath.WalkDir (no extra Stat per entry) and skips entries that
// cannot be read instead of propagating the error: previously a single
// unreadable file or directory aborted the scan of the whole folder.
func getAllParquetFiles(folders []string) []string {
	var paths []string
	for _, baseDir := range folders {
		_ = filepath.WalkDir(baseDir, func(path string, d os.DirEntry, err error) error {
			if err != nil {
				// Unreadable entry (or missing baseDir): skip it, keep walking.
				return nil
			}
			if d.IsDir() || !strings.HasSuffix(d.Name(), ".parquet") {
				return nil
			}
			paths = append(paths, path)
			return nil
		})
	}
	return paths
}
|
||||
|
||||
// getFileSize reports the size of the file at path in whole megabytes
// (integer division, so anything under 1 MB reports 0), or 0 when the file
// cannot be stat'ed.
func getFileSize(path string) uint64 {
	fi, statErr := os.Stat(path)
	if statErr != nil {
		return 0
	}
	const megabyte = 1024 * 1024
	return uint64(fi.Size() / megabyte)
}
|
||||
|
||||
// FormatParquetName turns a parquet file path into a human-readable dataleak
// name.
//
// The file name (without the ".parquet" extension) is split on "-": the first
// part is the display name (underscores become spaces, each word is
// capitalized), and the remaining parts may carry "date_", "source_" or
// "notes_" prefixed metadata rendered between parentheses. Unrecognized parts
// are ignored.
//
// Capitalization is rune-aware: the previous byte-indexed
// strings.ToUpper(string(word[0])) corrupted multi-byte UTF-8 first letters.
func FormatParquetName(path string) string {
	_, file := filepath.Split(path)
	fileName := strings.TrimSuffix(file, ".parquet")

	parts := strings.Split(fileName, "-")
	sourceName := parts[0]
	var blocks []string

	for _, part := range parts[1:] {
		switch {
		case strings.HasPrefix(part, "date_"):
			// date_2023_01 -> "date: 2023/01"
			dateStr := strings.ReplaceAll(strings.TrimPrefix(part, "date_"), "_", "/")
			blocks = append(blocks, fmt.Sprintf("date: %s", dateStr))
		case strings.HasPrefix(part, "source_"):
			blocks = append(blocks, fmt.Sprintf("source: %s", strings.TrimPrefix(part, "source_")))
		case strings.HasPrefix(part, "notes_"):
			// notes_some_note -> "some note"
			noteStr := strings.ReplaceAll(strings.TrimPrefix(part, "notes_"), "_", " ")
			blocks = append(blocks, noteStr)
		}
	}

	// Title-case the source name, one rune at a time so non-ASCII first
	// letters survive. strings.Fields never yields empty words, so word[0]
	// (as a rune) always exists.
	sourceWords := strings.Fields(strings.ReplaceAll(sourceName, "_", " "))
	for i, word := range sourceWords {
		runes := []rune(word)
		runes[0] = unicode.ToUpper(runes[0])
		sourceWords[i] = string(runes)
	}
	formattedSourceName := strings.Join(sourceWords, " ")

	if len(blocks) > 0 {
		return fmt.Sprintf("%s (%s)", formattedSourceName, strings.Join(blocks, ", "))
	}

	return formattedSourceName
}
|
||||
|
||||
// createDirectoryIfNotExists ensures path exists as a directory, creating
// parents as needed (mode 0755).
//
// os.MkdirAll is idempotent (nil when the directory already exists), which
// removes the Stat-then-Mkdir TOCTOU race of the previous version and stops
// swallowing non-NotExist Stat errors such as permission failures.
func createDirectoryIfNotExists(path string) error {
	return os.MkdirAll(path, 0755)
}
|
||||
Reference in New Issue
Block a user