Allow searching in specific folders

Signed-off-by: Hadi <112569860+anotherhadi@users.noreply.github.com>
This commit is contained in:
Hadi
2026-01-06 16:06:27 +01:00
parent 881cdfa9cb
commit 717e4136cd
7 changed files with 90 additions and 17 deletions

View File

@@ -2,6 +2,7 @@ package dataleak
import (
"fmt"
"path/filepath"
"slices"
"strconv"
"strings"
@@ -19,7 +20,7 @@ type LeakResult struct {
LimitHit bool // Whether the search hit the limit
}
func Search(s *server.Server, queryText, column string, exactMatch bool) LeakResult {
func Search(s *server.Server, queryText, column string, exactMatch bool, includeFolders []bool) LeakResult {
if len(*(s.Dataleaks)) == 0 {
return LeakResult{
Inactive: true,
@@ -28,7 +29,11 @@ func Search(s *server.Server, queryText, column string, exactMatch bool) LeakRes
now := time.Now()
result := LeakResult{}
sqlQuery := buildSqlQuery(s, queryText, column, exactMatch)
sqlQuery := buildSqlQuery(s, queryText, column, exactMatch, includeFolders)
if strings.HasPrefix(sqlQuery, "error:") {
result.Error = strings.TrimPrefix(sqlQuery, "error: ")
return result
}
if s.Settings.Debug {
log.Info("New query:", "query", sqlQuery)
@@ -117,7 +122,21 @@ func removeDuplicateMaps(maps []map[string]string) []map[string]string {
return result
}
func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool) string {
func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool, includeFolders []bool) string {
folders := s.Settings.Folders
includedFolders := []string{}
for i, f := range folders {
if i >= len(includeFolders) {
break
}
if includeFolders[i] {
includedFolders = append(includedFolders, f)
}
}
if len(includedFolders) == 0 {
return "error: no folders included"
}
// Step 1: Determine candidate columns to search
var candidateColumns []string
if column == "all" || column == "" {
@@ -132,6 +151,9 @@ func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool)
allColumns := make([]string, 0)
seen := make(map[string]struct{})
for _, dataleak := range *s.Dataleaks {
if !isPathInFolders(dataleak.Path, includedFolders) {
continue
}
for _, col := range dataleak.Columns {
if _, ok := seen[col]; !ok {
seen[col] = struct{}{}
@@ -162,7 +184,7 @@ func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool)
}
limit := strconv.Itoa(s.Settings.Limit)
from := getFromClause(s)
from := getFromClause(s, includedFolders)
if len(columnsFiltered) == 0 {
return fmt.Sprintf("SELECT * FROM %s LIMIT %s", from, limit)
@@ -210,9 +232,12 @@ func getWhereClause(queryText string, columns []string, exactMatch bool) string
return strings.Join(andClauses, " AND ")
}
func getFromClause(s *server.Server) string {
func getFromClause(s *server.Server, includedFolders []string) string {
parquets := []string{}
for _, dataleak := range *s.Dataleaks {
if !isPathInFolders(dataleak.Path, includedFolders) {
continue
}
parquets = append(parquets, "'"+dataleak.Path+"'")
}
return fmt.Sprintf("read_parquet([%s], union_by_name=true, filename=true)", strings.Join(parquets, ", "))
@@ -272,3 +297,16 @@ func GetDataleakSample(s server.Server, path string) ([][]string, error) {
return rowsData, nil
}
// isPathInFolders reports whether path is located inside, or is equal to, at
// least one of the given folders.
//
// Containment is decided lexically with filepath.Rel; the file system is not
// consulted. A folder for which no relative path can be computed (filepath.Rel
// returns an error, e.g. one path is absolute and the other relative) is
// skipped. An empty or nil folders slice always yields false.
func isPathInFolders(path string, folders []string) bool {
	parentPrefix := ".." + string(filepath.Separator)
	for _, folder := range folders {
		rel, err := filepath.Rel(folder, path)
		if err != nil {
			continue
		}
		// rel leaves folder only when its first element is exactly "..".
		// A plain strings.HasPrefix(rel, "..") would also reject entries whose
		// name merely begins with two dots, such as "..cache/leak.parquet".
		if rel == ".." || strings.HasPrefix(rel, parentPrefix) {
			continue
		}
		return true
	}
	return false
}

View File

@@ -14,9 +14,11 @@ import (
)
type Query struct {
Text string
Column string // The column to search in (e.g., "email", "password", etc.
ExactMatch bool // Whether to search for an exact match
Text string
Column string // The column to search in (e.g., "email", "password", etc.
ExactMatch bool // Whether to search for an exact match
Folders []string
IncludeFolders []bool
// Services
Datawells bool // Whether to include datawells in the search
@@ -53,7 +55,7 @@ func Search(s *server.Server, q Query, r *Result, mu *sync.RWMutex) {
wg.Done()
return
}
leakResult := dataleak.Search(s, q.Text, q.Column, q.ExactMatch)
leakResult := dataleak.Search(s, q.Text, q.Column, q.ExactMatch, q.IncludeFolders)
mu.Lock()
r.LeakResult = leakResult
r.ResultsCount += len(leakResult.Rows)