Allow searching in specific folders

Signed-off-by: Hadi <112569860+anotherhadi@users.noreply.github.com>
This commit is contained in:
Hadi
2026-01-06 16:06:27 +01:00
parent 881cdfa9cb
commit 717e4136cd
7 changed files with 90 additions and 17 deletions

View File

@@ -94,6 +94,7 @@ func routes(s *server.Server, cache *map[string]*search.Result, searchQueue chan
c.JSON(http.StatusBadRequest, gin.H{"Error": "query too short"}) c.JSON(http.StatusBadRequest, gin.H{"Error": "query too short"})
return return
} }
query.Folders = s.Settings.Folders
id := search.EncodeQueryID(query, *s.TotalDataleaks) id := search.EncodeQueryID(query, *s.TotalDataleaks)
s.Mu.RLock() s.Mu.RLock()
_, exists := (*cache)[id] _, exists := (*cache)[id]

View File

@@ -2,6 +2,7 @@ package dataleak
import ( import (
"fmt" "fmt"
"path/filepath"
"slices" "slices"
"strconv" "strconv"
"strings" "strings"
@@ -19,7 +20,7 @@ type LeakResult struct {
LimitHit bool // Whether the search hit the limit LimitHit bool // Whether the search hit the limit
} }
func Search(s *server.Server, queryText, column string, exactMatch bool) LeakResult { func Search(s *server.Server, queryText, column string, exactMatch bool, includeFolders []bool) LeakResult {
if len(*(s.Dataleaks)) == 0 { if len(*(s.Dataleaks)) == 0 {
return LeakResult{ return LeakResult{
Inactive: true, Inactive: true,
@@ -28,7 +29,11 @@ func Search(s *server.Server, queryText, column string, exactMatch bool) LeakRes
now := time.Now() now := time.Now()
result := LeakResult{} result := LeakResult{}
sqlQuery := buildSqlQuery(s, queryText, column, exactMatch) sqlQuery := buildSqlQuery(s, queryText, column, exactMatch, includeFolders)
if strings.HasPrefix(sqlQuery, "error:") {
result.Error = strings.TrimPrefix(sqlQuery, "error: ")
return result
}
if s.Settings.Debug { if s.Settings.Debug {
log.Info("New query:", "query", sqlQuery) log.Info("New query:", "query", sqlQuery)
@@ -117,7 +122,21 @@ func removeDuplicateMaps(maps []map[string]string) []map[string]string {
return result return result
} }
func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool) string { func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool, includeFolders []bool) string {
folders := s.Settings.Folders
includedFolders := []string{}
for i, f := range folders {
if i >= len(includeFolders) {
break
}
if includeFolders[i] {
includedFolders = append(includedFolders, f)
}
}
if len(includedFolders) == 0 {
return "error: no folders included"
}
// Step 1: Determine candidate columns to search // Step 1: Determine candidate columns to search
var candidateColumns []string var candidateColumns []string
if column == "all" || column == "" { if column == "all" || column == "" {
@@ -132,6 +151,9 @@ func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool)
allColumns := make([]string, 0) allColumns := make([]string, 0)
seen := make(map[string]struct{}) seen := make(map[string]struct{})
for _, dataleak := range *s.Dataleaks { for _, dataleak := range *s.Dataleaks {
if !isPathInFolders(dataleak.Path, includedFolders) {
continue
}
for _, col := range dataleak.Columns { for _, col := range dataleak.Columns {
if _, ok := seen[col]; !ok { if _, ok := seen[col]; !ok {
seen[col] = struct{}{} seen[col] = struct{}{}
@@ -162,7 +184,7 @@ func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool)
} }
limit := strconv.Itoa(s.Settings.Limit) limit := strconv.Itoa(s.Settings.Limit)
from := getFromClause(s) from := getFromClause(s, includedFolders)
if len(columnsFiltered) == 0 { if len(columnsFiltered) == 0 {
return fmt.Sprintf("SELECT * FROM %s LIMIT %s", from, limit) return fmt.Sprintf("SELECT * FROM %s LIMIT %s", from, limit)
@@ -210,9 +232,12 @@ func getWhereClause(queryText string, columns []string, exactMatch bool) string
return strings.Join(andClauses, " AND ") return strings.Join(andClauses, " AND ")
} }
func getFromClause(s *server.Server) string { func getFromClause(s *server.Server, includedFolders []string) string {
parquets := []string{} parquets := []string{}
for _, dataleak := range *s.Dataleaks { for _, dataleak := range *s.Dataleaks {
if !isPathInFolders(dataleak.Path, includedFolders) {
continue
}
parquets = append(parquets, "'"+dataleak.Path+"'") parquets = append(parquets, "'"+dataleak.Path+"'")
} }
return fmt.Sprintf("read_parquet([%s], union_by_name=true, filename=true)", strings.Join(parquets, ", ")) return fmt.Sprintf("read_parquet([%s], union_by_name=true, filename=true)", strings.Join(parquets, ", "))
@@ -272,3 +297,16 @@ func GetDataleakSample(s server.Server, path string) ([][]string, error) {
return rowsData, nil return rowsData, nil
} }
// isPathInFolders reports whether path is one of the given folders or is
// located somewhere beneath one of them.
//
// Containment is decided lexically with filepath.Rel; the filesystem is not
// consulted and symlinks are not resolved. A path counts as outside a folder
// only when its relative form climbs out of it, i.e. when the relative path is
// exactly ".." or begins with a ".." path component. Entries whose names merely
// start with two dots (for example "..hidden/file.parquet") are still inside
// the folder and are reported as such.
//
// It returns false when folders is empty.
func isPathInFolders(path string, folders []string) bool {
	// A relative path escapes its base only if its first component is "..".
	// filepath.Rel returns a cleaned result, so any ".." components can only
	// appear at the very beginning.
	parentPrefix := ".." + string(filepath.Separator)

	for _, folder := range folders {
		rel, err := filepath.Rel(folder, path)
		if err != nil {
			// path cannot be expressed relative to this folder (e.g. one is
			// absolute and the other is relative); try the next folder.
			continue
		}
		if rel == ".." || strings.HasPrefix(rel, parentPrefix) {
			continue
		}
		return true
	}
	return false
}

View File

@@ -17,6 +17,8 @@ type Query struct {
Text string Text string
Column string // The column to search in (e.g., "email", "password", etc. Column string // The column to search in (e.g., "email", "password", etc.
ExactMatch bool // Whether to search for an exact match ExactMatch bool // Whether to search for an exact match
Folders []string
IncludeFolders []bool
// Services // Services
Datawells bool // Whether to include datawells in the search Datawells bool // Whether to include datawells in the search
@@ -53,7 +55,7 @@ func Search(s *server.Server, q Query, r *Result, mu *sync.RWMutex) {
wg.Done() wg.Done()
return return
} }
leakResult := dataleak.Search(s, q.Text, q.Column, q.ExactMatch) leakResult := dataleak.Search(s, q.Text, q.Column, q.ExactMatch, q.IncludeFolders)
mu.Lock() mu.Lock()
r.LeakResult = leakResult r.LeakResult = leakResult
r.ResultsCount += len(leakResult.Rows) r.ResultsCount += len(leakResult.Rows)

View File

@@ -12,6 +12,8 @@
initialDatawells = true, initialDatawells = true,
initialGithubRecon = true, initialGithubRecon = true,
initialGravatarRecon = true, initialGravatarRecon = true,
folders = [],
initialIncludeFolders = [],
}: { }: {
initialQuery?: string; initialQuery?: string;
initialFilter?: string; initialFilter?: string;
@@ -19,6 +21,8 @@
initialDatawells?: boolean; initialDatawells?: boolean;
initialGithubRecon?: boolean; initialGithubRecon?: boolean;
initialGravatarRecon?: boolean; initialGravatarRecon?: boolean;
folders?: string[];
initialIncludeFolders?: boolean[];
} = $props(); } = $props();
let filters = [ let filters = [
@@ -38,6 +42,16 @@
let datawells = $state<boolean>(initialDatawells); let datawells = $state<boolean>(initialDatawells);
let githubRecon = $state<boolean>(initialGithubRecon); let githubRecon = $state<boolean>(initialGithubRecon);
let gravatarRecon = $state<boolean>(initialGravatarRecon); let gravatarRecon = $state<boolean>(initialGravatarRecon);
let includeFolders = $state<boolean[]>([]);
$effect(() => {
if (folders.length > 0 && includeFolders.length !== folders.length) {
includeFolders =
initialIncludeFolders.length === folders.length
? [...initialIncludeFolders]
: new Array(folders.length).fill(true);
}
});
function NewSearch() { function NewSearch() {
axios axios
@@ -50,6 +64,7 @@
Datawells: datawells, Datawells: datawells,
GithubRecon: githubRecon, GithubRecon: githubRecon,
GravatarRecon: gravatarRecon, GravatarRecon: gravatarRecon,
IncludeFolders: includeFolders,
}, },
{ {
headers: { headers: {
@@ -104,6 +119,23 @@
<input type="checkbox" bind:checked={datawells} class="checkbox" /> <input type="checkbox" bind:checked={datawells} class="checkbox" />
Datawells lookup Datawells lookup
</label> </label>
{#each folders as folder, i}
<label class="label">
{#if includeFolders[i] !== undefined}
<input
type="checkbox"
bind:checked={includeFolders[i]}
class="checkbox checkbox-xs ml-5"
disabled={!datawells}
/>
{/if}
{
folder.split("/")
.filter((part) => part.length > 0)
.pop()
}
</label>
{/each}
</li> </li>
<li> <li>
<label class="label"> <label class="label">
@@ -142,14 +174,10 @@
class="grow input-xl" class="grow input-xl"
type="text" type="text"
bind:value={query} bind:value={query}
placeholder={ placeholder={(activeFilter === "all"
(
activeFilter === "all"
? "Search..." ? "Search..."
: `Search in ${activeFilter.replace("_", " ")}...` : `Search in ${activeFilter.replace("_", " ")}...`) +
)+ ((activeFilter === "phone" && " (e.g. 612233445)") || "")}
(activeFilter === "phone" && " (e.g. 612233445)" || "")
}
required required
/> />

View File

@@ -2,6 +2,8 @@ export type Query = {
Text: string; Text: string;
Column: string; Column: string;
ExactMatch: boolean; ExactMatch: boolean;
Folders: string[];
IncludeFolders: boolean[];
// Services // Services
Datawells: boolean; Datawells: boolean;

View File

@@ -116,6 +116,8 @@
initialDatawells={result.Query.Datawells} initialDatawells={result.Query.Datawells}
initialGithubRecon={result.Query.GithubRecon} initialGithubRecon={result.Query.GithubRecon}
initialGravatarRecon={result.Query.GravatarRecon} initialGravatarRecon={result.Query.GravatarRecon}
folders={result.Query.Folders}
initialIncludeFolders={result.Query.IncludeFolders}
/> />
</header> </header>

View File

@@ -55,7 +55,7 @@
<main> <main>
<header class="flex gap-5 flex-col"> <header class="flex gap-5 flex-col">
<h1 class="h1"><span class="text-2xl align-middle">🔍</span> Search</h1> <h1 class="h1"><span class="text-2xl align-middle">🔍</span> Search</h1>
<Searchbar /> <Searchbar folders={serverInfo?.Settings.Folders} />
</header> </header>
<div class="my-10"></div> <div class="my-10"></div>