diff --git a/back/api/api.go b/back/api/api.go index 7759622..01d166b 100644 --- a/back/api/api.go +++ b/back/api/api.go @@ -94,6 +94,7 @@ func routes(s *server.Server, cache *map[string]*search.Result, searchQueue chan c.JSON(http.StatusBadRequest, gin.H{"Error": "query too short"}) return } + query.Folders = s.Settings.Folders id := search.EncodeQueryID(query, *s.TotalDataleaks) s.Mu.RLock() _, exists := (*cache)[id] diff --git a/back/search/dataleak/dataleak.go b/back/search/dataleak/dataleak.go index e73a8a2..22b4e7b 100644 --- a/back/search/dataleak/dataleak.go +++ b/back/search/dataleak/dataleak.go @@ -2,6 +2,7 @@ package dataleak import ( "fmt" + "path/filepath" "slices" "strconv" "strings" @@ -19,7 +20,7 @@ type LeakResult struct { LimitHit bool // Whether the search hit the limit } -func Search(s *server.Server, queryText, column string, exactMatch bool) LeakResult { +func Search(s *server.Server, queryText, column string, exactMatch bool, includeFolders []bool) LeakResult { if len(*(s.Dataleaks)) == 0 { return LeakResult{ Inactive: true, @@ -28,7 +29,11 @@ func Search(s *server.Server, queryText, column string, exactMatch bool) LeakRes now := time.Now() result := LeakResult{} - sqlQuery := buildSqlQuery(s, queryText, column, exactMatch) + sqlQuery := buildSqlQuery(s, queryText, column, exactMatch, includeFolders) + if strings.HasPrefix(sqlQuery, "error:") { + result.Error = strings.TrimPrefix(sqlQuery, "error: ") + return result + } if s.Settings.Debug { log.Info("New query:", "query", sqlQuery) @@ -117,7 +122,21 @@ func removeDuplicateMaps(maps []map[string]string) []map[string]string { return result } -func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool) string { +func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool, includeFolders []bool) string { + folders := s.Settings.Folders + includedFolders := []string{} + for i, f := range folders { + if i >= len(includeFolders) { + break + } + if includeFolders[i] { + includedFolders = append(includedFolders, f) + } + } + if len(includedFolders) == 0 { + return "error: no folders included" + } + // Step 1: Determine candidate columns to search var candidateColumns []string if column == "all" || column == "" { @@ -132,6 +151,9 @@ func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool) allColumns := make([]string, 0) seen := make(map[string]struct{}) for _, dataleak := range *s.Dataleaks { + if !isPathInFolders(dataleak.Path, includedFolders) { + continue + } for _, col := range dataleak.Columns { if _, ok := seen[col]; !ok { seen[col] = struct{}{} @@ -162,7 +184,7 @@ func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool) } limit := strconv.Itoa(s.Settings.Limit) - from := getFromClause(s) + from := getFromClause(s, includedFolders) if len(columnsFiltered) == 0 { return fmt.Sprintf("SELECT * FROM %s LIMIT %s", from, limit) @@ -210,9 +232,12 @@ func getWhereClause(queryText string, columns []string, exactMatch bool) string return strings.Join(andClauses, " AND ") } -func getFromClause(s *server.Server) string { +func getFromClause(s *server.Server, includedFolders []string) string { parquets := []string{} for _, dataleak := range *s.Dataleaks { + if !isPathInFolders(dataleak.Path, includedFolders) { + continue + } parquets = append(parquets, "'"+dataleak.Path+"'") } return fmt.Sprintf("read_parquet([%s], union_by_name=true, filename=true)", strings.Join(parquets, ", ")) @@ -272,3 +297,16 @@ func GetDataleakSample(s server.Server, path string) ([][]string, error) { return rowsData, nil } + +func isPathInFolders(path string, folders []string) bool { + for _, folder := range folders { + rel, err := filepath.Rel(folder, path) + if err != nil { + continue + } + if !strings.HasPrefix(rel, "..") { + return true + } + } + return false +} diff --git a/back/search/search.go b/back/search/search.go index 4040010..ba1c03e 100644 --- a/back/search/search.go +++ b/back/search/search.go @@ -14,9 +14,11 @@ import ( ) type Query struct { - Text string - Column string // The column to search in (e.g., "email", "password", etc. - ExactMatch bool // Whether to search for an exact match + Text string + Column string // The column to search in (e.g., "email", "password", etc. + ExactMatch bool // Whether to search for an exact match + Folders []string + IncludeFolders []bool // Services Datawells bool // Whether to include datawells in the search @@ -53,7 +55,7 @@ func Search(s *server.Server, q Query, r *Result, mu *sync.RWMutex) { wg.Done() return } - leakResult := dataleak.Search(s, q.Text, q.Column, q.ExactMatch) + leakResult := dataleak.Search(s, q.Text, q.Column, q.ExactMatch, q.IncludeFolders) mu.Lock() r.LeakResult = leakResult r.ResultsCount += len(leakResult.Rows) diff --git a/front/src/lib/components/index/search/searchbar.svelte b/front/src/lib/components/index/search/searchbar.svelte index 91f4167..435b94f 100644 --- a/front/src/lib/components/index/search/searchbar.svelte +++ b/front/src/lib/components/index/search/searchbar.svelte @@ -12,6 +12,8 @@ initialDatawells = true, initialGithubRecon = true, initialGravatarRecon = true, + folders = [], + initialIncludeFolders = [], }: { initialQuery?: string; initialFilter?: string; @@ -19,6 +21,8 @@ initialDatawells?: boolean; initialGithubRecon?: boolean; initialGravatarRecon?: boolean; + folders?: string[]; + initialIncludeFolders?: boolean[]; } = $props(); let filters = [ @@ -38,6 +42,16 @@ let datawells = $state(initialDatawells); let githubRecon = $state(initialGithubRecon); let gravatarRecon = $state(initialGravatarRecon); + let includeFolders = $state([]); + + $effect(() => { + if (folders.length > 0 && includeFolders.length !== folders.length) { + includeFolders = + initialIncludeFolders.length === folders.length + ? [...initialIncludeFolders] + : new Array(folders.length).fill(true); + } + }); function NewSearch() { axios @@ -50,6 +64,7 @@ Datawells: datawells, GithubRecon: githubRecon, GravatarRecon: gravatarRecon, + IncludeFolders: includeFolders, }, { headers: { @@ -104,6 +119,23 @@ Datawells lookup + {#each folders as folder, i} + + {/each}