package dataleak import ( "fmt" "slices" "strconv" "strings" "time" "github.com/anotherhadi/eleakxir/backend/server" "github.com/charmbracelet/log" ) type LeakResult struct { Duration time.Duration Rows []map[string]string Error string } func Search(s *server.Server, queryText, column string, exactMatch bool) LeakResult { if len(*(s.Dataleaks)) == 0 { return LeakResult{ Error: "No dataleak configured", } } now := time.Now() result := LeakResult{} sqlQuery := buildSqlQuery(s, queryText, column, exactMatch) if s.Settings.Debug { log.Info("New query:", "query", sqlQuery) } rows, err := s.Duckdb.Query(sqlQuery) if err != nil { result.Error = err.Error() return result } defer rows.Close() cols, err := rows.Columns() if err != nil { result.Error = err.Error() return result } rawResult := make([][]byte, len(cols)) dest := make([]any, len(cols)) for i := range rawResult { dest[i] = &rawResult[i] } for rows.Next() { err := rows.Scan(dest...) if err != nil { result.Error = err.Error() return result } rowMap := make(map[string]string) for i, colName := range cols { if rawResult[i] == nil || colName == "" { continue } if colName == "filename" { rowMap["source"] = server.FormatParquetName(string(rawResult[i])) continue } rowMap[colName] = string(rawResult[i]) } result.Rows = append(result.Rows, rowMap) } if err = rows.Err(); err != nil { result.Error = err.Error() return result } result.Rows = removeDuplicateMaps(result.Rows) result.Duration = time.Since(now) return result } func removeDuplicateMaps(maps []map[string]string) []map[string]string { seen := make(map[string]struct{}) result := []map[string]string{} for _, m := range maps { // Create a unique key for the map by concatenating its key-value pairs var sb strings.Builder keys := make([]string, 0, len(m)) for k := range m { keys = append(keys, k) } slices.Sort(keys) // Sort keys to ensure consistent order for _, k := range keys { sb.WriteString(k) sb.WriteString("=") sb.WriteString(m[k]) sb.WriteString(";") } key := sb.String() if _, exists := seen[key]; !exists { seen[key] = struct{}{} result = append(result, m) } } return result } func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool) string { limit := strconv.Itoa(s.Settings.Limit) from := getFromClause(s) if column == "name" { column = "full_name" } columns := []string{column} if column == "all" || column == "" { columns = s.Settings.BaseColumns } columnsFiltered := []string{} allColumns := []string{} // TODO: Add columns that ends with _col aswell for _, dataleak := range *s.Dataleaks { for _, col := range dataleak.Columns { if !slices.Contains(allColumns, col) { allColumns = append(allColumns, col) } } } if column == "full_text" { columnsFiltered = allColumns } else { for _, col := range columns { if slices.Contains(allColumns, col) { columnsFiltered = append(columnsFiltered, col) } } } if len(columnsFiltered) == 0 { return fmt.Sprintf("SELECT * FROM %s LIMIT %s", from, limit) } where := getWhereClause(queryText, columnsFiltered, exactMatch) return fmt.Sprintf("SELECT * FROM %s WHERE %s LIMIT %s", from, where, limit) } func getWhereClause(queryText string, columns []string, exactMatch bool) string { terms := strings.Fields(queryText) var andClauses []string for _, term := range terms { var orClausesForTerm []string termEscaped := strings.ReplaceAll(term, "'", "''") for _, col := range columns { if exactMatch { termEscapedILike := strings.ReplaceAll(termEscaped, "_", "\\_") termEscapedILike = strings.ReplaceAll(termEscapedILike, "%", "\\%") orClausesForTerm = append(orClausesForTerm, fmt.Sprintf("\"%s\" ILIKE '%s' ESCAPE '\\'", col, strings.ToLower(termEscapedILike))) } else { // Escape characters for ILIKE termEscapedILike := strings.ReplaceAll(termEscaped, "_", "\\_") termEscapedILike = strings.ReplaceAll(termEscapedILike, "%", "\\%") orClausesForTerm = append(orClausesForTerm, fmt.Sprintf("\"%s\" ILIKE '%%%s%%' ESCAPE '\\'", col, strings.ToLower(termEscapedILike))) } } andClauses = append(andClauses, "("+strings.Join(orClausesForTerm, " OR ")+")") } return strings.Join(andClauses, " AND ") } func getFromClause(s *server.Server) string { parquets := []string{} for _, dataleak := range *s.Dataleaks { parquets = append(parquets, "'"+dataleak.Path+"'") } return fmt.Sprintf("read_parquet([%s], union_by_name=true, filename=true)", strings.Join(parquets, ", ")) } func castAllColumns(cols []string) []string { casted := make([]string, len(cols)) for i, col := range cols { casted[i] = fmt.Sprintf("cast(%s as text)", col) } return casted }