266 lines
6.7 KiB
Go
266 lines
6.7 KiB
Go
package dataleak
|
|
|
|
import (
|
|
"fmt"
|
|
"slices"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/anotherhadi/eleakxir/backend/server"
|
|
"github.com/charmbracelet/log"
|
|
)
|
|
|
|
// LeakResult is the outcome of one Search call.
type LeakResult struct {
	// Duration is the elapsed time of a search that finished without error.
	Duration time.Duration
	// Rows holds one map per returned row, keyed by column name. NULL values
	// are left out and the "filename" column is stored under the "source" key.
	Rows []map[string]string
	// Error is the message of the query, scan or iteration error, if any.
	Error string
	// Inactive is true when no dataleak is loaded, so nothing was searched.
	Inactive bool
	// LimitHit is true when the number of fetched rows reached the
	// configured limit.
	LimitHit bool
}
|
|
|
|
func Search(s *server.Server, queryText, column string, exactMatch bool) LeakResult {
|
|
if len(*(s.Dataleaks)) == 0 {
|
|
return LeakResult{
|
|
Inactive: true,
|
|
}
|
|
}
|
|
now := time.Now()
|
|
result := LeakResult{}
|
|
|
|
sqlQuery := buildSqlQuery(s, queryText, column, exactMatch)
|
|
|
|
if s.Settings.Debug {
|
|
log.Info("New query:", "query", sqlQuery)
|
|
}
|
|
rows, err := s.Duckdb.Query(sqlQuery)
|
|
if err != nil {
|
|
result.Error = err.Error()
|
|
return result
|
|
}
|
|
defer rows.Close()
|
|
|
|
cols, err := rows.Columns()
|
|
if err != nil {
|
|
result.Error = err.Error()
|
|
return result
|
|
}
|
|
|
|
rawResult := make([][]byte, len(cols))
|
|
dest := make([]any, len(cols))
|
|
for i := range rawResult {
|
|
dest[i] = &rawResult[i]
|
|
}
|
|
|
|
for rows.Next() {
|
|
err := rows.Scan(dest...)
|
|
if err != nil {
|
|
result.Error = err.Error()
|
|
return result
|
|
}
|
|
|
|
rowMap := make(map[string]string)
|
|
for i, colName := range cols {
|
|
if rawResult[i] == nil || colName == "" {
|
|
continue
|
|
}
|
|
if colName == "filename" {
|
|
rowMap["source"] = server.FormatParquetName(string(rawResult[i]))
|
|
continue
|
|
}
|
|
rowMap[colName] = string(rawResult[i])
|
|
}
|
|
result.Rows = append(result.Rows, rowMap)
|
|
}
|
|
|
|
if err = rows.Err(); err != nil {
|
|
result.Error = err.Error()
|
|
return result
|
|
}
|
|
|
|
if len(result.Rows) >= s.Settings.Limit {
|
|
result.LimitHit = true
|
|
}
|
|
|
|
result.Rows = removeDuplicateMaps(result.Rows)
|
|
|
|
result.Duration = time.Since(now)
|
|
return result
|
|
}
|
|
|
|
// removeDuplicateMaps returns the input maps with exact duplicates removed.
//
// Two maps are duplicates when they contain the same set of key/value pairs.
// The first occurrence of each distinct map is kept and input order is
// preserved. The returned slice is never nil, even for nil or empty input.
func removeDuplicateMaps(maps []map[string]string) []map[string]string {
	seen := make(map[string]struct{}, len(maps))
	result := make([]map[string]string, 0, len(maps))

	for _, m := range maps {
		// Map iteration order is random, so sort the keys to get a stable
		// fingerprint for equal maps.
		keys := make([]string, 0, len(m))
		for k := range m {
			keys = append(keys, k)
		}
		slices.Sort(keys)

		// Each key and value is written as "<byte length>:<bytes>". A plain
		// "k=v;" concatenation is ambiguous when values contain '=' or ';'
		// ({"a":"1;b=2"} vs {"a":"1","b":"2"}) and would drop distinct rows.
		var sb strings.Builder
		for _, k := range keys {
			v := m[k]
			sb.WriteString(strconv.Itoa(len(k)))
			sb.WriteByte(':')
			sb.WriteString(k)
			sb.WriteString(strconv.Itoa(len(v)))
			sb.WriteByte(':')
			sb.WriteString(v)
		}
		key := sb.String()

		if _, exists := seen[key]; exists {
			continue
		}
		seen[key] = struct{}{}
		result = append(result, m)
	}

	return result
}
|
|
|
|
func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool) string {
|
|
// Step 1: Determine candidate columns to search
|
|
var candidateColumns []string
|
|
if column == "all" || column == "" {
|
|
// Use base columns if "all" or empty
|
|
candidateColumns = s.Settings.BaseColumns
|
|
} else {
|
|
// Otherwise, only search the given column
|
|
candidateColumns = []string{column}
|
|
}
|
|
|
|
// Step 2: Collect all available columns across dataleaks
|
|
allColumns := make([]string, 0)
|
|
seen := make(map[string]struct{})
|
|
for _, dataleak := range *s.Dataleaks {
|
|
for _, col := range dataleak.Columns {
|
|
if _, ok := seen[col]; !ok {
|
|
seen[col] = struct{}{}
|
|
allColumns = append(allColumns, col)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Step 3: Resolve which columns should actually be used in the WHERE clause
|
|
var columnsFiltered []string
|
|
if strings.EqualFold(column, "full_text") {
|
|
// "full_text" means search across all columns
|
|
columnsFiltered = allColumns
|
|
} else {
|
|
for _, candidate := range candidateColumns {
|
|
for _, available := range allColumns {
|
|
// Exact match (case-insensitive)
|
|
if strings.EqualFold(available, candidate) {
|
|
columnsFiltered = append(columnsFiltered, available)
|
|
continue
|
|
}
|
|
// Match columns ending with "_<candidate>"
|
|
if strings.HasSuffix(strings.ToLower(available), "_"+strings.ToLower(candidate)) {
|
|
columnsFiltered = append(columnsFiltered, available)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
limit := strconv.Itoa(s.Settings.Limit)
|
|
from := getFromClause(s)
|
|
|
|
if len(columnsFiltered) == 0 {
|
|
return fmt.Sprintf("SELECT * FROM %s LIMIT %s", from, limit)
|
|
}
|
|
|
|
where := getWhereClause(queryText, columnsFiltered, exactMatch)
|
|
|
|
return fmt.Sprintf("SELECT * FROM %s WHERE %s LIMIT %s", from, where, limit)
|
|
}
|
|
|
|
// getWhereClause builds the boolean condition of a search query.
//
// queryText is split on whitespace into terms. Every term must match (AND) in
// at least one of the given columns (OR). Matching is case-insensitive
// (ILIKE) and is a substring match by default. A leading "^" anchors the term
// at the start of the value, a trailing "$" anchors it at the end, and
// exactMatch (or both anchors together) requires the whole value to match.
//
// Single quotes in terms are doubled, and the LIKE metacharacters "\", "_"
// and "%" are escaped so they are matched literally. Column names are emitted
// as quoted identifiers with embedded double quotes doubled.
//
// The result is an empty string when queryText contains no term.
func getWhereClause(queryText string, columns []string, exactMatch bool) string {
	terms := strings.Fields(queryText)
	andClauses := make([]string, 0, len(terms))

	// The backslash is the ESCAPE character, so it has to be escaped itself;
	// a single-pass replacer avoids re-escaping the backslashes it inserts.
	likeEscaper := strings.NewReplacer(`\`, `\\`, "_", `\_`, "%", `\%`)

	for _, term := range terms {
		termEscaped := strings.ReplaceAll(term, "'", "''")

		startsWith := strings.HasPrefix(termEscaped, "^")
		termEscaped = strings.TrimPrefix(termEscaped, "^")
		endsWith := strings.HasSuffix(termEscaped, "$")
		termEscaped = strings.TrimSuffix(termEscaped, "$")

		// The pattern is the same for every column, so build it once.
		pattern := strings.ToLower(likeEscaper.Replace(termEscaped))
		switch {
		case exactMatch || (startsWith && endsWith):
			// Whole-value match: no wildcard.
		case startsWith:
			pattern += "%"
		case endsWith:
			pattern = "%" + pattern
		default:
			pattern = "%" + pattern + "%"
		}

		orClausesForTerm := make([]string, 0, len(columns))
		for _, col := range columns {
			identifier := `"` + strings.ReplaceAll(col, `"`, `""`) + `"`
			orClausesForTerm = append(orClausesForTerm, identifier+" ILIKE '"+pattern+`' ESCAPE '\'`)
		}
		andClauses = append(andClauses, "("+strings.Join(orClausesForTerm, " OR ")+")")
	}
	return strings.Join(andClauses, " AND ")
}
|
|
|
|
func getFromClause(s *server.Server) string {
|
|
parquets := []string{}
|
|
for _, dataleak := range *s.Dataleaks {
|
|
parquets = append(parquets, "'"+dataleak.Path+"'")
|
|
}
|
|
return fmt.Sprintf("read_parquet([%s], union_by_name=true, filename=true)", strings.Join(parquets, ", "))
|
|
}
|
|
|
|
func GetDataleakSample(s server.Server, path string) ([][]string, error) {
|
|
rowsData := [][]string{}
|
|
|
|
query := fmt.Sprintf("SELECT * FROM read_parquet('%s') LIMIT 5", path)
|
|
rows, err := s.Duckdb.Query(query)
|
|
if err != nil {
|
|
return rowsData, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
cols, err := rows.Columns()
|
|
if err != nil {
|
|
return rowsData, err
|
|
}
|
|
|
|
rowsData = append(rowsData, cols)
|
|
|
|
rawResult := make([][]byte, len(cols))
|
|
dest := make([]any, len(cols))
|
|
for i := range rawResult {
|
|
dest[i] = &rawResult[i]
|
|
}
|
|
|
|
for rows.Next() {
|
|
if err := rows.Scan(dest...); err != nil {
|
|
return rowsData, err
|
|
}
|
|
|
|
row := make([]string, len(cols))
|
|
for i := range cols {
|
|
if rawResult[i] == nil {
|
|
row[i] = ""
|
|
} else {
|
|
row[i] = string(rawResult[i])
|
|
}
|
|
}
|
|
rowsData = append(rowsData, row)
|
|
}
|
|
|
|
if err = rows.Err(); err != nil {
|
|
return rowsData, err
|
|
}
|
|
|
|
return rowsData, nil
|
|
}
|