From b51c149488d457e03b7719145bb73be5dd13380a Mon Sep 17 00:00:00 2001 From: Hadi <112569860+anotherhadi@users.noreply.github.com> Date: Sat, 4 Oct 2025 16:39:46 +0200 Subject: [PATCH] take 3 first and 3 last Signed-off-by: Hadi <112569860+anotherhadi@users.noreply.github.com> --- back/search/dataleak/dataleak.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/back/search/dataleak/dataleak.go b/back/search/dataleak/dataleak.go index 9874079..e73a8a2 100644 --- a/back/search/dataleak/dataleak.go +++ b/back/search/dataleak/dataleak.go @@ -217,11 +217,20 @@ func getFromClause(s *server.Server) string { } return fmt.Sprintf("read_parquet([%s], union_by_name=true, filename=true)", strings.Join(parquets, ", ")) } - func GetDataleakSample(s server.Server, path string) ([][]string, error) { rowsData := [][]string{} - query := fmt.Sprintf("SELECT * FROM read_parquet('%s') LIMIT 5", path) + // Use row_number() to get first 3 and last 3 rows + query := fmt.Sprintf(` + WITH numbered AS ( + SELECT *, row_number() OVER () AS rn, count(*) OVER () AS total_rows + FROM read_parquet('%s') + ) + SELECT * EXCLUDE (rn, total_rows) + FROM numbered + WHERE rn <= 3 OR rn > total_rows - 3 + `, path) + rows, err := s.Duckdb.Query(query) if err != nil { return rowsData, err @@ -233,7 +242,7 @@ func GetDataleakSample(s server.Server, path string) ([][]string, error) { return rowsData, err } - rowsData = append(rowsData, cols) + rowsData = append(rowsData, cols) // header rawResult := make([][]byte, len(cols)) dest := make([]any, len(cols))