From c03b534009b9a7d0bea402751ec19d79a26a5437 Mon Sep 17 00:00:00 2001 From: Hadi <112569860+anotherhadi@users.noreply.github.com> Date: Thu, 2 Oct 2025 17:18:51 +0200 Subject: [PATCH] Remove "full_name" indication, keep only name & *_name Signed-off-by: Hadi <112569860+anotherhadi@users.noreply.github.com> --- back/search/dataleak/dataleak.go | 5 -- back/server/settings.go | 2 +- leak-utils/DATALEAKS-NORMALIZATION.md | 9 +--- leak-utils/parquet/format.go | 66 --------------------------- leak-utils/parquet/parquet.go | 1 - 5 files changed, 3 insertions(+), 80 deletions(-) diff --git a/back/search/dataleak/dataleak.go b/back/search/dataleak/dataleak.go index 6cac1a3..6f1417d 100644 --- a/back/search/dataleak/dataleak.go +++ b/back/search/dataleak/dataleak.go @@ -118,11 +118,6 @@ func removeDuplicateMaps(maps []map[string]string) []map[string]string { } func buildSqlQuery(s *server.Server, queryText, column string, exactMatch bool) string { - // Normalize "name" -> "full_name" - if strings.EqualFold(column, "name") { - column = "full_name" - } - // Step 1: Determine candidate columns to search var candidateColumns []string if column == "all" || column == "" { diff --git a/back/server/settings.go b/back/server/settings.go index 6f3c225..d727efe 100644 --- a/back/server/settings.go +++ b/back/server/settings.go @@ -42,7 +42,7 @@ func LoadServerSettings() ServerSettings { // Dataleaks Folders: getEnvStringListOrDefault("DATALEAKS_FOLDERS", []string{}), CacheFolder: getEnvStringOrDefault("DATALEAKS_CACHE_FOLDER", ""), - BaseColumns: getEnvStringListOrDefault("BASE_COLUMNS", []string{"email", "username", "password", "full_name", "phone", "url"}), + BaseColumns: getEnvStringListOrDefault("BASE_COLUMNS", []string{"email", "username", "password", "name", "phone", "url"}), Limit: getEnvIntOrDefault("LIMIT", 200), ReloadDataleaksInterval: getEnvDurationOrDefault("RELOAD_DATALEAKS_INTERVAL", 20*time.Minute), diff --git a/leak-utils/DATALEAKS-NORMALIZATION.md b/leak-utils/DATALEAKS-NORMALIZATION.md index 3e1ddd7..3451218 100644 --- a/leak-utils/DATALEAKS-NORMALIZATION.md +++ b/leak-utils/DATALEAKS-NORMALIZATION.md @@ -100,7 +100,7 @@ combo_french-notes_crypto.parquet | age | | first_name | | last_name | - | full_name | + | middle_name | | address | | city | | country | @@ -116,17 +116,12 @@ combo_french-notes_crypto.parquet - **Phone**: keep only `[^0-9]` -- **Names**: - - Keep `first_name` / `last_name` if present. - - Generate `full_name = CONCAT(first_name, ' ', last_name)`. - - If only `name` exists, rename it to `full_name`. - - **Passwords**: - Hashes → `password_hash`. - Plaintext → `password`. - Never mix hashes and plaintext in the same column. -- **NULLs**: always use SQL `NULL` (never `""` or `"NULL"`). +- **NULLs**: always use SQL `NULL` (never `""` or `"NULL"`, ...). ## Deduplication diff --git a/leak-utils/parquet/format.go b/leak-utils/parquet/format.go index 1579a03..d540af3 100644 --- a/leak-utils/parquet/format.go +++ b/leak-utils/parquet/format.go @@ -1,75 +1,9 @@ package parquet import ( - "fmt" "strings" - - "github.com/anotherhadi/eleakxir/leak-utils/settings" ) -// If there is no full_name but there is last_name and first_name, create full_name -// If there is no full_name, no last_name or no first_name, but there is name, rename name to full_name -func addFullname(operations []ColumnOperation) []ColumnOperation { - hasFullName := false - hasFirstName := false - hasLastName := false - hasName := false - for _, op := range operations { - if op.Action != "drop" { - if op.NewName == "full_name" { - hasFullName = true - } else if op.NewName == "first_name" { - hasFirstName = true - } else if op.NewName == "last_name" { - hasLastName = true - } else if op.NewName == "name" { - hasName = true - } - } - } - if hasFullName { - return operations - } - if hasFirstName && hasLastName { - operations = append(operations, ColumnOperation{ - OriginalName: "first_name || ' ' || last_name", - NewName: "full_name", - Action: "rename", - }) - fmt.Println(settings.Muted.Render("\nAdding new column 'full_name' as concatenation of 'first_name' and 'last_name'.")) - return operations - } - if hasName { - for i, op := range operations { - if op.NewName == "name" && op.Action != "drop" { - operations[i].NewName = "full_name" - fmt.Println(settings.Muted.Render("\nRenaming column 'name' to 'full_name'.")) - return operations - } - } - } - if hasFirstName { - operations = append(operations, ColumnOperation{ - OriginalName: "first_name", - NewName: "full_name", - Action: "rename", - }) - fmt.Println(settings.Muted.Render("\nAdding new column 'full_name' from 'first_name'.")) - return operations - } - if hasLastName { - operations = append(operations, ColumnOperation{ - OriginalName: "last_name", - NewName: "full_name", - Action: "rename", - }) - fmt.Println(settings.Muted.Render("\nAdding new column 'full_name' from 'last_name'.")) - return operations - } - - return operations -} - // formatColumnName formats a column name to be SQL-compliant. func formatColumnName(columnName string) string { columnName = strings.TrimSpace(columnName) diff --git a/leak-utils/parquet/parquet.go b/leak-utils/parquet/parquet.go index af6b7c1..e76b969 100644 --- a/leak-utils/parquet/parquet.go +++ b/leak-utils/parquet/parquet.go @@ -182,7 +182,6 @@ func configureColumns(input Parquet, skipLineFormating bool) []ColumnOperation { if !skipLineFormating { operations = formatColumns(operations) } - operations = addFullname(operations) return operations }