Remove "full_name" indication, keep only name & *_name

Signed-off-by: Hadi <112569860+anotherhadi@users.noreply.github.com>
This commit is contained in:
Hadi
2025-10-02 17:18:51 +02:00
parent e4b2edeae2
commit c03b534009
5 changed files with 3 additions and 80 deletions

View File

@@ -100,7 +100,7 @@ combo_french-notes_crypto.parquet
| age |
| first_name |
| last_name |
| full_name |
| middle_name |
| address |
| city |
| country |
@@ -116,17 +116,12 @@ combo_french-notes_crypto.parquet
- **Phone**: keep only digits (strip every character matching `[^0-9]`)
- **Names**:
- Keep `first_name` / `last_name` if present.
- Generate `full_name = CONCAT(first_name, ' ', last_name)`.
- If only `name` exists, rename it to `full_name`.
- **Passwords**:
- Hashes → `password_hash`.
- Plaintext → `password`.
- Never mix hashes and plaintext in the same column.
- **NULLs**: always use SQL `NULL` (never `""` or `"NULL"`).
- **NULLs**: always use SQL `NULL` (never `""` or `"NULL"`, ...).
## Deduplication

View File

@@ -1,75 +1,9 @@
package parquet
import (
"fmt"
"strings"
"github.com/anotherhadi/eleakxir/leak-utils/settings"
)
// addFullname makes sure the operation list produces a "full_name" column.
//
// Only operations whose Action is not "drop" are considered, and they are
// matched on their NewName. The first rule that applies wins:
//
//  1. "full_name" already present: the list is returned untouched.
//  2. Both "first_name" and "last_name" present: an operation is appended whose
//     OriginalName is the expression "first_name || ' ' || last_name"
//     (presumably evaluated as SQL by whoever consumes the list; confirm
//     against the caller).
//  3. "name" present: the first such operation is retargeted to "full_name"
//     in place, so the caller's slice element is modified.
//  4. Only "first_name" present: an operation copying it to "full_name" is appended.
//  5. Only "last_name" present: an operation copying it to "full_name" is appended.
//
// Every change is announced on stdout through settings.Muted. When no rule
// applies the list is returned unchanged.
func addFullname(operations []ColumnOperation) []ColumnOperation {
	var fullSeen, firstSeen, lastSeen bool
	nameIdx := -1 // index of the first live operation that yields "name"

	for i := range operations {
		if operations[i].Action == "drop" {
			continue
		}
		switch operations[i].NewName {
		case "full_name":
			fullSeen = true
		case "first_name":
			firstSeen = true
		case "last_name":
			lastSeen = true
		case "name":
			if nameIdx < 0 {
				nameIdx = i
			}
		}
	}

	if fullSeen {
		return operations
	}

	// synthesize appends a "full_name" operation fed by source, prints the
	// notice, and hands back the extended list.
	synthesize := func(source, notice string) []ColumnOperation {
		operations = append(operations, ColumnOperation{
			OriginalName: source,
			NewName:      "full_name",
			Action:       "rename",
		})
		fmt.Println(settings.Muted.Render(notice))
		return operations
	}

	if firstSeen && lastSeen {
		return synthesize(
			"first_name || ' ' || last_name",
			"\nAdding new column 'full_name' as concatenation of 'first_name' and 'last_name'.",
		)
	}

	if nameIdx >= 0 {
		operations[nameIdx].NewName = "full_name"
		fmt.Println(settings.Muted.Render("\nRenaming column 'name' to 'full_name'."))
		return operations
	}

	if firstSeen {
		return synthesize("first_name", "\nAdding new column 'full_name' from 'first_name'.")
	}

	if lastSeen {
		return synthesize("last_name", "\nAdding new column 'full_name' from 'last_name'.")
	}

	return operations
}
// formatColumnName formats a column name to be SQL-compliant.
func formatColumnName(columnName string) string {
columnName = strings.TrimSpace(columnName)

View File

@@ -182,7 +182,6 @@ func configureColumns(input Parquet, skipLineFormating bool) []ColumnOperation {
if !skipLineFormating {
operations = formatColumns(operations)
}
operations = addFullname(operations)
return operations
}