Change suggestion strategy

Signed-off-by: Hadi <112569860+anotherhadi@users.noreply.github.com>
This commit is contained in:
Hadi
2025-09-24 19:54:32 +02:00
parent 979b395c56
commit 6dc672fcc7
4 changed files with 68 additions and 43 deletions

View File

@@ -7,6 +7,7 @@ require (
github.com/charmbracelet/log v0.4.2 github.com/charmbracelet/log v0.4.2
github.com/marcboeker/go-duckdb/v2 v2.4.0 github.com/marcboeker/go-duckdb/v2 v2.4.0
github.com/spf13/pflag v1.0.10 github.com/spf13/pflag v1.0.10
golang.org/x/text v0.28.0
) )
require ( require (

View File

@@ -100,6 +100,8 @@ golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY=

View File

@@ -1,15 +1,20 @@
package parquet package parquet
import ( import (
"regexp"
"slices" "slices"
"strings"
"unicode"
"golang.org/x/text/unicode/norm"
) )
func getSuggestion(col string) string { var (
col = formatColumnName(col) knownColumnNames = []string{
knownNames := []string{
"date", "date",
"phone", "phone",
"username", "username",
"iban",
"address", "address",
"email", "email",
"postal_code", "postal_code",
@@ -28,47 +33,49 @@ func getSuggestion(col string) string {
"url", "url",
"ip", "ip",
} }
if slices.Contains(knownNames, col) {
return col suggestions = map[string]string{
"user": "username",
"login": "username",
"sex": "gender",
"genre": "gender",
"ipaddress": "ip",
"firstname": "first_name",
"prenom": "first_name",
"lastname": "last_name",
"nom": "last_name",
"fullname": "full_name",
"nomcomplet": "full_name",
"adresse": "address",
"streetaddress": "address",
"ville": "city",
"pays": "country",
"mail": "email",
"zip": "postal_code",
"postalcode": "postal_code",
"zipcode": "postal_code",
"postal": "postal_code",
"codepostal": "postal_code",
"hash": "password_hash",
"hashedpassword": "password_hash",
"hashpassword": "password_hash",
"passwordhashed": "password_hash",
"birthdate": "birth_date",
"dob": "birth_date",
"dateofbirth": "birth_date",
} }
if col == "user" { )
return "username"
func getSuggestion(col string) string {
colFormated := formatColumnName(col)
if slices.Contains(knownColumnNames, colFormated) {
return colFormated
} }
if col == "login" {
return "username" col = cleanString(col)
}
if col == "sex" { if val, ok := suggestions[col]; ok {
return "gender" return val
}
if col == "ip_address" {
return "ip"
}
if col == "password_hashed" {
return "password_hash"
}
if col == "firstname" {
return "first_name"
}
if col == "lastname" {
return "last_name"
}
if col == "fullname" {
return "full_name"
}
if col == "mail" {
return "email"
}
if col == "zip" || col == "postalcode" || col == "zipcode" || col == "postal" || col == "zip_code" {
return "postal_code"
}
if col == "street_address" {
return "address"
}
if col == "hash" || col == "hashed_password" || col == "hash_password" {
return "password_hash"
}
if col == "birthdate" || col == "dob" || col == "date_of_birth" {
return "birth_date"
} }
return "" return ""
@@ -79,3 +86,18 @@ func getSuggestion(col string) string {
// url: _url, link // url: _url, link
// address: _address // address: _address
// //
// cleanStringStripRe matches every run of characters other than the ASCII
// letters a-z. It is compiled once at package initialisation rather than on
// each cleanString call; MustCompile panics at start-up if the pattern is
// ever edited into something invalid, instead of silently yielding a nil
// regexp.
var cleanStringStripRe = regexp.MustCompile("[^a-z]+")

// cleanString reduces input to a bare lowercase ASCII key.
//
// The steps are, in order:
//  1. decompose the text to Unicode NFD,
//  2. drop every rune in category Mn (nonspacing marks),
//  3. lowercase the remainder,
//  4. delete everything that is not in a-z (digits, underscores, spaces,
//     punctuation and any remaining non-ASCII letters).
//
// For example "Pr\u00e9nom" becomes "prenom" and "Zip_Code" becomes "zipcode".
// The result may be the empty string when input contains no ASCII letters.
func cleanString(input string) string {
	decomposed := norm.NFD.String(input)

	var sb strings.Builder
	sb.Grow(len(decomposed))
	for _, r := range decomposed {
		// After NFD, accents are separate Mn runes; skipping them keeps
		// only the base letter.
		if unicode.Is(unicode.Mn, r) {
			continue
		}
		sb.WriteRune(r)
	}

	lowered := strings.ToLower(sb.String())
	return cleanStringStripRe.ReplaceAllString(lowered, "")
}

View File

@@ -9,7 +9,7 @@
pname = name; pname = name;
version = "0.1.0"; version = "0.1.0";
src = ../leak-utils; src = ../leak-utils;
vendorHash = "sha256-rTfbXCiwv/+tVXZmgztt088Zhz0OQaVTfvxXVzw4o4Q="; vendorHash = "sha256-qgDqmEgL7B8FvoKNwLG0buLmg9Yt54cyWwmXBifgr/g=";
buildInputs = [ buildInputs = [
pkgs.duckdb pkgs.duckdb