Change suggestion strategy
Signed-off-by: Hadi <112569860+anotherhadi@users.noreply.github.com>
This commit is contained in:
@@ -7,6 +7,7 @@ require (
|
||||
github.com/charmbracelet/log v0.4.2
|
||||
github.com/marcboeker/go-duckdb/v2 v2.4.0
|
||||
github.com/spf13/pflag v1.0.10
|
||||
golang.org/x/text v0.28.0
|
||||
)
|
||||
|
||||
require (
|
||||
|
||||
@@ -100,6 +100,8 @@ golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
|
||||
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
||||
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
|
||||
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
|
||||
golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
|
||||
golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
|
||||
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY=
|
||||
|
||||
@@ -1,15 +1,20 @@
|
||||
package parquet
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"slices"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
func getSuggestion(col string) string {
|
||||
col = formatColumnName(col)
|
||||
knownNames := []string{
|
||||
var (
|
||||
knownColumnNames = []string{
|
||||
"date",
|
||||
"phone",
|
||||
"username",
|
||||
"iban",
|
||||
"address",
|
||||
"email",
|
||||
"postal_code",
|
||||
@@ -28,47 +33,49 @@ func getSuggestion(col string) string {
|
||||
"url",
|
||||
"ip",
|
||||
}
|
||||
if slices.Contains(knownNames, col) {
|
||||
return col
|
||||
|
||||
suggestions = map[string]string{
|
||||
"user": "username",
|
||||
"login": "username",
|
||||
"sex": "gender",
|
||||
"genre": "gender",
|
||||
"ipaddress": "ip",
|
||||
"firstname": "first_name",
|
||||
"prenom": "first_name",
|
||||
"lastname": "last_name",
|
||||
"nom": "last_name",
|
||||
"fullname": "full_name",
|
||||
"nomcomplet": "full_name",
|
||||
"adresse": "address",
|
||||
"streetaddress": "address",
|
||||
"ville": "city",
|
||||
"pays": "country",
|
||||
"mail": "email",
|
||||
"zip": "postal_code",
|
||||
"postalcode": "postal_code",
|
||||
"zipcode": "postal_code",
|
||||
"postal": "postal_code",
|
||||
"codepostal": "postal_code",
|
||||
"hash": "password_hash",
|
||||
"hashedpassword": "password_hash",
|
||||
"hashpassword": "password_hash",
|
||||
"passwordhashed": "password_hash",
|
||||
"birthdate": "birth_date",
|
||||
"dob": "birth_date",
|
||||
"dateofbirth": "birth_date",
|
||||
}
|
||||
if col == "user" {
|
||||
return "username"
|
||||
)
|
||||
|
||||
func getSuggestion(col string) string {
|
||||
colFormated := formatColumnName(col)
|
||||
if slices.Contains(knownColumnNames, colFormated) {
|
||||
return colFormated
|
||||
}
|
||||
if col == "login" {
|
||||
return "username"
|
||||
}
|
||||
if col == "sex" {
|
||||
return "gender"
|
||||
}
|
||||
if col == "ip_address" {
|
||||
return "ip"
|
||||
}
|
||||
if col == "password_hashed" {
|
||||
return "password_hash"
|
||||
}
|
||||
if col == "firstname" {
|
||||
return "first_name"
|
||||
}
|
||||
if col == "lastname" {
|
||||
return "last_name"
|
||||
}
|
||||
if col == "fullname" {
|
||||
return "full_name"
|
||||
}
|
||||
if col == "mail" {
|
||||
return "email"
|
||||
}
|
||||
if col == "zip" || col == "postalcode" || col == "zipcode" || col == "postal" || col == "zip_code" {
|
||||
return "postal_code"
|
||||
}
|
||||
if col == "street_address" {
|
||||
return "address"
|
||||
}
|
||||
if col == "hash" || col == "hashed_password" || col == "hash_password" {
|
||||
return "password_hash"
|
||||
}
|
||||
if col == "birthdate" || col == "dob" || col == "date_of_birth" {
|
||||
return "birth_date"
|
||||
|
||||
col = cleanString(col)
|
||||
|
||||
if val, ok := suggestions[col]; ok {
|
||||
return val
|
||||
}
|
||||
|
||||
return ""
|
||||
@@ -79,3 +86,18 @@ func getSuggestion(col string) string {
|
||||
// url: _url, link
|
||||
// address: _address
|
||||
//
|
||||
|
||||
func cleanString(input string) string {
|
||||
t := norm.NFD.String(input)
|
||||
var sb strings.Builder
|
||||
for _, r := range t {
|
||||
if unicode.Is(unicode.Mn, r) {
|
||||
continue
|
||||
}
|
||||
sb.WriteRune(r)
|
||||
}
|
||||
s := strings.ToLower(sb.String())
|
||||
reg, _ := regexp.Compile("[^a-z]+")
|
||||
s = reg.ReplaceAllString(s, "")
|
||||
return s
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
pname = name;
|
||||
version = "0.1.0";
|
||||
src = ../leak-utils;
|
||||
vendorHash = "sha256-rTfbXCiwv/+tVXZmgztt088Zhz0OQaVTfvxXVzw4o4Q=";
|
||||
vendorHash = "sha256-qgDqmEgL7B8FvoKNwLG0buLmg9Yt54cyWwmXBifgr/g=";
|
||||
|
||||
buildInputs = [
|
||||
pkgs.duckdb
|
||||
|
||||
Reference in New Issue
Block a user