Change suggestion strategy
Signed-off-by: Hadi <112569860+anotherhadi@users.noreply.github.com>
This commit is contained in:
@@ -7,6 +7,7 @@ require (
|
|||||||
github.com/charmbracelet/log v0.4.2
|
github.com/charmbracelet/log v0.4.2
|
||||||
github.com/marcboeker/go-duckdb/v2 v2.4.0
|
github.com/marcboeker/go-duckdb/v2 v2.4.0
|
||||||
github.com/spf13/pflag v1.0.10
|
github.com/spf13/pflag v1.0.10
|
||||||
|
golang.org/x/text v0.28.0
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
|||||||
@@ -100,6 +100,8 @@ golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
|||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
|
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
|
||||||
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
||||||
|
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
|
||||||
|
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
|
||||||
golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
|
golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
|
||||||
golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
|
golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
|
||||||
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY=
|
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY=
|
||||||
|
|||||||
@@ -1,15 +1,20 @@
|
|||||||
package parquet
|
package parquet
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"regexp"
|
||||||
"slices"
|
"slices"
|
||||||
|
"strings"
|
||||||
|
"unicode"
|
||||||
|
|
||||||
|
"golang.org/x/text/unicode/norm"
|
||||||
)
|
)
|
||||||
|
|
||||||
func getSuggestion(col string) string {
|
var (
|
||||||
col = formatColumnName(col)
|
knownColumnNames = []string{
|
||||||
knownNames := []string{
|
|
||||||
"date",
|
"date",
|
||||||
"phone",
|
"phone",
|
||||||
"username",
|
"username",
|
||||||
|
"iban",
|
||||||
"address",
|
"address",
|
||||||
"email",
|
"email",
|
||||||
"postal_code",
|
"postal_code",
|
||||||
@@ -28,47 +33,49 @@ func getSuggestion(col string) string {
|
|||||||
"url",
|
"url",
|
||||||
"ip",
|
"ip",
|
||||||
}
|
}
|
||||||
if slices.Contains(knownNames, col) {
|
|
||||||
return col
|
suggestions = map[string]string{
|
||||||
|
"user": "username",
|
||||||
|
"login": "username",
|
||||||
|
"sex": "gender",
|
||||||
|
"genre": "gender",
|
||||||
|
"ipaddress": "ip",
|
||||||
|
"firstname": "first_name",
|
||||||
|
"prenom": "first_name",
|
||||||
|
"lastname": "last_name",
|
||||||
|
"nom": "last_name",
|
||||||
|
"fullname": "full_name",
|
||||||
|
"nomcomplet": "full_name",
|
||||||
|
"adresse": "address",
|
||||||
|
"streetaddress": "address",
|
||||||
|
"ville": "city",
|
||||||
|
"pays": "country",
|
||||||
|
"mail": "email",
|
||||||
|
"zip": "postal_code",
|
||||||
|
"postalcode": "postal_code",
|
||||||
|
"zipcode": "postal_code",
|
||||||
|
"postal": "postal_code",
|
||||||
|
"codepostal": "postal_code",
|
||||||
|
"hash": "password_hash",
|
||||||
|
"hashedpassword": "password_hash",
|
||||||
|
"hashpassword": "password_hash",
|
||||||
|
"passwordhashed": "password_hash",
|
||||||
|
"birthdate": "birth_date",
|
||||||
|
"dob": "birth_date",
|
||||||
|
"dateofbirth": "birth_date",
|
||||||
}
|
}
|
||||||
if col == "user" {
|
)
|
||||||
return "username"
|
|
||||||
|
func getSuggestion(col string) string {
|
||||||
|
colFormated := formatColumnName(col)
|
||||||
|
if slices.Contains(knownColumnNames, colFormated) {
|
||||||
|
return colFormated
|
||||||
}
|
}
|
||||||
if col == "login" {
|
|
||||||
return "username"
|
col = cleanString(col)
|
||||||
}
|
|
||||||
if col == "sex" {
|
if val, ok := suggestions[col]; ok {
|
||||||
return "gender"
|
return val
|
||||||
}
|
|
||||||
if col == "ip_address" {
|
|
||||||
return "ip"
|
|
||||||
}
|
|
||||||
if col == "password_hashed" {
|
|
||||||
return "password_hash"
|
|
||||||
}
|
|
||||||
if col == "firstname" {
|
|
||||||
return "first_name"
|
|
||||||
}
|
|
||||||
if col == "lastname" {
|
|
||||||
return "last_name"
|
|
||||||
}
|
|
||||||
if col == "fullname" {
|
|
||||||
return "full_name"
|
|
||||||
}
|
|
||||||
if col == "mail" {
|
|
||||||
return "email"
|
|
||||||
}
|
|
||||||
if col == "zip" || col == "postalcode" || col == "zipcode" || col == "postal" || col == "zip_code" {
|
|
||||||
return "postal_code"
|
|
||||||
}
|
|
||||||
if col == "street_address" {
|
|
||||||
return "address"
|
|
||||||
}
|
|
||||||
if col == "hash" || col == "hashed_password" || col == "hash_password" {
|
|
||||||
return "password_hash"
|
|
||||||
}
|
|
||||||
if col == "birthdate" || col == "dob" || col == "date_of_birth" {
|
|
||||||
return "birth_date"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ""
|
return ""
|
||||||
@@ -79,3 +86,18 @@ func getSuggestion(col string) string {
|
|||||||
// url: _url, link
|
// url: _url, link
|
||||||
// address: _address
|
// address: _address
|
||||||
//
|
//
|
||||||
|
|
||||||
|
func cleanString(input string) string {
|
||||||
|
t := norm.NFD.String(input)
|
||||||
|
var sb strings.Builder
|
||||||
|
for _, r := range t {
|
||||||
|
if unicode.Is(unicode.Mn, r) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
sb.WriteRune(r)
|
||||||
|
}
|
||||||
|
s := strings.ToLower(sb.String())
|
||||||
|
reg, _ := regexp.Compile("[^a-z]+")
|
||||||
|
s = reg.ReplaceAllString(s, "")
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
pname = name;
|
pname = name;
|
||||||
version = "0.1.0";
|
version = "0.1.0";
|
||||||
src = ../leak-utils;
|
src = ../leak-utils;
|
||||||
vendorHash = "sha256-rTfbXCiwv/+tVXZmgztt088Zhz0OQaVTfvxXVzw4o4Q=";
|
vendorHash = "sha256-qgDqmEgL7B8FvoKNwLG0buLmg9Yt54cyWwmXBifgr/g=";
|
||||||
|
|
||||||
buildInputs = [
|
buildInputs = [
|
||||||
pkgs.duckdb
|
pkgs.duckdb
|
||||||
|
|||||||
Reference in New Issue
Block a user