From ba5ef5c0f39aee7460311504ec7c539d410b201c Mon Sep 17 00:00:00 2001 From: Hadi <112569860+anotherhadi@users.noreply.github.com> Date: Wed, 24 Sep 2025 21:12:31 +0200 Subject: [PATCH] new action: countLinesWithAt (both txt/csv & parquet) (#5) Signed-off-by: Hadi <112569860+anotherhadi@users.noreply.github.com> --- leak-utils/leak-utils/main.go | 19 ++++++++ leak-utils/misc/countLinesWithAt.go | 73 +++++++++++++++++++++++++++++ leak-utils/parquet/parquet.go | 2 +- leak-utils/parquet/utils.go | 4 +- 4 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 leak-utils/misc/countLinesWithAt.go diff --git a/leak-utils/leak-utils/main.go b/leak-utils/leak-utils/main.go index 9b2fdce..f8b395d 100644 --- a/leak-utils/leak-utils/main.go +++ b/leak-utils/leak-utils/main.go @@ -33,6 +33,7 @@ func main() { "mergeFiles", "deleteFirstLines", "deleteLastLines", + "countLinesWithAt", "removeUrlSchemeFromUlp", } @@ -162,6 +163,24 @@ func main() { log.Fatal("Failed to remove last lines", "error", err) } return + case "countLinesWithAt": + var inputFile *string = flag.StringP("input", "i", "", "Input file") + var noColors *bool = flag.Bool("no-colors", false, "Remove all colors") + var debug *bool = flag.Bool("debug", false, "Debug mode") + flag.Parse() + if *inputFile == "" { + log.Fatal("Input file are required") + } + if *noColors { + settings.DisableColors() + } + lu.Debug = *debug + countAt, countLine, err := misc.CountLinesWithAt(lu, *inputFile) + if err != nil { + log.Fatal("Failed to count @", "error", err) + } + fmt.Println(settings.Base.Render("There are"), settings.Accent.Render(fmt.Sprintf("%d", countAt)), settings.Base.Render("lines with @ out of"), settings.Accent.Render(fmt.Sprintf("%d", countLine)), settings.Base.Render("lines in"), settings.Accent.Render(*inputFile)) + return case "removeUrlSchemeFromUlp": var inputFile *string = flag.StringP("input", "i", "", "Input Parquet file") var noColors *bool = flag.Bool("no-colors", false, "Remove all colors") diff --git a/leak-utils/misc/countLinesWithAt.go b/leak-utils/misc/countLinesWithAt.go new file mode 100644 index 0000000..fcd87c7 --- /dev/null +++ b/leak-utils/misc/countLinesWithAt.go @@ -0,0 +1,73 @@ +package misc + +import ( + "bufio" + "fmt" + "os" + "strings" + + "github.com/anotherhadi/eleakxir/leak-utils/parquet" + "github.com/anotherhadi/eleakxir/leak-utils/settings" +) + +// Count the line with "@" in a file +func CountLinesWithAt(lu settings.LeakUtils, inputFile string) (nAt, nLines int, err error) { + if strings.HasSuffix(inputFile, ".parquet") { + return countRowsWithAtInParquet(lu, inputFile) + } + + in, err := os.Open(inputFile) + if err != nil { + return 0, 0, err + } + defer in.Close() + + scanner := bufio.NewScanner(in) + countAt := 0 + countLine := 0 + for scanner.Scan() { + line := scanner.Text() + if strings.Contains(line, "@") { + countAt++ + } + countLine++ + } + + if err := scanner.Err(); err != nil { + return 0, 0, err + } + + return countAt, countLine, nil +} + +func countRowsWithAtInParquet(lu settings.LeakUtils, inputFile string) (nAt, nLine int, err error) { + cols, err := parquet.GetColumns(lu.Db, inputFile) + if err != nil { + return 0, 0, err + } + if len(cols) == 0 { + return 0, 0, nil + } + + whereParts := []string{} + for _, col := range cols { + whereParts = append(whereParts, fmt.Sprintf("%s LIKE '%%@%%'", col)) + } + whereClause := strings.Join(whereParts, " OR ") + + query := fmt.Sprintf("SELECT COUNT(*) FROM read_parquet('%s') WHERE %s", inputFile, whereClause) + var countAt int + err = lu.Db.QueryRow(query).Scan(&countAt) + if err != nil { + return 0, 0, err + } + + query = fmt.Sprintf("SELECT COUNT(*) FROM read_parquet('%s')", inputFile) + var countLine int + err = lu.Db.QueryRow(query).Scan(&countLine) + if err != nil { + return 0, 0, err + } + + return countAt, countLine, nil +} diff --git a/leak-utils/parquet/parquet.go b/leak-utils/parquet/parquet.go index 538a982..930918b 100644 --- a/leak-utils/parquet/parquet.go +++ b/leak-utils/parquet/parquet.go @@ -252,7 +252,7 @@ func GetParquet(db *sql.DB, inputFile string) (parquet *Parquet, err error) { parquet = &Parquet{} parquet.Filepath = inputFile - parquet.Columns, err = getColumns(db, inputFile) + parquet.Columns, err = GetColumns(db, inputFile) if err != nil { return } diff --git a/leak-utils/parquet/utils.go b/leak-utils/parquet/utils.go index e227865..3a63402 100644 --- a/leak-utils/parquet/utils.go +++ b/leak-utils/parquet/utils.go @@ -7,8 +7,8 @@ import ( "strings" ) -// getColumns retrieves the column names from the Parquet file. -func getColumns(db *sql.DB, filepath string) ([]string, error) { +// GetColumns retrieves the column names from the Parquet file. +func GetColumns(db *sql.DB, filepath string) ([]string, error) { // Create a view from the parquet file query := fmt.Sprintf("CREATE OR REPLACE VIEW parquet_view AS SELECT * FROM read_parquet('%s')", filepath) _, err := db.Exec(query)