new action: countLinesWithAt (both txt/csv & parquet) (#5)
Signed-off-by: Hadi <112569860+anotherhadi@users.noreply.github.com>
This commit is contained in:
@@ -33,6 +33,7 @@ func main() {
|
||||
"mergeFiles",
|
||||
"deleteFirstLines",
|
||||
"deleteLastLines",
|
||||
"countLinesWithAt",
|
||||
"removeUrlSchemeFromUlp",
|
||||
}
|
||||
|
||||
@@ -162,6 +163,24 @@ func main() {
|
||||
log.Fatal("Failed to remove last lines", "error", err)
|
||||
}
|
||||
return
|
||||
case "countLinesWithAt":
|
||||
var inputFile *string = flag.StringP("input", "i", "", "Input file")
|
||||
var noColors *bool = flag.Bool("no-colors", false, "Remove all colors")
|
||||
var debug *bool = flag.Bool("debug", false, "Debug mode")
|
||||
flag.Parse()
|
||||
if *inputFile == "" {
|
||||
log.Fatal("Input file are required")
|
||||
}
|
||||
if *noColors {
|
||||
settings.DisableColors()
|
||||
}
|
||||
lu.Debug = *debug
|
||||
countAt, countLine, err := misc.CountLinesWithAt(lu, *inputFile)
|
||||
if err != nil {
|
||||
log.Fatal("Failed to count @", "error", err)
|
||||
}
|
||||
fmt.Println(settings.Base.Render("There are"), settings.Accent.Render(fmt.Sprintf("%d", countAt)), settings.Base.Render("lines with @ out of"), settings.Accent.Render(fmt.Sprintf("%d", countLine)), settings.Base.Render("lines in"), settings.Accent.Render(*inputFile))
|
||||
return
|
||||
case "removeUrlSchemeFromUlp":
|
||||
var inputFile *string = flag.StringP("input", "i", "", "Input Parquet file")
|
||||
var noColors *bool = flag.Bool("no-colors", false, "Remove all colors")
|
||||
|
||||
73
leak-utils/misc/countLinesWithAt.go
Normal file
73
leak-utils/misc/countLinesWithAt.go
Normal file
@@ -0,0 +1,73 @@
|
||||
package misc
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/anotherhadi/eleakxir/leak-utils/parquet"
|
||||
"github.com/anotherhadi/eleakxir/leak-utils/settings"
|
||||
)
|
||||
|
||||
// Count the line with "@" in a file
|
||||
func CountLinesWithAt(lu settings.LeakUtils, inputFile string) (nAt, nLines int, err error) {
|
||||
if strings.HasSuffix(inputFile, ".parquet") {
|
||||
return countRowsWithAtInParquet(lu, inputFile)
|
||||
}
|
||||
|
||||
in, err := os.Open(inputFile)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
defer in.Close()
|
||||
|
||||
scanner := bufio.NewScanner(in)
|
||||
countAt := 0
|
||||
countLine := 0
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if strings.Contains(line, "@") {
|
||||
countAt++
|
||||
}
|
||||
countLine++
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
return countAt, countLine, nil
|
||||
}
|
||||
|
||||
func countRowsWithAtInParquet(lu settings.LeakUtils, inputFile string) (nAt, nLine int, err error) {
|
||||
cols, err := parquet.GetColumns(lu.Db, inputFile)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
if len(cols) == 0 {
|
||||
return 0, 0, nil
|
||||
}
|
||||
|
||||
whereParts := []string{}
|
||||
for _, col := range cols {
|
||||
whereParts = append(whereParts, fmt.Sprintf("%s LIKE '%%@%%'", col))
|
||||
}
|
||||
whereClause := strings.Join(whereParts, " OR ")
|
||||
|
||||
query := fmt.Sprintf("SELECT COUNT(*) FROM read_parquet('%s') WHERE %s", inputFile, whereClause)
|
||||
var countAt int
|
||||
err = lu.Db.QueryRow(query).Scan(&countAt)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
query = fmt.Sprintf("SELECT COUNT(*) FROM read_parquet('%s')", inputFile)
|
||||
var countLine int
|
||||
err = lu.Db.QueryRow(query).Scan(&countLine)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
return countAt, countLine, nil
|
||||
}
|
||||
@@ -252,7 +252,7 @@ func GetParquet(db *sql.DB, inputFile string) (parquet *Parquet, err error) {
|
||||
parquet = &Parquet{}
|
||||
parquet.Filepath = inputFile
|
||||
|
||||
parquet.Columns, err = getColumns(db, inputFile)
|
||||
parquet.Columns, err = GetColumns(db, inputFile)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -7,8 +7,8 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// getColumns retrieves the column names from the Parquet file.
|
||||
func getColumns(db *sql.DB, filepath string) ([]string, error) {
|
||||
// GetColumns retrieves the column names from the Parquet file.
|
||||
func GetColumns(db *sql.DB, filepath string) ([]string, error) {
|
||||
// Create a view from the parquet file
|
||||
query := fmt.Sprintf("CREATE OR REPLACE VIEW parquet_view AS SELECT * FROM read_parquet('%s')", filepath)
|
||||
_, err := db.Exec(query)
|
||||
|
||||
Reference in New Issue
Block a user