new action: countLinesWithAt (both txt/csv & parquet) (#5)
Signed-off-by: Hadi <112569860+anotherhadi@users.noreply.github.com>
This commit is contained in:
@@ -33,6 +33,7 @@ func main() {
|
|||||||
"mergeFiles",
|
"mergeFiles",
|
||||||
"deleteFirstLines",
|
"deleteFirstLines",
|
||||||
"deleteLastLines",
|
"deleteLastLines",
|
||||||
|
"countLinesWithAt",
|
||||||
"removeUrlSchemeFromUlp",
|
"removeUrlSchemeFromUlp",
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -162,6 +163,24 @@ func main() {
|
|||||||
log.Fatal("Failed to remove last lines", "error", err)
|
log.Fatal("Failed to remove last lines", "error", err)
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
|
case "countLinesWithAt":
|
||||||
|
var inputFile *string = flag.StringP("input", "i", "", "Input file")
|
||||||
|
var noColors *bool = flag.Bool("no-colors", false, "Remove all colors")
|
||||||
|
var debug *bool = flag.Bool("debug", false, "Debug mode")
|
||||||
|
flag.Parse()
|
||||||
|
if *inputFile == "" {
|
||||||
|
log.Fatal("Input file are required")
|
||||||
|
}
|
||||||
|
if *noColors {
|
||||||
|
settings.DisableColors()
|
||||||
|
}
|
||||||
|
lu.Debug = *debug
|
||||||
|
countAt, countLine, err := misc.CountLinesWithAt(lu, *inputFile)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("Failed to count @", "error", err)
|
||||||
|
}
|
||||||
|
fmt.Println(settings.Base.Render("There are"), settings.Accent.Render(fmt.Sprintf("%d", countAt)), settings.Base.Render("lines with @ out of"), settings.Accent.Render(fmt.Sprintf("%d", countLine)), settings.Base.Render("lines in"), settings.Accent.Render(*inputFile))
|
||||||
|
return
|
||||||
case "removeUrlSchemeFromUlp":
|
case "removeUrlSchemeFromUlp":
|
||||||
var inputFile *string = flag.StringP("input", "i", "", "Input Parquet file")
|
var inputFile *string = flag.StringP("input", "i", "", "Input Parquet file")
|
||||||
var noColors *bool = flag.Bool("no-colors", false, "Remove all colors")
|
var noColors *bool = flag.Bool("no-colors", false, "Remove all colors")
|
||||||
|
|||||||
73
leak-utils/misc/countLinesWithAt.go
Normal file
73
leak-utils/misc/countLinesWithAt.go
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
package misc
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/anotherhadi/eleakxir/leak-utils/parquet"
|
||||||
|
"github.com/anotherhadi/eleakxir/leak-utils/settings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Count the line with "@" in a file
|
||||||
|
func CountLinesWithAt(lu settings.LeakUtils, inputFile string) (nAt, nLines int, err error) {
|
||||||
|
if strings.HasSuffix(inputFile, ".parquet") {
|
||||||
|
return countRowsWithAtInParquet(lu, inputFile)
|
||||||
|
}
|
||||||
|
|
||||||
|
in, err := os.Open(inputFile)
|
||||||
|
if err != nil {
|
||||||
|
return 0, 0, err
|
||||||
|
}
|
||||||
|
defer in.Close()
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(in)
|
||||||
|
countAt := 0
|
||||||
|
countLine := 0
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := scanner.Text()
|
||||||
|
if strings.Contains(line, "@") {
|
||||||
|
countAt++
|
||||||
|
}
|
||||||
|
countLine++
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
return 0, 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return countAt, countLine, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func countRowsWithAtInParquet(lu settings.LeakUtils, inputFile string) (nAt, nLine int, err error) {
|
||||||
|
cols, err := parquet.GetColumns(lu.Db, inputFile)
|
||||||
|
if err != nil {
|
||||||
|
return 0, 0, err
|
||||||
|
}
|
||||||
|
if len(cols) == 0 {
|
||||||
|
return 0, 0, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
whereParts := []string{}
|
||||||
|
for _, col := range cols {
|
||||||
|
whereParts = append(whereParts, fmt.Sprintf("%s LIKE '%%@%%'", col))
|
||||||
|
}
|
||||||
|
whereClause := strings.Join(whereParts, " OR ")
|
||||||
|
|
||||||
|
query := fmt.Sprintf("SELECT COUNT(*) FROM read_parquet('%s') WHERE %s", inputFile, whereClause)
|
||||||
|
var countAt int
|
||||||
|
err = lu.Db.QueryRow(query).Scan(&countAt)
|
||||||
|
if err != nil {
|
||||||
|
return 0, 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
query = fmt.Sprintf("SELECT COUNT(*) FROM read_parquet('%s')", inputFile)
|
||||||
|
var countLine int
|
||||||
|
err = lu.Db.QueryRow(query).Scan(&countLine)
|
||||||
|
if err != nil {
|
||||||
|
return 0, 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return countAt, countLine, nil
|
||||||
|
}
|
||||||
@@ -252,7 +252,7 @@ func GetParquet(db *sql.DB, inputFile string) (parquet *Parquet, err error) {
|
|||||||
parquet = &Parquet{}
|
parquet = &Parquet{}
|
||||||
parquet.Filepath = inputFile
|
parquet.Filepath = inputFile
|
||||||
|
|
||||||
parquet.Columns, err = getColumns(db, inputFile)
|
parquet.Columns, err = GetColumns(db, inputFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,8 +7,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// getColumns retrieves the column names from the Parquet file.
|
// GetColumns retrieves the column names from the Parquet file.
|
||||||
func getColumns(db *sql.DB, filepath string) ([]string, error) {
|
func GetColumns(db *sql.DB, filepath string) ([]string, error) {
|
||||||
// Create a view from the parquet file
|
// Create a view from the parquet file
|
||||||
query := fmt.Sprintf("CREATE OR REPLACE VIEW parquet_view AS SELECT * FROM read_parquet('%s')", filepath)
|
query := fmt.Sprintf("CREATE OR REPLACE VIEW parquet_view AS SELECT * FROM read_parquet('%s')", filepath)
|
||||||
_, err := db.Exec(query)
|
_, err := db.Exec(query)
|
||||||
|
|||||||
Reference in New Issue
Block a user