jsonToParquet: add --delete-temp-file option

Signed-off-by: Hadi <112569860+anotherhadi@users.noreply.github.com>
This commit is contained in:
Hadi
2025-10-01 12:07:00 +02:00
parent 81ff9804bb
commit e12e3e6e1b
2 changed files with 6 additions and 3 deletions

View File

@@ -133,6 +133,7 @@ func main() {
var outputFile *string = flag.StringP("output", "o", "", "Output Parquet file")
var compression *string = flag.StringP("compression", "c", "ZSTD", "Compression codec (UNCOMPRESSED, SNAPPY, GZIP, BROTLI, LZ4, ZSTD)")
var noColors *bool = flag.Bool("no-colors", false, "Remove all colors")
var deleteTempFile *bool = flag.Bool("delete-temp-file", true, "Delete the temporary file")
var debug *bool = flag.Bool("debug", false, "Debug mode")
flag.Parse()
if *inputFile == "" || *outputFile == "" {
@@ -143,7 +144,7 @@ func main() {
}
lu.Compression = *compression
lu.Debug = *debug
err := misc.JsonToParquet(lu, *inputFile, *outputFile)
err := misc.JsonToParquet(lu, *inputFile, *outputFile, *deleteTempFile)
if err != nil {
log.Fatal("Failed to transform JSON file", "error", err)
}

View File

@@ -110,10 +110,12 @@ func flattenJSONFile(inputFile string, outputFile string) error {
return nil
}
func JsonToParquet(lu settings.LeakUtils, inputFile string, outputFile string) error {
func JsonToParquet(lu settings.LeakUtils, inputFile string, outputFile string, deleteTempFile bool) error {
tmpFile := filepath.Join("/tmp", "leak-utils.flat.json")
err := flattenJSONFile(inputFile, tmpFile)
defer os.Remove(tmpFile)
if deleteTempFile {
defer os.Remove(tmpFile)
}
query := fmt.Sprintf(`COPY (FROM read_json('%s', union_by_name=true, ignore_errors=true)) TO '%s' (FORMAT 'parquet', COMPRESSION '%s', ROW_GROUP_SIZE 200000);`, tmpFile, outputFile, lu.Compression)