Parquet Parser
6 min
This connector reads a Parquet file and converts it into a JSON object, which can then be ingested by downstream applications.

Metadata

Sample metadata returned:

"metadata": {
  "num rows": 156,
  "num columns": 20,
  "column names": [ "metadata", "cloud", "src endpoint", "dst endpoint", "connection info", "traffic", "time", "start time", "end time", "severity id", "severity", "class name", "class uid", "category name", "category uid", "activity name", "activity id", "type uid", "type name", "unmapped" ],
  "column types": { "metadata": "object", "cloud": "object", "src endpoint": "object", "dst endpoint": "object", "connection info": "object", "traffic": "object", "time": "int64", "start time": "int64", "end time": "int64", "severity id": "int32", "severity": "object", "class name": "object", "class uid": "int32", "category name": "object", "category uid": "int32", "activity name": "object", "activity id": "int32", "type uid": "int32", "type name": "object", "unmapped": "object" },
  "memory usage": 279461,
  "describe": {
    "time": [ 156.0, 1678822323576.923, 92110.1751558733, 1678822108000.0, 1678822249500.0, 1678822323000.0, 1678822398750.0, 1678822494000.0 ],
    "start time": [ 156.0, 1678822323576.923, 92110.1751558733, 1678822108000.0, 1678822249500.0, 1678822323000.0, 1678822398750.0, 1678822494000.0 ],
    "end time": [ 156.0, 1678822342006.4102, 91207.1372981708, 1678822139000.0, 1678822270750.0, 1678822335000.0, 1678822420000.0, 1678822503000.0 ],
    "severity id": [ 156.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
    "class uid": [ 156.0, 4001.0, 0.0, 4001.0, 4001.0, 4001.0, 4001.0, 4001.0 ],
    "category uid": [ 156.0, 4.0, 0.0, 4.0, 4.0, 4.0, 4.0, 4.0 ],
    "activity id": [ 156.0, 0.5064102564102564, 0.5015690847060224, 0.0, 0.0, 1.0, 1.0, 1.0 ],
    "type uid": [ 156.0, 400100.50641025644, 0.5015690847060224, 400100.0, 400100.0, 400101.0, 400101.0, 400101.0 ]
  }
},

Actions

Parse Parquet

Parse a Parquet file.

Endpoint method: GET

Input arguments (name | type | required | description):
metadata | boolean | optional | include metadata
attachments | array | required | file to be analysed
file | string | optional | parameter for parse parquet
file name | string | optional | name of the resource

Output (parameter | type | description):
metadata | object | response data
num rows | number | output field num rows
num columns | number | output field num columns
column names | array | name of the resource
column types | object | type of the resource
metadata | string | response data
cloud | string | output field cloud
src endpoint | string | output field src endpoint
dst endpoint | string | output field dst endpoint
connection info | string | output field connection info
traffic | string | output field traffic
time | string | time value
start time | string | time value
end time | string | time value
severity id | string | unique identifier
severity | string | output field severity
class name | string | name of the resource
class uid | string | unique identifier
category name | string | name of the resource
category uid | string | unique identifier
activity name | string | name of the resource
activity id | string | unique identifier
type uid | string | unique identifier
type name | string | name of the resource
unmapped | string | output field unmapped

Example:

[ { "metadata": { "num rows": 156, "num columns": 20, "column names": [], "column types": {}, "memory usage": 279461, "describe": {} }, "data": [ { "metadata": { "product": { "version": "5", "name": "amazon vpc", "feature": { "name": "flowlogs" }, "vendor name": "aws" }, "profiles": [ "cloud" ], "version": "0 39 0" }, "cloud": { "account uid": "657944144804", "region": "us west 2", "zone": "usw2 az1", "provider": "aws" }, "src endpoint": { "port": null, "svc name": " ", "ip": " ", "intermediate ips": null, "interface uid": "eni 04aa1bea5f6444911", "vpc uid": "vpc 02e3e6d4cda0b46b4", "instance uid": " ", "subnet uid": "subnet 0c1474cfacf7e0af9" }, "dst endpoint": { "port": null, "svc name": " ", "ip": " ", "intermediate ips": null, "interface uid": null, "vpc uid": null, "instance uid": null, "subnet uid": null }, "connection info": { "protocol num": null, "tcp flags": null, "protocol ver": " ", "direction": " ", "boundary id": 0, "boundary": "unknown", "direction id": 0 }, "traffic": { "packets": null, "bytes": null }, "time": 1678822108000, "start time": 1678822108000, "end time": 1678822139000, "severity id": 1, "severity": "other", "class name": "network activity", "class uid": 4001, "category name": "network activity", "category uid": 4, "activity name": "", "activity id": 0, "type uid": 400100, "type name": "network activity unknown", "unmapped": [ [ "log status", "nodata" ], [ "sublocation id", " " ], [ "sublocation type", " " ] ] }, { "metadata": { "product": { "version": "5", "name": "amazon vpc", "feature": { "name": "flowlogs"