Source code for oasislmf.pytools.converters.bintoparquet.manager

#!/usr/bin/env python

from contextlib import ExitStack
import logging
import sys
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

from oasislmf.pytools.common.data import resolve_file
from oasislmf.pytools.converters.data import TOOL_INFO

[docs] logger = logging.getLogger(__name__)
[docs] def default_toparquet(stack, file_in, file_out, file_type): headers = TOOL_INFO[file_type]["headers"] dtype = TOOL_INFO[file_type]["dtype"] file_in = resolve_file(file_in, "rb", stack) if file_in == sys.stdin.buffer: data = np.frombuffer(file_in.read(), dtype=dtype) else: data = np.fromfile(file_in, dtype=dtype) df = pd.DataFrame(data, columns=headers) table = pa.Table.from_pandas(df) pq.write_table(table, file_out)
[docs] def bintoparquet(file_in, file_out, file_type, **kwargs): """Convert bin file to parquet file based on file type Args: file_in (str | os.PathLike): Input file path file_out (str | os.PathLike): Output file path file_type (str): File type str from SUPPORTED_BINTOPARQUET """ with ExitStack() as stack: file_out = resolve_file(file_out, "wb", stack) default_toparquet(stack, file_in, file_out, file_type, **kwargs)