# Source code for oasislmf.pytools.converters.parquettobin.manager
#!/usr/bin/env python
from contextlib import ExitStack
import logging
import numpy as np
import pyarrow.parquet as pq
from oasislmf.pytools.common.data import DEFAULT_BUFFER_SIZE, resolve_file
from oasislmf.pytools.converters.data import TOOL_INFO
[docs]
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
[docs]
def default_tobin(stack, file_in, file_out, file_type):
    """Stream a parquet file into ``file_out`` as packed binary records.

    The input is read batch by batch; each batch is copied column by column
    into a numpy structured array and written out with ``ndarray.tofile``.

    Args:
        stack (ExitStack): Passed to ``resolve_file`` when opening the input.
        file_in: Parquet input, handed to ``resolve_file`` in ``"rb"`` mode.
        file_out: Destination passed straight to ``ndarray.tofile``.
        file_type (str): Key into ``TOOL_INFO``; its ``"dtype"`` entry defines
            the record layout and the parquet columns that are read.
    """
    record_dtype = TOOL_INFO[file_type]["dtype"]
    source = resolve_file(file_in, "rb", stack)
    parquet_file = pq.ParquetFile(source)

    for record_batch in parquet_file.iter_batches(batch_size=DEFAULT_BUFFER_SIZE):
        # One structured record per row of the batch.
        records = np.empty(len(record_batch), dtype=record_dtype)
        for field in record_dtype.names:
            column = record_batch.column(field)
            records[field] = column.to_numpy(zero_copy_only=False)
        records.tofile(file_out)
[docs]
def parquettobin(file_in, file_out, file_type, **kwargs):
    """Convert a parquet file to a bin file for the given file type.

    Args:
        file_in (str | os.PathLike): Input file path
        file_out (str | os.PathLike): Output file path
        file_type (str): File type str from SUPPORTED_PARQUETTOBIN
        **kwargs: Forwarded unchanged to ``default_tobin``.
            NOTE(review): ``default_tobin`` as defined in this module takes
            no keyword arguments, so any extras raise ``TypeError``.
    """
    with ExitStack() as stack:
        # The output is opened through the stack; leaving the block exits the stack.
        bin_out = resolve_file(file_out, "wb", stack)
        default_tobin(stack, file_in, bin_out, file_type, **kwargs)