Source code for oasislmf.pytools.converters.csvtobin.utils.occurrence

import numba as nb
import numpy as np
from oasislmf.pytools.common.data import DEFAULT_BUFFER_SIZE, generate_output_metadata, occurrence_dtype, occurrence_granular_dtype
from oasislmf.pytools.common.input_files import occ_get_date_id
from oasislmf.pytools.converters.csvtobin.utils.common import read_csv_as_ndarray
from oasislmf.pytools.converters.data import TOOL_INFO


[docs] def occurrence_tobin(stack, file_in, file_out, file_type, no_of_periods, no_date_alg=False, granular=False): @nb.njit(cache=True, error_model="numpy") def _get_occ_data_with_date_ids(occ_csv, occ_dtype): buffer_size = DEFAULT_BUFFER_SIZE buffer = np.zeros(buffer_size, dtype=occ_dtype) idx = 0 for row in occ_csv: if idx >= buffer_size: yield buffer[:idx] buffer = np.zeros(buffer_size, dtype=occ_dtype) idx = 0 occ_date_id = occ_get_date_id( False, row["occ_year"], row["occ_month"], row["occ_day"], ) buffer[idx]["event_id"] = row["event_id"] buffer[idx]["period_no"] = row["period_no"] buffer[idx]["occ_date_id"] = occ_date_id idx += 1 yield buffer[:idx] @nb.njit(cache=True, error_model="numpy") def _get_occ_data_with_date_ids_gran(occ_csv, occ_dtype): buffer_size = DEFAULT_BUFFER_SIZE buffer = np.zeros(buffer_size, dtype=occ_dtype) idx = 0 for row in occ_csv: if idx >= buffer_size: yield buffer[:idx] buffer = np.zeros(buffer_size, dtype=occ_dtype) idx = 0 occ_date_id = occ_get_date_id( True, row["occ_year"], row["occ_month"], row["occ_day"], row["occ_hour"], row["occ_minute"], ) buffer[idx]["event_id"] = row["event_id"] buffer[idx]["period_no"] = row["period_no"] buffer[idx]["occ_date_id"] = occ_date_id idx += 1 yield buffer[:idx] if no_date_alg and granular: raise RuntimeError("Cannot have an occurrence file with granular dates and no date algorithm. Use at most one of -D, -G, but not both") # Write date opts date_opts = granular << 1 | (not no_date_alg) np.array([date_opts], dtype="i4").tofile(file_out) np.array([no_of_periods], dtype="i4").tofile(file_out) headers = TOOL_INFO[file_type]["headers"] dtype = TOOL_INFO[file_type]["dtype"] if no_date_alg: csv_data = read_csv_as_ndarray(stack, file_in, headers, dtype) csv_data.tofile(file_out) else: occ_csv_output = [ ("event_id", 'i4', "%d"), ("period_no", 'i4', "%d"), ("occ_year", 'i4', "%d"), ("occ_month", 'i4', "%d"), ("occ_day", 'i4', "%d"), ] if granular: occ_csv_output += [ ("occ_hour", 'i4', "%d"), ("occ_minute", 'i4', "%d"), ] headers, dtype, fmt = generate_output_metadata(occ_csv_output) csv_data = read_csv_as_ndarray(stack, file_in, headers, dtype) gen = _get_occ_data_with_date_ids(csv_data, occurrence_dtype) if granular: gen = _get_occ_data_with_date_ids_gran(csv_data, occurrence_granular_dtype) for data in gen: if any(data["period_no"] > no_of_periods): raise RuntimeError("FATAL: Period number exceeds maximum supplied") data.tofile(file_out)