Source code for oasislmf.pytools.converters.csvtobin.utils.footprint
import zlib

import numpy as np
import pandas as pd

from oasislmf.pytools.common.data import resolve_file
from oasislmf.pytools.converters.csvtobin.utils.common import read_csv_as_ndarray
from oasislmf.pytools.converters.data import TOOL_INFO
from oasislmf.pytools.getmodel.common import Event_dtype, EventIndexBin_dtype, EventIndexBinZ_dtype
from oasislmf.utils.exceptions import OasisException


def _validate(data):
    df = pd.DataFrame(data)

    # Check probability sums to 1 for each (event_id, areaperil_id) group
    prob_sums = df.groupby(["event_id", "areaperil_id"])["probability"].sum()
    invalid_sums = prob_sums[~np.isclose(prob_sums, 1, atol=1e-6)]
    if not invalid_sums.empty:
        error_msg = "\n".join([
            f"Group (event_id={idx[0]}, areaperil_id={idx[1]}) has prob sum = {val:.6f}"
            for idx, val in invalid_sums.items()
        ])
        raise OasisException(f"Error: Probabilities do not sum to 1 for the following groups: \n{error_msg}")

    # Check sorted by event_id, areaperil_id
    expected_order = df.sort_values(['event_id', 'areaperil_id']).reset_index(drop=True)
    if not df[['event_id', 'areaperil_id']].equals(expected_order[['event_id', 'areaperil_id']]):
        unordered_rows = df[['event_id', 'areaperil_id']].ne(expected_order[['event_id', 'areaperil_id']]).any(axis=1)
        mismatch_indices = df.index[unordered_rows].tolist()
        raise OasisException(f"IDs not in ascending order. First few mismatched indices: \n{df.iloc[mismatch_indices[:10]]}")

    # Check intensity bin uniqueness for each (event_id, areaperil_id) group
    duplicates = df.duplicated(subset=['event_id', 'areaperil_id', 'intensity_bin_id'], keep=False)
    if duplicates.any():
        dup_rows = df[duplicates]
        error_msg = dup_rows[['event_id', 'areaperil_id', 'intensity_bin_id']].drop_duplicates(keep="last").to_string()
        raise OasisException(f"Error: Duplicate intensity bins found: \n{error_msg}")
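# --- Editor's example (a sketch, not part of the module) --------------------
# Demonstrates the shape of input that passes all three _validate checks:
# probabilities sum to 1 within each (event_id, areaperil_id) group, rows are
# sorted by (event_id, areaperil_id), and each intensity bin appears at most
# once per group. The dtype below is hypothetical; the real dtype comes from
# TOOL_INFO[file_type]["dtype"].
def _example_validate_usage():
    example_dtype = np.dtype([
        ("event_id", np.int32),
        ("areaperil_id", np.int32),
        ("intensity_bin_id", np.int32),
        ("probability", np.float32),
    ])
    example_data = np.array([
        (1, 10, 1, 0.25),  # event 1, areaperil 10: bins 1 and 2 sum to 1.0
        (1, 10, 2, 0.75),
        (2, 10, 1, 1.00),  # event 2, areaperil 10: a single certain bin
    ], dtype=example_dtype)
    _validate(example_data)  # returns silently; any violation raises OasisException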
def footprint_tobin(
    stack,
    file_in,
    file_out,
    file_type,
    idx_file_out,
    zip_files,
    max_intensity_bin_idx,
    no_intensity_uncertainty,
    decompressed_size,
    no_validation,
):
    headers = TOOL_INFO[file_type]["headers"]
    dtype = TOOL_INFO[file_type]["dtype"]

    idx_file_out = resolve_file(idx_file_out, "wb", stack)
    data = read_csv_as_ndarray(stack, file_in, headers, dtype)

    if not no_validation:
        _validate(data)

    # Write bin file header: the max intensity bin index, then an options word
    # (bit 0 = intensity uncertainty present, bit 1 = index stores decompressed sizes)
    np.array([max_intensity_bin_idx], dtype=np.int32).tofile(file_out)
    zip_opts = decompressed_size << 1 | (not no_intensity_uncertainty)
    np.array([zip_opts], dtype=np.int32).tofile(file_out)
    offset = np.dtype(np.int32).itemsize * 2  # event blocks start after the two header ints

    unique_events = np.unique(data["event_id"])
    for event_id in unique_events:
        event_mask = data["event_id"] == event_id
        event_data = data[event_mask]

        bin_data = np.empty(len(event_data), dtype=Event_dtype)
        bin_data["areaperil_id"] = event_data["areaperil_id"]
        bin_data["intensity_bin_id"] = event_data["intensity_bin_id"]
        bin_data["probability"] = event_data["probability"]

        if any(bin_data["intensity_bin_id"] > max_intensity_bin_idx):
            raise OasisException(f"Error: Found intensity_bin_idx in data larger than max_intensity_bin_idx: {max_intensity_bin_idx}")

        bin_data = bin_data.tobytes()
        dsize = len(bin_data)  # decompressed size, recorded in the zipped index variant
        if zip_files:
            bin_data = zlib.compress(bin_data)
        file_out.write(bin_data)
        size = len(bin_data)

        # Write the index entry locating this event's block in the bin file
        if decompressed_size:
            np.array([(event_id, offset, size, dsize)], dtype=EventIndexBinZ_dtype).tofile(idx_file_out)
        else:
            np.array([(event_id, offset, size)], dtype=EventIndexBin_dtype).tofile(idx_file_out)
        offset += size
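# --- Editor's example (a sketch, not part of the module) --------------------
# Shows how the index written above can be used to recover one event from a
# zipped footprint binary: look up the event's entry, seek to its offset, read
# `size` compressed bytes, and inflate them back into Event_dtype records. The
# function name and the index field names ("event_id", "offset", "size") are
# assumptions inferred from the tuples footprint_tobin writes.
def _example_read_zipped_event(bin_path, idx_path, wanted_event_id):
    index = np.fromfile(idx_path, dtype=EventIndexBinZ_dtype)
    entry = index[index["event_id"] == wanted_event_id][0]
    with open(bin_path, "rb") as f:
        f.seek(int(entry["offset"]))  # offsets already account for the 8-byte header
        compressed = f.read(int(entry["size"]))
    raw = zlib.decompress(compressed)  # length equals the dsize stored in the index
    return np.frombuffer(raw, dtype=Event_dtype)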