# Source code for utils.data_io

"""Data I/O utilities for HDF5 and pandas integration.

This module provides helpers for loading solver results from HDF5 files
into pandas DataFrames for analysis and plotting.
"""

from pathlib import Path
from typing import Dict, Any, List, Union
import numpy as np
import pandas as pd
import h5py


def load_run_data(path: Union[str, Path]) -> pd.DataFrame:
    """Load HDF5 run data as DataFrame for plotting.

    Loads time-series data (residuals) along with metadata as columns.
    This format is optimized for seaborn plotting with `hue` parameter.

    Parameters
    ----------
    path : str or Path
        Path to HDF5 file.

    Returns
    -------
    pd.DataFrame
        DataFrame with columns:

        - iteration: Iteration number (0, 1, 2, ...)
        - residual: Residual value at each iteration
        - u_residual, v_residual, continuity_residual: Component residuals
          (if available); more generally, every other dataset found in the
          ``time_series`` group is added under its own name
        - Re: Reynolds number (from metadata)
        - converged: Whether solver converged (from metadata)
        - All other metadata fields (root-level file attributes) as
          additional columns, repeated on every row

    Notes
    -----
    Array-valued attributes are stored whole in every row (object column)
    rather than being spread element-wise across rows, so their length does
    not have to match the number of iterations.

    Examples
    --------
    >>> df = load_run_data('run.h5')
    >>> df.head()
       iteration  residual   Re  converged  mesh_path  ...
    0          0  1.000000  100       True   fine.msh  ...
    1          1  0.500000  100       True   fine.msh  ...

    >>> # Plot multiple runs
    >>> import seaborn as sns
    >>> df1 = load_run_data('run1.h5').assign(run='Run 1')
    >>> df2 = load_run_data('run2.h5').assign(run='Run 2')
    >>> df = pd.concat([df1, df2])
    >>> sns.lineplot(data=df, x='iteration', y='residual', hue='run')
    """
    path = Path(path)

    with h5py.File(path, "r") as f:
        ts_group = f["time_series"]

        # The residual history defines the number of rows.
        residual = ts_group["residual"][:]
        data = {
            "iteration": np.arange(len(residual)),
            "residual": residual,
        }

        # Add the remaining time-series datasets under their own names.
        for key in ts_group.keys():
            if key != "residual":  # Already added
                data[key] = ts_group[key][:]

        # Copy the attributes while the file is still open.
        metadata = dict(f.attrs)

    df = pd.DataFrame(data)
    n_rows = len(df)

    # Broadcast metadata to all rows.
    for key, value in metadata.items():
        if isinstance(value, np.ndarray) and value.ndim > 0:
            # Assigning an array directly would be interpreted as per-row
            # data: it raises on a length mismatch and silently spreads the
            # elements over the rows when the length happens to match.
            # Repeat the whole array object per row instead.
            df[key] = pd.Series([value] * n_rows, index=df.index, dtype=object)
        else:
            df[key] = value

    return df
def load_fields(path: Union[str, Path]) -> pd.DataFrame:
    """Read the spatial solution fields of a run into a DataFrame.

    Parameters
    ----------
    path : str or Path
        Path to HDF5 file.

    Returns
    -------
    pd.DataFrame
        One row per entry of the ``grid_points`` dataset, with columns:

        - x, y: first and second column of ``grid_points``
        - u, v, p: the ``fields/u``, ``fields/v`` and ``fields/p`` datasets
        - velocity_magnitude: the ``fields/velocity_magnitude`` dataset

    Examples
    --------
    >>> df = load_fields('run.h5')
    >>> df.head()
              x         y         u         v         p  velocity_magnitude
    0  0.000000  0.000000  0.000000  0.000000  0.500000            0.000000
    1  0.031250  0.000000  0.000000  0.000000  0.490000            0.000000
    """
    # Output column -> dataset path, in the order the columns should appear.
    field_sources = (
        ("u", "fields/u"),
        ("v", "fields/v"),
        ("p", "fields/p"),
        ("velocity_magnitude", "fields/velocity_magnitude"),
    )

    with h5py.File(Path(path), "r") as handle:
        # Slicing with [:] copies each dataset into memory, so nothing below
        # depends on the file staying open.
        coords = handle["grid_points"][:]
        loaded = [(column, handle[source][:]) for column, source in field_sources]

    columns = {"x": coords[:, 0], "y": coords[:, 1]}
    columns.update(loaded)
    return pd.DataFrame(columns)
def load_metadata(path: Union[str, Path]) -> Dict[str, Any]:
    """Read only the root-level attributes of an HDF5 file.

    Parameters
    ----------
    path : str or Path
        Path to HDF5 file.

    Returns
    -------
    dict
        Plain dictionary copy of the file's root attributes (the solver
        config and convergence info stored as metadata).

    Examples
    --------
    >>> metadata = load_metadata('run.h5')
    >>> print(f"Re={metadata['Re']}, converged={metadata['converged']}")
    Re=100.0, converged=True
    """
    with h5py.File(Path(path), "r") as handle:
        # dict() copies the attribute values out before the file is closed.
        return dict(handle.attrs)
def load_multiple_runs(
    paths: List[Union[str, Path]], labels: Union[List[str], None] = None
) -> pd.DataFrame:
    """Load multiple runs into single DataFrame for comparison.

    Parameters
    ----------
    paths : list of str or Path
        Paths to HDF5 files. Must not be empty.
    labels : list of str, optional
        Labels for each run, one per path. If None, uses the filename
        stems (filename without directory and extension).

    Returns
    -------
    pd.DataFrame
        Combined DataFrame with 'run' column for distinguishing runs.
        The index is renumbered across the concatenated runs.

    Raises
    ------
    ValueError
        If ``paths`` is empty, or if ``labels`` is given and its length
        differs from the number of paths.

    Examples
    --------
    >>> df = load_multiple_runs(
    ...     ['run1.h5', 'run2.h5'],
    ...     labels=['32x32', '64x64']
    ... )
    >>> sns.lineplot(data=df, x='iteration', y='residual', hue='run')
    """
    # Materialise once so a one-shot iterable is not exhausted while
    # deriving the default labels.
    paths = list(paths)
    if not paths:
        raise ValueError("paths must contain at least one HDF5 file")

    if labels is None:
        labels = [Path(p).stem for p in paths]
    else:
        labels = list(labels)
        # zip() would silently drop the surplus runs or labels; fail loudly
        # before any file is opened.
        if len(labels) != len(paths):
            raise ValueError(
                f"labels has {len(labels)} entries but paths has "
                f"{len(paths)}; they must match one-to-one"
            )

    frames = [
        load_run_data(run_path).assign(run=label)
        for run_path, label in zip(paths, labels)
    ]
    return pd.concat(frames, ignore_index=True)