diff --git a/benchmark/scripts/benchmark_matrix_gen.py b/benchmark/scripts/benchmark_matrix_gen.py new file mode 100644 index 00000000..7572d36a --- /dev/null +++ b/benchmark/scripts/benchmark_matrix_gen.py @@ -0,0 +1,422 @@ +#!/usr/bin/env python3 +""" +Benchmark script for linopy matrix generation and solution-unpacking performance. + +Covers the code paths optimised by PRs #616–#619: + - #616 cached_property on MatrixAccessor (flat_vars / flat_cons) + - #617 np.char.add for label string concatenation + - #618 sparse matrix slicing in MatrixAccessor.A + - #619 numpy solution unpacking in Model.solve + +Usage +----- + # Quick run (24 snapshots only): + python benchmark/scripts/benchmark_matrix_gen.py --quick + + # Full matrix-generation sweep with JSON output: + python benchmark/scripts/benchmark_matrix_gen.py -o results.json --label "after-PR-616" + + # Include solution-unpacking benchmark (requires HiGHS solver, #619): + python benchmark/scripts/benchmark_matrix_gen.py --include-solve -o results.json + + # Compare two runs: + python benchmark/scripts/benchmark_matrix_gen.py --compare before.json after.json +""" + +from __future__ import annotations + +import argparse +import gc +import json +import platform +import sys +import time +from pathlib import Path + +import numpy as np +import pandas as pd + +# --------------------------------------------------------------------------- +# Model builders +# --------------------------------------------------------------------------- + + +def build_scigrid_network(n_snapshots: int): + """Return a PyPSA Network (SciGrid-DE) with extended snapshots, without building the model.""" + import pypsa + + n = pypsa.examples.scigrid_de() + orig_snapshots = n.snapshots + orig_len = len(orig_snapshots) + + new_snapshots = pd.date_range(orig_snapshots[0], periods=n_snapshots, freq="h") + n.set_snapshots(new_snapshots) + + for component_t in (n.generators_t, n.loads_t, n.storage_units_t): + for attr in list(component_t): + df = getattr(component_t, attr) + if df is not None and not df.empty: + tiles = int(np.ceil(n_snapshots / orig_len)) + 1 + tiled = np.tile(df.values, (tiles, 1))[:n_snapshots] + setattr( + component_t, + attr, + pd.DataFrame( + tiled, + index=new_snapshots, + columns=df.columns, + ), + ) + return n + + +def build_scigrid(n_snapshots: int): + """Return a linopy Model from PyPSA SciGrid-DE with extended snapshots.""" + n = build_scigrid_network(n_snapshots) + n.optimize.create_model(include_objective_constant=False) + return n.model + + +def build_synthetic(n: int): + """Return a linopy Model with 2×N² variables and 2×N² constraints.""" + from linopy import Model + + m = Model() + N = np.arange(n) + x = m.add_variables(coords=[N, N], name="x") + y = m.add_variables(coords=[N, N], name="y") + m.add_constraints(x - y >= N, name="lower") + m.add_constraints(x + y >= 0, name="upper") + m.add_objective(2 * x.sum() + y.sum()) + return m + + +# --------------------------------------------------------------------------- +# Benchmark phases +# --------------------------------------------------------------------------- + + +def time_phase(func, label: str, repeats: int = 3) -> dict: + """Time a callable, return best-of-N result.""" + times = [] + for _ in range(repeats): + gc.collect() + gc.disable() + t0 = time.perf_counter() + result = func() + elapsed = time.perf_counter() - t0 + gc.enable() + times.append(elapsed) + del result + return { + "phase": label, + "best_s": min(times), + "median_s": sorted(times)[len(times) // 2], + "times": times, + } + + +def benchmark_model(model, repeats: int = 3) -> list[dict]: + """Benchmark all matrix generation phases on a linopy Model.""" + results = [] + matrices = model.matrices + + # Phase 1: flat_vars (exercises #616 caching + #617 label vectorisation) + def do_flat_vars(): + matrices.clean_cached_properties() + return matrices.flat_vars + + results.append(time_phase(do_flat_vars, "flat_vars", repeats)) + + # Phase 2: flat_cons (exercises #616 caching + #617 label vectorisation) + def do_flat_cons(): + matrices.clean_cached_properties() + return matrices.flat_cons + + results.append(time_phase(do_flat_cons, "flat_cons", repeats)) + + # Phase 3: vlabels + clabels (vector creation from flat data) + # Ensure flat data is cached first + matrices.clean_cached_properties() + _ = matrices.flat_vars + _ = matrices.flat_cons + + def do_labels(): + return (matrices.vlabels, matrices.clabels) + + results.append(time_phase(do_labels, "vlabels+clabels", repeats)) + + # Phase 4: A matrix (exercises #618 sparse slicing) + def do_A(): + return matrices.A + + results.append(time_phase(do_A, "A_matrix", repeats)) + + # Phase 5: full get_matrix_data pipeline (end-to-end) + def do_full(): + matrices.clean_cached_properties() + _ = matrices.vlabels + _ = matrices.clabels + _ = matrices.lb + _ = matrices.ub + _ = matrices.A + _ = matrices.b + _ = matrices.sense + return True + + results.append(time_phase(do_full, "full_pipeline", repeats)) + + return results + + +# --------------------------------------------------------------------------- +# Solution-unpacking benchmark (#619) +# --------------------------------------------------------------------------- + + +def benchmark_solution_unpack(n_snapshots: int, repeats: int = 3) -> list[dict]: + """ + Benchmark the solution-assignment loop in Model.solve (PR #619). + + Strategy: solve once with HiGHS to get a real solution vector, then + re-run only the assignment loop (sol[idx] → var.solution) repeatedly + without re-solving, isolating the unpacking cost from solver time. + """ + import xarray as xr + + n = build_scigrid_network(n_snapshots) + n.optimize.create_model(include_objective_constant=False) + model = n.model + + # Solve once to populate the raw solution + status, _ = model.solve(solver_name="highs", io_api="direct") + if status != "ok": + print(f" WARNING: solve failed ({status}), skipping solution-unpack benchmark") + return [] + + # Reconstruct the raw solution Series (as returned by the solver): + # a float-indexed Series mapping variable label → solution value. + nan = float("nan") + parts = [] + for name, var in model.variables.items(): + if var.solution is None: + continue + labels = np.ravel(var.labels) + values = np.ravel(var.solution.values) + parts.append(pd.Series(values, index=labels.astype(float))) + if not parts: + print(" WARNING: no solution found on variables, was solve successful?") + return [] + sol_series = pd.concat(parts).drop_duplicates() + sol_series.loc[-1] = nan + + n_vars = sum( + np.ravel(model.variables[name].labels).size for name in model.variables + ) + results = [] + + # ----- Old path (pandas label-based, pre-#619) ----- + def unpack_pandas(): + for name, var in model.variables.items(): + idx = np.ravel(var.labels).astype(float) + try: + vals = sol_series[idx].values.reshape(var.labels.shape) + except KeyError: + vals = sol_series.reindex(idx).values.reshape(var.labels.shape) + var.solution = xr.DataArray(vals, var.coords) + + results.append(time_phase(unpack_pandas, "unpack_pandas (before)", repeats)) + + # ----- New path (numpy dense array, #619) ----- + def unpack_numpy(): + sol_max_idx = int(max(sol_series.index.max(), 0)) + sol_arr = np.full(sol_max_idx + 1, nan) + mask = sol_series.index >= 0 + valid = sol_series.index[mask].astype(int) + sol_arr[valid] = sol_series.values[mask] + for name, var in model.variables.items(): + idx = np.ravel(var.labels) + safe_idx = np.clip(idx, 0, sol_max_idx) + vals = sol_arr[safe_idx] + vals[idx < 0] = nan + var.solution = xr.DataArray(vals.reshape(var.labels.shape), var.coords) + + results.append(time_phase(unpack_numpy, "unpack_numpy (after)", repeats)) + + for r in results: + r.update(model_type="scigrid_solve", size=n_snapshots, n_vars=n_vars, n_cons=0) + return results + + +# --------------------------------------------------------------------------- +# Runners +# --------------------------------------------------------------------------- + +QUICK_SNAPSHOTS = [24] +FULL_SNAPSHOTS = [24, 100, 200, 500] +SYNTHETIC_SIZES = [20, 50, 100, 200] + + +def run_benchmarks( + model_type: str, quick: bool, repeats: int, include_solve: bool = False +) -> list[dict]: + """Run benchmarks across problem sizes, return flat list of results.""" + all_results = [] + + if model_type in ("scigrid", "all"): + sizes = QUICK_SNAPSHOTS if quick else FULL_SNAPSHOTS + for n_snap in sizes: + print(f"\n{'=' * 60}") + print(f"SciGrid-DE {n_snap} snapshots") + print(f"{'=' * 60}") + model = build_scigrid(n_snap) + n_vars = len(model.variables.flat) + n_cons = len(model.constraints.flat) + print(f" {n_vars:,} variables, {n_cons:,} constraints") + + for r in benchmark_model(model, repeats): + r.update( + model_type="scigrid", size=n_snap, n_vars=n_vars, n_cons=n_cons + ) + all_results.append(r) + print( + f" {r['phase']:20s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)" + ) + + del model + gc.collect() + + if include_solve: + # Solution-unpacking benchmark for PR #619 (SciGrid-DE only, small sizes) + solve_sizes = QUICK_SNAPSHOTS if quick else [24, 100] + for n_snap in solve_sizes: + print(f"\n{'=' * 60}") + print(f"SciGrid-DE solve + unpack {n_snap} snapshots (#619)") + print(f"{'=' * 60}") + for r in benchmark_solution_unpack(n_snap, repeats): + all_results.append(r) + print( + f" {r['phase']:30s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)" + ) + gc.collect() + + if model_type in ("synthetic", "all"): + sizes = [20, 50] if quick else SYNTHETIC_SIZES + for n in sizes: + print(f"\n{'=' * 60}") + print(f"Synthetic N={n} ({2 * n * n} vars, {2 * n * n} cons)") + print(f"{'=' * 60}") + model = build_synthetic(n) + n_vars = 2 * n * n + n_cons = 2 * n * n + + for r in benchmark_model(model, repeats): + r.update(model_type="synthetic", size=n, n_vars=n_vars, n_cons=n_cons) + all_results.append(r) + print( + f" {r['phase']:20s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)" + ) + + del model + gc.collect() + + return all_results + + +def format_comparison(before: list[dict], after: list[dict]) -> str: + """Format a before/after comparison table.""" + df_b = pd.DataFrame(before).set_index(["model_type", "size", "phase"]) + df_a = pd.DataFrame(after).set_index(["model_type", "size", "phase"]) + merged = df_b[["best_s"]].join( + df_a[["best_s"]], lsuffix="_before", rsuffix="_after" + ) + merged["speedup"] = merged["best_s_before"] / merged["best_s_after"] + lines = [ + f"{'Model':>10s} {'Size':>6s} {'Phase':>20s} {'Before':>8s} {'After':>8s} {'Speedup':>8s}", + "-" * 70, + ] + for (mtype, size, phase), row in merged.iterrows(): + lines.append( + f"{mtype:>10s} {size:>6} {phase:>20s} " + f"{row['best_s_before']:>7.4f}s {row['best_s_after']:>7.4f}s " + f"{row['speedup']:>7.2f}x" + ) + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def main(): + parser = argparse.ArgumentParser( + description="Benchmark linopy matrix generation (PRs #616–#619)" + ) + parser.add_argument( + "--model", + choices=["scigrid", "synthetic", "all"], + default="all", + help="Model type to benchmark (default: all)", + ) + parser.add_argument( + "--quick", action="store_true", help="Quick mode (smallest sizes only)" + ) + parser.add_argument( + "--repeats", type=int, default=3, help="Timing repeats per phase (default: 3)" + ) + parser.add_argument("-o", "--output", type=str, help="Save results to JSON file") + parser.add_argument("--label", type=str, default="", help="Label for this run") + parser.add_argument( + "--compare", + nargs=2, + metavar=("BEFORE", "AFTER"), + help="Compare two JSON result files instead of running benchmarks", + ) + parser.add_argument( + "--include-solve", + action="store_true", + help="Also benchmark solution unpacking (PR #619); requires HiGHS solver", + ) + args = parser.parse_args() + + if args.compare: + before = json.loads(Path(args.compare[0]).read_text())["results"] + after = json.loads(Path(args.compare[1]).read_text())["results"] + print(format_comparison(before, after)) + return + + print("linopy matrix generation benchmark") + print( + f"Python {sys.version.split()[0]}, numpy {np.__version__}, " + f"{platform.machine()}, {platform.system()}" + ) + + results = run_benchmarks(args.model, args.quick, args.repeats, args.include_solve) + + if args.output: + out = { + "label": args.label, + "python": sys.version.split()[0], + "numpy": np.__version__, + "platform": f"{platform.system()} {platform.machine()}", + "results": results, + } + Path(args.output).write_text(json.dumps(out, indent=2, default=str)) + print(f"\nResults saved to {args.output}") + + # Summary table + print(f"\n{'=' * 60}") + print("Summary (best times)") + print(f"{'=' * 60}") + df = pd.DataFrame(results) + for (mtype, size), group in df.groupby(["model_type", "size"]): + n_vars = group.iloc[0]["n_vars"] + n_cons = group.iloc[0]["n_cons"] + print(f"\n {mtype} size={size} ({n_vars:,} vars, {n_cons:,} cons)") + for _, row in group.iterrows(): + print(f" {row['phase']:20s} {row['best_s']:.4f}s") + + +if __name__ == "__main__": + main() diff --git a/linopy/matrices.py b/linopy/matrices.py index e1489e76..416cd184 100644 --- a/linopy/matrices.py +++ b/linopy/matrices.py @@ -51,7 +51,22 @@ def __init__(self, model: Model) -> None: def clean_cached_properties(self) -> None: """Clear the cache for all cached properties of an object""" - for cached_prop in ["flat_vars", "flat_cons", "sol", "dual"]: + for cached_prop in [ + "flat_vars", + "flat_cons", + "sol", + "dual", + "vlabels", + "clabels", + "A", + "c", + "b", + "sense", + "lb", + "ub", + "vtypes", + "Q", + ]: # check existence of cached_prop without creating it if cached_prop in self.__dict__: delattr(self, cached_prop) @@ -66,13 +81,13 @@ def flat_cons(self) -> pd.DataFrame: m = self._parent return m.constraints.flat - @property + @cached_property def vlabels(self) -> ndarray: """Vector of labels of all non-missing variables.""" df: pd.DataFrame = self.flat_vars return create_vector(df.key, df.labels, -1) - @property + @cached_property def vtypes(self) -> ndarray: """Vector of types of all non-missing variables.""" m = self._parent @@ -93,7 +108,7 @@ def vtypes(self) -> ndarray: ds = df.set_index("key").labels.map(ds) return create_vector(ds.index, ds.to_numpy(), fill_value="") - @property + @cached_property def lb(self) -> ndarray: """Vector of lower bounds of all non-missing variables.""" df: pd.DataFrame = self.flat_vars @@ -123,13 +138,13 @@ def dual(self) -> ndarray: ) return create_vector(df.key, df.dual, fill_value=np.nan) - @property + @cached_property def ub(self) -> ndarray: """Vector of upper bounds of all non-missing variables.""" df: pd.DataFrame = self.flat_vars return create_vector(df.key, df.upper) - @property + @cached_property def clabels(self) -> ndarray: """Vector of labels of all non-missing constraints.""" df: pd.DataFrame = self.flat_cons @@ -137,7 +152,7 @@ def clabels(self) -> ndarray: return np.array([], dtype=int) return create_vector(df.key, df.labels, fill_value=-1) - @property + @cached_property def A(self) -> csc_matrix | None: """Constraint matrix of all non-missing constraints and variables.""" m = self._parent @@ -146,19 +161,19 @@ def A(self) -> csc_matrix | None: A: csc_matrix = m.constraints.to_matrix(filter_missings=False) return A[self.clabels][:, self.vlabels] - @property + @cached_property def sense(self) -> ndarray: """Vector of senses of all non-missing constraints.""" df: pd.DataFrame = self.flat_cons return create_vector(df.key, df.sign.astype(np.dtype(" ndarray: """Vector of right-hand-sides of all non-missing constraints.""" df: pd.DataFrame = self.flat_cons return create_vector(df.key, df.rhs) - @property + @cached_property def c(self) -> ndarray: """Vector of objective coefficients of all non-missing variables.""" m = self._parent @@ -171,7 +186,7 @@ def c(self) -> ndarray: shape: int = self.flat_vars.key.max() + 1 return create_vector(vars, ds.coeffs, fill_value=0.0, shape=shape) - @property + @cached_property def Q(self) -> csc_matrix | None: """Matrix objective coefficients of quadratic terms of all non-missing variables.""" m = self._parent diff --git a/test/test_matrices.py b/test/test_matrices.py index 98a73564..dd33e277 100644 --- a/test/test_matrices.py +++ b/test/test_matrices.py @@ -77,3 +77,36 @@ def test_matrices_float_c() -> None: c = m.matrices.c assert np.all(c == np.array([1.5, 1.5])) + + +def test_matrices_properties_are_cached() -> None: + """Verify that MatrixAccessor properties are cached after first access.""" + m = Model() + + lower = xr.DataArray(np.zeros((10, 10)), coords=[range(10), range(10)]) + upper = xr.DataArray(np.ones((10, 10)), coords=[range(10), range(10)]) + x = m.add_variables(lower, upper, name="x") + y = m.add_variables(name="y") + + m.add_constraints(1 * x + 10 * y, EQUAL, 0) + m.add_objective((10 * x + 5 * y).sum()) + + M = m.matrices + + # Access each property twice — second access should return the same object + for prop in ("vlabels", "clabels", "lb", "ub", "b", "sense", "c"): + first = getattr(M, prop) + second = getattr(M, prop) + assert first is second, f"{prop} is not cached (returns new object each time)" + + # A and Q return complex objects — verify they are also cached + first_A = M.A + second_A = M.A + assert first_A is second_A, "A is not cached" + + # Verify clean_cached_properties clears the cache + M.clean_cached_properties() + fresh = M.vlabels + assert fresh is not first # cache was cleared — should be a new object + # After cleaning, accessing again should still work + assert np.array_equal(fresh, M.vlabels)