# Source code for solvers.solver

# time_lapse_inversion/solver.py
import numpy as np
import scipy.sparse
from joblib import Parallel, delayed
import pygimli as pg

# Optional GPU acceleration with CuPy (if available)
try:
    # CuPy mirrors the NumPy API on the GPU; cupyx supplies the sparse CSR type.
    import cupy as cp
    from cupyx.scipy.sparse import csr_matrix
    gpu_available = True
except ImportError:
    # CuPy not installed: solvers fall back to NumPy/SciPy on the CPU,
    # and any use_gpu=True request is silently ignored.
    gpu_available = False


def generalized_solver(A, b, method="cgls", x=None, maxiter=2000, tol=1e-8,
                       verbose=False, damp=0.0, use_gpu=False, parallel=False,
                       n_jobs=-1):
    """Generalized solver for ``Ax = b`` with optional GPU acceleration
    and parallelism.

    Parameters
    ----------
    A : array_like or sparse matrix
        The system matrix (Jacobian or forward operator).
    b : array_like
        Right-hand side vector.
    method : str, optional
        Solver method: 'lsqr', 'rrlsqr', 'cgls', or 'rrls'. Default is 'cgls'.
    x : array_like, optional
        Initial guess for the solution. If None, zeros are used.
    maxiter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Convergence tolerance on the relative residual ``rr / rr0``.
    verbose : bool, optional
        Print progress information every 10 iterations.
    damp : float, optional
        Damping factor (Tikhonov regularization).
    use_gpu : bool, optional
        Use GPU acceleration with CuPy (if available).
    parallel : bool, optional
        Use parallel CPU computations.
    n_jobs : int, optional
        Number of parallel jobs (if ``parallel`` is True).

    Returns
    -------
    x : array_like
        The computed solution vector.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported solver names.
    """
    # Choose the array backend: CuPy when requested and installed, else NumPy.
    xp = cp if use_gpu and gpu_available else np

    # Move A and b onto the chosen backend, keeping sparse matrices sparse.
    if use_gpu and gpu_available:
        if scipy.sparse.isspmatrix(A):
            A = csr_matrix(A)
        else:
            A = cp.asarray(A)
        b = cp.asarray(b)
    else:
        if scipy.sparse.isspmatrix(A):
            A = A.tocsr()
        else:
            A = np.asarray(A)
        b = np.asarray(b)

    # Initial solution and residual r = b - A x.
    if x is None:
        x = xp.zeros(A.shape[1])
        r = b.copy()
    else:
        x = xp.asarray(x)
        r = b - A.dot(x)

    # Initial quantities shared by all solver cores.
    s = A.T.dot(r)           # gradient of the normal equations, A^T r
    gamma = xp.dot(s.T, s)   # ||s||^2
    rr = xp.dot(r.T, r)      # ||r||^2
    rr0 = rr                 # reference residual for the relative stop test

    # Dispatch table instead of an if/elif chain.
    cores = {
        "lsqr": _lsqr,
        "rrlsqr": _rrlsqr,
        "cgls": _cgls,
        "rrls": _rrls,
    }
    try:
        core = cores[method]
    except KeyError:
        raise ValueError("Unknown method: {}".format(method)) from None
    return core(A, b, x, r, s, gamma, rr, rr0, maxiter, tol, verbose, damp,
                use_gpu, parallel, n_jobs, xp)
def _matrix_multiply(A, v, use_gpu, parallel, n_jobs, xp): """ Helper routine for matrix-vector multiplication with optional GPU or parallel CPU support. """ if use_gpu: v = xp.asarray(v) return A.dot(v) else: if scipy.sparse.isspmatrix(A): return A.dot(v) else: if parallel: result = Parallel(n_jobs=n_jobs, backend='threading')( delayed(xp.dot)(A[i], v) for i in range(A.shape[0]) ) return xp.array(result) else: return A.dot(v) def _lsqr(A, b, x, r, s, gamma, rr, rr0, maxiter, tol, verbose, damp, use_gpu, parallel, n_jobs, xp): """ LSQR solver with proper shape handling. """ # Ensure x and r are column vectors if x is None: x = xp.zeros((A.shape[1], 1)) else: x = xp.asarray(x) if x.ndim == 1: x = x.reshape(-1, 1) if r.ndim == 1: r = r.reshape(-1, 1) # Initialize u and beta u = r.copy() beta = xp.sqrt(float(xp.dot(u.T, u))) if beta > 0: u = u / beta # Initialize v and alpha v = _matrix_multiply(A.T, u, use_gpu, parallel, n_jobs, xp) if v.ndim == 1: v = v.reshape(-1, 1) alpha = xp.sqrt(float(xp.dot(v.T, v))) if alpha > 0: v = v / alpha w = v.copy() phi_bar = beta rho_bar = alpha for i in range(maxiter): if verbose and i % 10 == 0: pg.info("LSQR Iteration:", i, "residual:", rr, "relative:", rr / rr0) # Bidiagonalization u_next = _matrix_multiply(A, v, use_gpu, parallel, n_jobs, xp) if u_next.ndim == 1: u_next = u_next.reshape(-1, 1) u_next = u_next - alpha * u beta = xp.sqrt(float(xp.dot(u_next.T, u_next))) if beta > 0: u = u_next / beta v_next = _matrix_multiply(A.T, u, use_gpu, parallel, n_jobs, xp) if v_next.ndim == 1: v_next = v_next.reshape(-1, 1) v_next = v_next - beta * v alpha = xp.sqrt(float(xp.dot(v_next.T, v_next))) if alpha > 0: v = v_next / alpha # Construct and apply orthogonal transformation rho = xp.sqrt(rho_bar**2 + beta**2) c = rho_bar / rho s = beta / rho theta = s * alpha rho_bar = -c * alpha phi = c * phi_bar phi_bar = s * phi_bar # Update x and w t = phi / rho x = x + t * w w = v - (theta / rho) * w rr = phi_bar**2 if rr / rr0 < tol: break return 
x.get() if use_gpu and gpu_available else x def _rrlsqr(A, b, x, r, s, gamma, rr, rr0, maxiter, tol, verbose, damp, use_gpu, parallel, n_jobs, xp): """ Regularized LSQR solver with proper shape handling. """ # Ensure x and r are column vectors if x is None: x = xp.zeros((A.shape[1], 1)) else: x = xp.asarray(x) if x.ndim == 1: x = x.reshape(-1, 1) if r.ndim == 1: r = r.reshape(-1, 1) # Initialize u and beta u = r.copy() beta = xp.sqrt(float(xp.dot(u.T, u))) if beta > 0: u = u / beta # Initialize v and alpha with regularization v = _matrix_multiply(A.T, u, use_gpu, parallel, n_jobs, xp) if v.ndim == 1: v = v.reshape(-1, 1) if damp > 0: v = v + damp * x alpha = xp.sqrt(float(xp.dot(v.T, v))) if alpha > 0: v = v / alpha w = v.copy() phi_bar = beta rho_bar = alpha for i in range(maxiter): if verbose and i % 10 == 0: pg.info("RRLSQR Iteration:", i, "residual:", rr, "relative:", rr / rr0) # Bidiagonalization with regularization u_next = _matrix_multiply(A, v, use_gpu, parallel, n_jobs, xp) if u_next.ndim == 1: u_next = u_next.reshape(-1, 1) u_next = u_next - alpha * u beta = xp.sqrt(float(xp.dot(u_next.T, u_next))) if beta > 0: u = u_next / beta v_next = _matrix_multiply(A.T, u, use_gpu, parallel, n_jobs, xp) if v_next.ndim == 1: v_next = v_next.reshape(-1, 1) v_next = v_next - beta * v if damp > 0: v_next = v_next + damp * x alpha = xp.sqrt(float(xp.dot(v_next.T, v_next))) if alpha > 0: v = v_next / alpha # Apply regularization and update solution rho = xp.sqrt(rho_bar**2 + beta**2 + damp**2) c = rho_bar / rho s = beta / rho theta = s * alpha rho_bar = -c * alpha phi = c * phi_bar phi_bar = s * phi_bar # Update x and w t = phi / rho x = x + t * w w = v - (theta / rho) * w rr = phi_bar**2 if rr / rr0 < tol: break return x.get() if use_gpu and gpu_available else x def _cgls(A, b, x, r, s, gamma, rr, rr0, maxiter, tol, verbose, damp, use_gpu, parallel, n_jobs, xp): """ CGLS solver core routine to solve A^T A x = A^T b. 
""" p = s.copy() for i in range(maxiter): if verbose and i % 10 == 0: pg.info("Iteration:", i, "residual:", rr, "relative:", rr / rr0) q = _matrix_multiply(A, p, use_gpu, parallel, n_jobs, xp) if damp > 0: q += damp * p # Ensure q is a column vector and update solution q = q.reshape(-1, 1) x = x.reshape(-1, 1) alfa = gamma / xp.dot(q.T, q) x += p.reshape(-1, 1) * alfa r -= q * alfa # Update s = A^T r (and include damping if needed) s = _matrix_multiply(A.T, r, use_gpu, parallel, n_jobs, xp) if damp > 0: s += damp * r newgamma = xp.dot(s.T, s) p = s + float(newgamma / gamma) * p gamma = float(newgamma) rr = xp.dot(r.T, r) if rr / rr0 < tol: break return x.get() if use_gpu and gpu_available else x def _rrls(A, b, x, r, s, gamma, rr, rr0, maxiter, tol, verbose, damp, use_gpu, parallel, n_jobs, xp): # Ensure x, r, and s are column vectors if x is None: x = xp.zeros((A.shape[1], 1)) else: x = xp.asarray(x) if x.ndim == 1: x = x.reshape(-1, 1) if r.ndim == 1: r = r.reshape(-1, 1) if s.ndim == 1: s = s.reshape(-1, 1) w = s.copy() for i in range(maxiter): if verbose and i % 10 == 0: pg.info("RRLS Iteration:", i, "residual:", rr, "relative:", rr / rr0) p = _matrix_multiply(A, w, use_gpu, parallel, n_jobs, xp) if p.ndim == 1: p = p.reshape(-1, 1) denom = xp.dot(p.T, p) if xp.isclose(denom, 0.0): break lam = xp.dot(p.T, r) / denom x = x + w * float(lam) # Convert lam to scalar r = r - p * float(lam) s = _matrix_multiply(A.T, r, use_gpu, parallel, n_jobs, xp) if s.ndim == 1: s = s.reshape(-1, 1) if damp > 0: s = s + damp * x w = s rr = float(xp.dot(r.T, r)) if rr / rr0 < tol: break return x.get() if use_gpu and gpu_available else x