Implicit Autodiff Primitive Rules

Created with ❤️ by Machine Learning & Simulation.

Follow @felix_m_koehler

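The nonlinear system solving rule also covers implicit autodiff of general optimality-criterion problems (e.g., differentiating through the first-order optimality condition of an optimization problem); see also this paper.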

Primitive Primal Pushforward/Jvp Pullback/vJp
Discrete Problems
Scalar Root-Finding $ x = \left\{ \text{solve} \; g(x, \theta) = 0 \; \text{for} \; x\right\} $ $ \dot{x} = - \frac{ \frac{\partial g}{\partial \theta} }{ \frac{\partial g}{\partial x} } \dot{\theta} $ $ \bar{\theta} = - \bar{x} \frac{ \frac{\partial g}{\partial \theta} }{ \frac{\partial g}{\partial x} } $
Linear System Solving $ \mathbf{x} = \left\{ \text{solve} \; \mathbf{A} \mathbf{x} = \mathbf{b} \; \text{for} \; \mathbf{x} \right\} $ $ \begin{align} \mathbf{d} &= \dot{\mathbf{b}} - \dot{\mathbf{A}} \mathbf{x} \\ \dot{\mathbf{x}} &= \left\{ \text{solve} \; \mathbf{A} \dot{\mathbf{x}} = \mathbf{d} \; \text{for} \; \dot{\mathbf{x}} \right\} \end{align} $ $ \begin{align} \mathbf{\lambda} &= \left\{ \text{solve} \; \mathbf{A}^T \mathbf{\lambda} = \bar{\mathbf{x}} \; \text{for} \; \mathbf{\lambda} \right\} \\ \bar{\mathbf{b}} &= \mathbf{\lambda} \\ \bar{\mathbf{A}} &= - \mathbf{\lambda} \mathbf{x}^T \end{align} $
Nonlinear System Solving $ \mathbf{x} = \left\{ \text{solve} \; \mathbf{g}(\mathbf{x}, \theta) = \mathbf{0} \; \text{for} \; \mathbf{x} \right\} $ $ \begin{align} \mathbf{d} &= - \frac{\partial \mathbf{g}}{\partial \mathbf{\theta}} \dot{\mathbf{\theta}} \\ \dot{\mathbf{x}} &= \left\{ \text{solve} \; \frac{\partial \mathbf{g}}{\partial \mathbf{x}} \dot{\mathbf{x}} = \mathbf{d} \; \text{for} \; \dot{\mathbf{x}} \right\} \end{align} $ $ \begin{align} \mathbf{\lambda} &= \left\{ \text{solve} \; \left( \frac{\partial \mathbf{g}}{\partial \mathbf{x}}\right)^T \mathbf{\lambda} = \bar{\mathbf{x}} \; \text{for} \; \mathbf{\lambda} \right\} \\ \bar{\mathbf{\theta}} &= - \left( \frac{\partial \mathbf{g}}{\partial \mathbf{\theta}} \right)^T \mathbf{\lambda} \end{align} $
Fixed-Point Iteration $ \mathbf{x} = \left\{ \text{iterate} \; \mathbf{x} \leftarrow \mathbf{\phi}(\mathbf{x}, \theta) \; \text{until convergence} \right\} $ $ \begin{align} \mathbf{d} &= \frac{\partial \mathbf{\phi}}{\partial \mathbf{\theta}} \dot{\mathbf{\theta}} \\ \dot{\mathbf{x}} &= \left\{ \text{solve} \; \left(\mathbf{I} - \frac{\partial \mathbf{\phi}}{\partial \mathbf{x}}\right) \dot{\mathbf{x}} = \mathbf{d} \; \text{for} \; \dot{\mathbf{x}} \right\} \end{align} $ $ \begin{align} \mathbf{\lambda} &= \left\{ \text{solve} \; \left( \mathbf{I} - \frac{\partial \mathbf{\phi}}{\partial \mathbf{x}} \right)^T \mathbf{\lambda} = \bar{\mathbf{x}} \; \text{for} \; \mathbf{\lambda} \right\} \\ \bar{\mathbf{\theta}} &= \left( \frac{\partial \mathbf{\phi}}{\partial \mathbf{\theta}} \right)^T \mathbf{\lambda} \end{align} $
Picard-Iteration $ \mathbf{x} = \left\{ \text{iterate} \; \mathbf{x} \leftarrow \mathbf{A}(\mathbf{x})^{-1}\mathbf{b}(\mathbf{\theta}) \; \text{until convergence} \right\} $ $ \begin{align} \mathbf{d} &= \frac{\partial \mathbf{b}}{\partial \mathbf{\theta}} \dot{\mathbf{\theta}} \\ \dot{\mathbf{x}} &= \left\{ \text{solve} \; \left(\frac{\partial \left( \mathbf{A}(\mathbf{x}) \mathbf{x} \right)}{\partial \mathbf{x}}\right) \dot{\mathbf{x}} = \mathbf{d} \; \text{for} \; \dot{\mathbf{x}} \right\} \end{align} $ $ \begin{align} \mathbf{\lambda} &= \left\{ \text{solve} \; \left( \frac{\partial \left( \mathbf{A}(\mathbf{x}) \mathbf{x} \right)}{\partial \mathbf{x}} \right)^T \mathbf{\lambda} = \bar{\mathbf{x}} \; \text{for} \; \mathbf{\lambda} \right\} \\ \bar{\mathbf{\theta}} &= \left( \frac{\partial \mathbf{b}}{\partial \mathbf{\theta}} \right)^T \mathbf{\lambda} \end{align} $
ODE Problems
Neural ODEs (Final Time Integration) $ \mathbf{u} = \left\{ \text{integrate} \; \frac{d \mathbf{u}}{d t} = \mathbf{f}(\mathbf{u}, \mathbf{\theta}) \; \text{with} \; \mathbf{u}|_{t=0} = \mathbf{u}_0 \; \text{to} \; \mathbf{u}|_{t=T} \right\} $ $ \begin{align} \dot{\mathbf{u}}_\theta &= \left\{ \text{integrate} \; \frac{d \dot{\mathbf{u}}_\theta}{d t} = \frac{\partial \mathbf{f}}{\partial \mathbf{u}} \dot{\mathbf{u}}_{\theta} + \frac{\partial \mathbf{f}}{\partial \mathbf{\theta}} \dot{\mathbf{\theta}} \; \text{with} \; \dot{\mathbf{u}}_\theta|_{t=0} = \mathbf{0} \; \text{to} \; \dot{\mathbf{u}}_\theta|_{t=T} \right\} \\ \dot{\mathbf{u}}_{u_0} &= \left\{ \text{integrate} \; \frac{d \dot{\mathbf{u}}_{u_0}}{d t} = \frac{\partial \mathbf{f}}{\partial \mathbf{u}} \dot{\mathbf{u}}_{u_0} \; \text{with} \; \dot{\mathbf{u}}_{u_0}|_{t=0} = \dot{\mathbf{u}}_0 \; \text{to} \; \dot{\mathbf{u}}_{u_0}|_{t=T} \right\} \\ \dot{\mathbf{u}}_T &= \mathbf{f}(\mathbf{u}(T), \mathbf{\theta}) \cdot \dot{T} \\ \dot{\mathbf{u}} &= \dot{\mathbf{u}}_\theta + \dot{\mathbf{u}}_{u_0} + \dot{\mathbf{u}}_T \end{align} $ $ \begin{align} \mathbf{\lambda}(t) &= \left\{ \text{integrate} \; \frac{d \mathbf{\lambda}}{d t} = - \left( \frac{\partial \mathbf{f}}{\partial \mathbf{u}} \right)^T \mathbf{\lambda} \; \text{with} \; \mathbf{\lambda}|_{t=T} = \bar{\mathbf{u}}|_{t=T} \; \text{over} \; t \in (T, 0) \right\} \\ \bar{\mathbf{\theta}} &= \langle \mathbf{\lambda}, \frac{\partial \mathbf{f}}{\partial \mathbf{\theta}} \rangle \\ \bar{\mathbf{u}}_0 &= \mathbf{\lambda} |_{t=0} \\ \bar{T} &= \bar{\mathbf{u}}(T)^T \mathbf{f}(\mathbf{u}(T), \mathbf{\theta}) \end{align} $
Full ODE Integration $ \mathbf{u}(t) = \left\{ \text{integrate} \; \frac{d \mathbf{u}}{d t} = \mathbf{f}(\mathbf{u}, \mathbf{\theta}) \; \text{with} \; \mathbf{u}|_{t=0} = \mathbf{u}_0 \; \text{over} \; t \in (0, T) \right\} $ $ \begin{align} \dot{\mathbf{u}}_\theta(t) &= \left\{ \text{integrate} \; \frac{d \dot{\mathbf{u}}_\theta}{d t} = \frac{\partial \mathbf{f}}{\partial \mathbf{u}} \dot{\mathbf{u}}_{\theta} + \frac{\partial \mathbf{f}}{\partial \mathbf{\theta}}\dot{\mathbf{\theta}} \; \text{with} \; \dot{\mathbf{u}}_\theta|_{t=0} = \mathbf{0} \; \text{over} \; t \in (0, T) \right\} \\ \dot{\mathbf{u}}_{u_0}(t) &= \left\{ \text{integrate} \; \frac{d \dot{\mathbf{u}}_{u_0}}{d t} = \frac{\partial \mathbf{f}}{\partial \mathbf{u}} \dot{\mathbf{u}}_{u_0} \; \text{with} \; \dot{\mathbf{u}}_{u_0}|_{t=0} = \dot{\mathbf{u}}_0 \; \text{over} \; t \in (0, T) \right\} \\ \dot{\mathbf{u}}(t) &= \dot{\mathbf{u}}_\theta(t) + \dot{\mathbf{u}}_{u_0}(t) \end{align} $ $ \begin{align} \mathbf{\lambda}(t) &= \left\{ \text{integrate} \; \frac{d \mathbf{\lambda}}{d t} = - \left( \frac{\partial \mathbf{f}}{\partial \mathbf{u}} \right)^T \mathbf{\lambda} - \bar{\mathbf{u}}(t) \; \text{with} \; \mathbf{\lambda}|_{t=T} = \mathbf{0} \; \text{over} \; t \in (T, 0) \right\} \\ \bar{\mathbf{\theta}} &= \langle \mathbf{\lambda}, \frac{\partial \mathbf{f}}{\partial \mathbf{\theta}} \rangle \\ \bar{\mathbf{u}}_0 &= \mathbf{\lambda} |_{t=0} \end{align} $
PDE Problems
Poisson with Dirichlet BC TODO