Atkeson said the original neural networks were just doing automatic differentiation, which I still don't understand…
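Chasing that down for myself: I think the claim is that backprop is just reverse-mode automatic differentiation: run the network forward, then sweep backward applying the chain rule once per intermediate. A tiny Python sketch (my own toy example, nothing from the lecture) for a one-neuron "network":

import math

# One-neuron "network": loss = (sigmoid(w*x + b) - t)^2.
# The forward pass records intermediates; the reverse sweep applies the
# chain rule once per intermediate, which is what reverse-mode AD automates.
def loss_and_grads(w, b, x, t):
    z = w * x + b                    # pre-activation
    a = 1 / (1 + math.exp(-z))       # sigmoid activation
    e = a - t                        # error
    loss = e ** 2

    dloss_de = 2 * e                     # d(loss)/de
    dloss_da = dloss_de                  # e = a - t, so de/da = 1
    dloss_dz = dloss_da * a * (1 - a)    # sigmoid'(z) = a*(1 - a)
    dloss_dw = dloss_dz * x              # z = w*x + b
    dloss_db = dloss_dz
    return loss, dloss_dw, dloss_db

print(loss_and_grads(w=0.5, b=0.1, x=2.0, t=1.0))

Anyway, on to the model itself: a glider maximizing horizontal range x[N] while sinking from 1000 m to 900 m, with the lift coefficient cL as the only control and drag given by the quadratic polar cD = c0 + k*cL^2.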
param N;                                        # number of time steps
param x_0; param y_0; param vx_0; param vy_0;   # initial state
param x_n; param y_n; param vx_n; param vy_n;   # final state (x_n unused: x[N] is the objective)
param cL_0; param cL_n;                         # unused in this version
param cL_min; param cL_max;                     # lift-coefficient bounds
param c0; param k;                              # drag polar: cD = c0 + k*cL^2
param S; param rho; param m; param g;           # wing area, air density, mass, gravity
param W := m*g;                                 # weight

var T >= 0;                                     # total flight time (free)

# State Variables
var x {i in 0..N};
var y {i in 0..N};

# Control variables
var cL {i in 1..N-1} >= cL_min, <= cL_max;

# Abbreviations (defined variables: finite differences and aero forces)
var vx {i in 0..N-1} = N*(x[i+1] - x[i])/T;
var vy {i in 0..N-1} = N*(y[i+1] - y[i])/T;
var ax {i in 1..N-1} = N*(vx[i] - vx[i-1])/T;
var ay {i in 1..N-1} = N*(vy[i] - vy[i-1])/T;
var cD {i in 1..N-1} = c0 + k*cL[i]^2;
var Vx {i in 1..N-1} = (vx[i] + vx[i-1])/2;     # midpoint velocities
var Vy {i in 1..N-1} = (vy[i] + vy[i-1])/2;
var vr {i in 1..N-1} = sqrt(Vx[i]^2 + Vy[i]^2);
var D  {i in 1..N-1} = .5*cD[i]*rho*S*vr[i]^2;  # drag
var L  {i in 1..N-1} = .5*cL[i]*rho*S*vr[i]^2;  # lift
var sin_eta {i in 1..N-1} = Vy[i]/vr[i];        # sin/cos of the flight-path angle
var cos_eta {i in 1..N-1} = Vx[i]/vr[i];

maximize final_x: x[N];

# Newton's laws, imposed at the interval midpoints
s.t. newt_x {i in 1..N-1}: ax[i] = (-L[i]*sin_eta[i] - D[i]*cos_eta[i])/m;
s.t. newt_y {i in 1..N-1}: ay[i] = ( L[i]*cos_eta[i] - D[i]*sin_eta[i] - W)/m;

# Keep accelerations within +/-3 m/s^2 (hence "novomit")
s.t. novomit_x {i in 1..N-1}: -3 <= ax[i] <= 3;
s.t. novomit_y {i in 1..N-1}: -3 <= ay[i] <= 3;

# Boundary Conditions
s.t. x_ic : x[0] = x_0;
s.t. y_ic : y[0] = y_0;
s.t. vx_ic: vx[0] = vx_0;
s.t. vy_ic: vy[0] = vy_0;
s.t. y_fc : y[N] = y_n;
s.t. vx_fc: vx[N-1] = vx_n;
s.t. vy_fc: vy[N-1] = vy_n;

# Data which needs to be reinitialized
data;
param N := 70;
param x_0 := 0;
param y_0 := 1000;
param vx_0 := 13.23;
param vy_0 := -1.29;
param y_n := 900;
param vx_n := 13.23;
param vy_n := -1.29;
param cL_min := 0;
param cL_max := 1.4;
param c0 := 0.034;
param k := 0.069662;
param S := 14;
param rho := 1.13;
param m := 100;
param g := 9.80665;

# initial guess
let {j in 0..N} x[j] := 5000*j/N;
let {j in 0..N} y[j] := 1000 - 100*j/N;
let {j in 1..N-1} cL[j] := .7;
let T := 30;

solve;
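As written, the file needs a nonlinear solver attached before solve; will do anything (e.g. option solver ipopt; if IPOPT is installed). The part that took me a while: there is no ODE integrator anywhere. Velocities and accelerations are finite differences of the decision variables (with the horizon T itself a variable), and Newton's laws are imposed as equality constraints at the interval midpoints. That's direct transcription; the solver just sees one big sparse NLP.

To check that I actually understand the transcription, here's the same NLP sketched in plain Python/SciPy. This is entirely my own re-implementation, not course code: the names are mine, N is cut from 70 to 20 because SLSQP with numerical gradients is slow, and a proper NLP solver like IPOPT would be far more robust on the full problem.

import numpy as np
from scipy.optimize import minimize

N = 20
c0, k, S, rho, m, g = 0.034, 0.069662, 14.0, 1.13, 100.0, 9.80665
W = m * g
x_0, y_0, vx_0, vy_0 = 0.0, 1000.0, 13.23, -1.29
y_n, vx_n, vy_n = 900.0, 13.23, -1.29

def unpack(z):
    # z = [x (N+1), y (N+1), cL (N-1), T]
    x = z[:N + 1]
    y = z[N + 1:2 * (N + 1)]
    cL = z[2 * (N + 1):-1]
    T = z[-1]
    return x, y, cL, T

def dyn(z):
    """Newton-law residuals plus accelerations, mirroring the AMPL abbreviations."""
    x, y, cL, T = unpack(z)
    vx = N * np.diff(x) / T              # vx[i], i = 0..N-1 (forward differences)
    vy = N * np.diff(y) / T
    ax = N * np.diff(vx) / T             # ax[i], i = 1..N-1
    ay = N * np.diff(vy) / T
    Vx = (vx[1:] + vx[:-1]) / 2          # midpoint velocities
    Vy = (vy[1:] + vy[:-1]) / 2
    vr = np.sqrt(Vx**2 + Vy**2) + 1e-9   # epsilon guards against division by zero
    cD = c0 + k * cL**2                  # quadratic drag polar
    D = 0.5 * cD * rho * S * vr**2
    L = 0.5 * cL * rho * S * vr**2
    sin_eta, cos_eta = Vy / vr, Vx / vr
    rx = ax - (-L * sin_eta - D * cos_eta) / m
    ry = ay - (L * cos_eta - D * sin_eta - W) / m
    return np.concatenate([rx, ry]), ax, ay

def boundary(z):
    x, y, cL, T = unpack(z)
    vx = N * np.diff(x) / T
    vy = N * np.diff(y) / T
    return np.array([x[0] - x_0, y[0] - y_0, y[-1] - y_n,
                     vx[0] - vx_0, vy[0] - vy_0, vx[-1] - vx_n, vy[-1] - vy_n])

cons = [
    {'type': 'eq',   'fun': lambda z: dyn(z)[0]},      # Newton's laws at midpoints
    {'type': 'eq',   'fun': boundary},                 # boundary conditions
    {'type': 'ineq', 'fun': lambda z: 3 - dyn(z)[1]},  # ax <= 3  ("novomit")
    {'type': 'ineq', 'fun': lambda z: dyn(z)[1] + 3},  # ax >= -3
    {'type': 'ineq', 'fun': lambda z: 3 - dyn(z)[2]},  # ay <= 3
    {'type': 'ineq', 'fun': lambda z: dyn(z)[2] + 3},  # ay >= -3
]

# Same initial guess as the AMPL 'let' statements
j = np.arange(N + 1)
z0 = np.concatenate([5000 * j / N, 1000 - 100 * j / N, 0.7 * np.ones(N - 1), [30.0]])
bounds = ([(None, None)] * (2 * (N + 1)) + [(0.0, 1.4)] * (N - 1) + [(1e-3, None)])

res = minimize(lambda z: -unpack(z)[0][-1],  # maximize final x
               z0, method='SLSQP', bounds=bounds, constraints=cons,
               options={'maxiter': 500})
print(res.message, 'final x =', unpack(res.x)[0][-1])

If SLSQP stalls, shrinking N further or warm-starting from a shorter-horizon solution usually helps; the AMPL-plus-IPOPT route handles the full N = 70 problem without this fuss.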