LMFIT 与 Scipy 为什么我在最小化中得到不同的结果

LMFIT vs. Scipy Why am I getting different results in minimize

提问人:samman 提问时间:7/25/2023 最后编辑:samman 更新时间:7/25/2023 访问量:171

问:

简而言之,我有我正在尝试拟合的数据,LMFIT 和 Scipy 给出了 2 种不同的解决方案,其中 LMFIT 要差得多。

关于正在发生的事情的一些细节:

“模型”是总体加权平均值。“种群”来自 4 个变量(可调参数)。有 2 个独立的“模型”(n 和 h),但这些模型在最后组合在一起。残差是这两个模型的残差与其各自数据的组合。

假设数学是正确的。那么问题来了,当给定相同的函数、变量、边界和方法时,为什么会出现不同的解决方案。

鉴于此 MVE

import numpy as np
import scipy.optimize as so
from scipy.sparse.linalg import lsmr
from lmfit import minimize,Parameters
from lmfit.printfuncs import report_fit

data_1=[[117.417, 117.423, 117.438, 117.501], [124.16, 124.231, 124.089, 124.1], [115.632, 115.645, 115.828, 115.947], [118.314, 118.317, 118.287, 118.228], [108.407, 108.419, 108.396, 108.564]]
data_2=[[9.05, 9.044, 9.057, 9.079], [9.178, 9.167, 9.16, 9.176], [7.888, 7.893, 7.911, 7.895], [7.198, 7.202, 7.197, 7.213], [7.983, 7.976, 7.979, 8.02]]
def get_populations_lmfit(initial,io):
    k,k1,x,y=initial['kvar'],initial['k1var'],initial['xvar'],initial['yvar']
    kx,k1x=k*x,k1*x
    ky,k1y=k*y,k1*y
    kxy,k1xy=k*x*y,k1*x*y   
    partial_free_concentration_WT=(np.sqrt((k*k1)**2+(8*io*k*k1)+(8*io*k*k1**2))-(k*k1))/(4*(1+k1))
    partial_closed_concentration_WT=(((4*io)+(4*io*k1))/(4*(1+k1)**2))-(partial_free_concentration_WT/(1+k1))
    partial_open_concentration_WT=k1*partial_closed_concentration_WT
    partial_free_concentration_L273A=(np.sqrt((kx*k1x)**2+(8*io*kx*k1x)+(8*io*kx*k1x**2))-(kx*k1x))/(4*(1+k1x))
    partial_closed_concentration_L273A=(((4*io)+(4*io*k1x))/(4*(1+k1x)**2))-(partial_free_concentration_L273A/(1+k1x))
    partial_open_concentration_L273A=k1x*partial_closed_concentration_L273A
    partial_free_concentration_I272A=(np.sqrt((ky*k1y)**2+(8*io*ky*k1y)+(8*io*ky*k1y**2))-(ky*k1y))/(4*(1+k1y))
    partial_closed_concentration_I272A=(((4*io)+(4*io*k1y))/(4*(1+k1y)**2))-(partial_free_concentration_I272A/(1+k1y))
    partial_open_concentration_I272A=k1y*partial_closed_concentration_I272A
    partial_free_concentration_ILAA=(np.sqrt((kxy*k1xy)**2+(8*io*kxy*k1xy)+(8*io*kxy*k1xy**2))-(kxy*k1xy))/(4*(1+k1xy))
    partial_closed_concentration_ILAA=(((4*io)+(4*io*k1xy))/(4*(1+k1xy)**2))-(partial_free_concentration_ILAA/(1+k1xy))
    partial_open_concentration_ILAA=k1xy*partial_closed_concentration_ILAA
    local_chi2=0
    for experimental_shifts_n,experimental_shifts_h in zip(data_1,data_2):
        populations=np.array([[partial_free_concentration_WT,partial_open_concentration_WT,partial_closed_concentration_WT],[partial_free_concentration_L273A,partial_open_concentration_L273A,partial_closed_concentration_L273A],[partial_free_concentration_I272A,partial_open_concentration_I272A,partial_closed_concentration_I272A],[partial_free_concentration_ILAA,partial_open_concentration_ILAA,partial_closed_concentration_ILAA]])
        experimental_shifts_n=(np.array([experimental_shifts_n])/10)*800
        experimental_shifts_h=(np.array([experimental_shifts_h]))*800
        least_squared_fit_n=lsmr(populations/io,experimental_shifts_n,maxiter=10)
        least_squared_fit_h=lsmr(populations/io,experimental_shifts_h,maxiter=10)
        local_chi2+=((least_squared_fit_n[3])**2+least_squared_fit_h[3]**2)
    return local_chi2

def get_populations_scipy(initial,io):
    k,k1,x,y=initial[0],initial[1],initial[2],initial[3]
    kx,k1x=k*x,k1*x
    ky,k1y=k*y,k1*y
    kxy,k1xy=k*x*y,k1*x*y
    partial_free_concentration_WT=(np.sqrt((k*k1)**2+(8*io*k*k1)+(8*io*k*k1**2))-(k*k1))/(4*(1+k1))
    partial_closed_concentration_WT=(((4*io)+(4*io*k1))/(4*(1+k1)**2))-(partial_free_concentration_WT/(1+k1))
    partial_open_concentration_WT=k1*partial_closed_concentration_WT
    partial_free_concentration_L273A=(np.sqrt((kx*k1x)**2+(8*io*kx*k1x)+(8*io*kx*k1x**2))-(kx*k1x))/(4*(1+k1x))
    partial_closed_concentration_L273A=(((4*io)+(4*io*k1x))/(4*(1+k1x)**2))-(partial_free_concentration_L273A/(1+k1x))
    partial_open_concentration_L273A=k1x*partial_closed_concentration_L273A
    partial_free_concentration_I272A=(np.sqrt((ky*k1y)**2+(8*io*ky*k1y)+(8*io*ky*k1y**2))-(ky*k1y))/(4*(1+k1y))
    partial_closed_concentration_I272A=(((4*io)+(4*io*k1y))/(4*(1+k1y)**2))-(partial_free_concentration_I272A/(1+k1y))
    partial_open_concentration_I272A=k1y*partial_closed_concentration_I272A
    partial_free_concentration_ILAA=(np.sqrt((kxy*k1xy)**2+(8*io*kxy*k1xy)+(8*io*kxy*k1xy**2))-(kxy*k1xy))/(4*(1+k1xy))
    partial_closed_concentration_ILAA=(((4*io)+(4*io*k1xy))/(4*(1+k1xy)**2))-(partial_free_concentration_ILAA/(1+k1xy))
    partial_open_concentration_ILAA=k1xy*partial_closed_concentration_ILAA
    local_chi2=0
    for experimental_shifts_n,experimental_shifts_h in zip(data_1,data_2):
        populations=np.array([[partial_free_concentration_WT,partial_open_concentration_WT,partial_closed_concentration_WT],[partial_free_concentration_L273A,partial_open_concentration_L273A,partial_closed_concentration_L273A],[partial_free_concentration_I272A,partial_open_concentration_I272A,partial_closed_concentration_I272A],[partial_free_concentration_ILAA,partial_open_concentration_ILAA,partial_closed_concentration_ILAA]])
        experimental_shifts_n=(np.array([experimental_shifts_n])/10)*800
        experimental_shifts_h=(np.array([experimental_shifts_h]))*800
        least_squared_fit_n=lsmr(populations/io,experimental_shifts_n,maxiter=10)
        least_squared_fit_h=lsmr(populations/io,experimental_shifts_h,maxiter=10)
        local_chi2+=((least_squared_fit_n[3])**2+least_squared_fit_h[3]**2)
    return local_chi2

io=270000
params=Parameters()
params.add('kvar',value=500,min=0,max=np.inf)
params.add('k1var',value=0.02,min=0,max=np.inf)
params.add('xvar',value=7,min=0,max=np.inf)
params.add('yvar',value=30,min=0,max=np.inf)
lmfit_solution=minimize(get_populations_lmfit,params,args=(io,),method='nelder',max_nfev=1000)
scipy_solution=so.minimize(get_populations_scipy,args=(io,), x0=np.array([500,0.02,7,30]),bounds=((0,np.inf),)*4,method='Nelder-Mead',options={'maxiter':1000})
print(report_fit(lmfit_solution))
print(scipy_solution)

您会注意到 scipy 与 lmfit 的解决方案完全不同。不仅如此,scipy 的 chi2 明显优于 lmfit。然而,我不太明白为什么。设置实际上是相同的,条件、边界和方法都是相同的,为什么它们会给出不同的结果?即为什么LMFIT的解决方案要差得多?

现在我知道你可以给 LMFIT 一个残差数组,而不是像我一样的总和,虽然这确实稍微改进了解决方案,但它仍然比 Scipy 差,而且 chi2 要差得多。

python numpy scipy 最小化 lmfit

评论

0赞 Reinderien 7/25/2023
这两个结果是否都有效(除了具有不同的最优水平)?
1赞 M Newville 7/25/2023
我没有看到任何证据表明结果不同。这是从哪里给出的?我确实看到被描述为 MVE 的不可读代码。看起来您正在遍历数据,并在标量目标函数内执行一堆迭代最小二乘拟合,以由 scipy 或 lmfit 求解。在迭代求解器中使用迭代求解器对我来说似乎很奇怪。
0赞 samman 7/25/2023
@Reinderien 对于最小的可验证示例,是的。答案是不同的,但并不明显。在我自己更复杂的数据集中,情况并非如此(lmfit 给出了截然不同的解决方案)。然而,根据我的理解,lmfit 只是一个 scipy 包装器,因此 scipy 函数(例如 minimize 及其方法)在 lmfit 中的工作方式应该与在 scipy 中相同。因此,如果 lmfit 和 scipy 都存在相同的问题。您应该得到相同的解决方案。在上述情况下,虽然你得到相似的答案,但它们并不完全相同。
0赞 samman 7/25/2023
我担心的是,我是否误解了lmfit的某些事情?我设置不正确吗?尽管所有条件/方法都相同,但为什么我的解决方案并不完全相同?以上 2 个函数的唯一区别是,一个使用 scipy,一个使用 lmfit。其他一切都是一样的。我想确保 lmfit 工作正常,然后再进一步用 lmfit 替换 scipy(这是我简化的测试用例,以确保我可以使用 lmfit 获得与使用 scipy 相同的答案,这样我就可以使用 lmfit 来优化我的解决方案)
0赞 M Newville 7/25/2023
但你没有证明它们是不同的。事实上,你已经显示了零输出,所以你没有显示任何东西。您的脚本甚至具有用于 lmfit fit_report 和 scipy 解决方案输出的 print() 函数,而您不显示它们。你说“你会注意到 scipy 和 lmfit 的解决方案完全不同......”。不,我没有注意到这一点。

答:

0赞 Reinderien 7/25/2023 #1

我认为你问错了问题。 在这里没有优势,所以简单的决定就是废弃它,直接使用scipy;但是 - 你当前的代码被诅咒了,所以应该修复它。矢量化,分解出常用表达式,并简化为单个内部调用。如下图所示,新旧方法在数值上等效于 ~13 位有效数字,但新方法会更快,调试更简单。在许多其他原因中,您的旧代码经历了许多内部线性拟合的所有麻烦,但总是丢弃结果。lmfitlstsq

import numpy as np
import scipy.optimize as so

data_1 = np.array([
    [117.417, 117.423, 117.438, 117.501],
    [124.160, 124.231, 124.089, 124.100],
    [115.632, 115.645, 115.828, 115.947],
    [118.314, 118.317, 118.287, 118.228],
    [108.407, 108.419, 108.396, 108.564],
])
data_2 = np.array([
    [9.050, 9.044, 9.057, 9.079],
    [9.178, 9.167, 9.160, 9.176],
    [7.888, 7.893, 7.911, 7.895],
    [7.198, 7.202, 7.197, 7.213],
    [7.983, 7.976, 7.979, 8.020],
])


def setup_for_chi(initial: np.ndarray, io: float) -> tuple[
    np.ndarray,
    np.ndarray,
    np.ndarray,
]:
    k, k1, x, y = initial
    xy1 = np.array((1, x, y, x*y))
    k4 = k*xy1
    k14 = k1*xy1

    partial_free_concentration = (
        np.sqrt(
            (k4*k14)**2
            + 8*io*k4*k14*(1 + k14)
        ) - k4*k14
    )/(4*(1 + k14))

    partial_closed_concentration = (
        4*io + 4*io*k14
    )/(
        4*(1 + k14)**2
    ) - partial_free_concentration/(1 + k14)

    partial_open_concentration = k14 * partial_closed_concentration

    populations = np.stack((
        partial_free_concentration,
        partial_open_concentration,
        partial_closed_concentration,
    ), axis=1)

    return (
        np.broadcast_to(populations/io, (len(data_1), *populations.shape)),
        data_1*80,
        data_2*800,
    )


def fit_populations(initial: np.ndarray, io: float) -> tuple[np.ndarray, float]:
    populations_io, experimental_shifts_n, experimental_shifts_h = setup_for_chi(initial, io)

    # 4x3
    A = populations_io[0, ...]

    # 4x10
    b = np.vstack((
        experimental_shifts_n,
        experimental_shifts_h,
    )).T

    x, chi2, rank, s = np.linalg.lstsq(a=A, b=b, rcond=None)
    return x, chi2.sum()


def get_populations(initial: np.ndarray, io: float) -> float:
    x, chi2 = fit_populations(initial, io)
    return chi2


def get_populations_old(initial,io, debug=False):
    from scipy.sparse.linalg import lsmr
    k,k1,x,y=initial[0],initial[1],initial[2],initial[3]
    kx,k1x=k*x,k1*x
    ky,k1y=k*y,k1*y
    kxy,k1xy=k*x*y,k1*x*y
    partial_free_concentration_WT=(np.sqrt((k*k1)**2+(8*io*k*k1)+(8*io*k*k1**2))-(k*k1))/(4*(1+k1))
    partial_closed_concentration_WT=(((4*io)+(4*io*k1))/(4*(1+k1)**2))-(partial_free_concentration_WT/(1+k1))
    partial_open_concentration_WT=k1*partial_closed_concentration_WT
    partial_free_concentration_L273A=(np.sqrt((kx*k1x)**2+(8*io*kx*k1x)+(8*io*kx*k1x**2))-(kx*k1x))/(4*(1+k1x))
    partial_closed_concentration_L273A=(((4*io)+(4*io*k1x))/(4*(1+k1x)**2))-(partial_free_concentration_L273A/(1+k1x))
    partial_open_concentration_L273A=k1x*partial_closed_concentration_L273A
    partial_free_concentration_I272A=(np.sqrt((ky*k1y)**2+(8*io*ky*k1y)+(8*io*ky*k1y**2))-(ky*k1y))/(4*(1+k1y))
    partial_closed_concentration_I272A=(((4*io)+(4*io*k1y))/(4*(1+k1y)**2))-(partial_free_concentration_I272A/(1+k1y))
    partial_open_concentration_I272A=k1y*partial_closed_concentration_I272A
    partial_free_concentration_ILAA=(np.sqrt((kxy*k1xy)**2+(8*io*kxy*k1xy)+(8*io*kxy*k1xy**2))-(kxy*k1xy))/(4*(1+k1xy))
    partial_closed_concentration_ILAA=(((4*io)+(4*io*k1xy))/(4*(1+k1xy)**2))-(partial_free_concentration_ILAA/(1+k1xy))
    partial_open_concentration_ILAA=k1xy*partial_closed_concentration_ILAA
    local_chi2=0
    for experimental_shifts_n,experimental_shifts_h in zip(data_1,data_2):
        populations=np.array([[partial_free_concentration_WT,partial_open_concentration_WT,partial_closed_concentration_WT],[partial_free_concentration_L273A,partial_open_concentration_L273A,partial_closed_concentration_L273A],[partial_free_concentration_I272A,partial_open_concentration_I272A,partial_closed_concentration_I272A],[partial_free_concentration_ILAA,partial_open_concentration_ILAA,partial_closed_concentration_ILAA]])
        experimental_shifts_n=(np.array([experimental_shifts_n])/10)*800
        experimental_shifts_h=(np.array([experimental_shifts_h]))*800
        least_squared_fit_n=lsmr(populations/io,experimental_shifts_n,maxiter=10)
        least_squared_fit_h=lsmr(populations/io,experimental_shifts_h,maxiter=10)
        if debug:
            print(least_squared_fit_n[0])
            print(least_squared_fit_h[0])
        local_chi2+=((least_squared_fit_n[3])**2+least_squared_fit_h[3]**2)
    return local_chi2


io = 270_000


def new_optimize() -> None:
    print('New:')
    solution = so.minimize(
        fun=get_populations,
        args=(io,),
        x0=np.array([500, 0.02, 7, 30]),
        bounds=((0, np.inf),)*4,
        options={'maxiter': 1_000},
    )
    assert solution.success, solution.message
    print('Initial:', solution.x)
    x, chi2 = fit_populations(solution.x, io)
    print('Residual:', chi2)


def regression_test() -> None:
    popio, exp_n, exp_h = setup_for_chi(initial=np.array([500, 0.02, 7, 30]), io=io)

    assert len(popio) == 5

    for pop in popio:
        assert np.allclose(
            pop,
            np.array([
                [0.00425185, 0.01952447, 0.97622368],
                [0.02781779, 0.11939080, 0.85279142],
                [0.09698655, 0.33863005, 0.56438341],
                [0.32547629, 0.54480761, 0.1297161]],
            ),
        )

    assert np.allclose(
        np.vstack(exp_n),
        np.array([
            [9393.36, 9393.84, 9395.04, 9400.08],
            [9932.8 , 9938.48, 9927.12, 9928.  ],
            [9250.56, 9251.6 , 9266.24, 9275.76],
            [9465.12, 9465.36, 9462.96, 9458.24],
            [8672.56, 8673.52, 8671.68, 8685.12],
        ]),
    )
    assert np.allclose(
        np.vstack(exp_h),
        np.array([
            [7240. , 7235.2, 7245.6, 7263.2],
            [7342.4, 7333.6, 7328. , 7340.8],
            [6310.4, 6314.4, 6328.8, 6316. ],
            [5758.4, 5761.6, 5757.6, 5770.4],
            [6386.4, 6380.8, 6383.2, 6416. ],
        ]),
    )


def old_optimize() -> None:
    scipy_solution = so.minimize(get_populations_old, args=(io,), x0=np.array([500, 0.02, 7, 30]),
                                 bounds=((0, np.inf),) * 4, method='Nelder-Mead',
                                 options={'maxiter': 1000})
    assert scipy_solution.success, scipy_solution.message
    print('OLD -')
    print('Initial:', scipy_solution.x)
    print('Residual:', scipy_solution.fun)
    print('Inner factor:')
    get_populations_old(scipy_solution.x, io, debug=True)
    x, chi2 = fit_populations(scipy_solution.x, io)
    print('chi2 from new, to compare:', chi2)
    print('Inner factor from new:')
    print(x)
    print()


if __name__ == '__main__':
    regression_test()
    old_optimize()
    new_optimize()
OLD -
Initial: [8.43488198e+02 3.73287986e-02 1.24710782e+00 5.87182669e+00]
Residual: 61.1805732870818
Inner factor:
[13574.53212153  8420.98162826  9397.58024441]
[22773.61024014  3634.75940246  7252.0978788 ]
[11112.84033704  9604.77003648  9938.98540985]
[23099.71790687  3549.89959355  7358.35542221]
[14598.2687031   8102.07994522  9252.20266122]
[-9605.44078682 10177.17124497  6290.34488533]
[ 5574.63331111 10364.43102419  9461.76279142]
[17134.2141557   3071.33941429  5772.55420423]
[20950.90064132  5776.67678267  8686.4137172 ]
[37642.15814366  -977.72464129  6417.34277753]
chi2 from new, to compare: 61.18057328708239
Inner factor from new:
[[13574.53212153 11112.84033703 14598.26870309  5574.63331116
  20950.90064131 22773.61024013 23099.71790684 -9605.44078684
  17134.21415572 37642.15814367]
 [ 8420.98162821  9604.77003642  8102.07994516 10364.43102393
   5776.6767826   3634.75940237  3549.89959343 10177.17124489
   3071.33941434  -977.72464127]
 [ 9397.58024442  9938.98540985  9252.20266123  9461.76279145
   8686.4137172   7252.09787881  7358.35542223  6290.34488534
   5772.55420422  6417.34277752]]

New:
Initial: [4.99999627e+02 1.02141914e-01 1.65423176e+00 3.11581574e+01]
Residual: 61.18057338613728

评论

0赞 samman 7/25/2023
感谢您的改进,但 lmfit 确实有很多改进。最直接的是,使用不同的方法和输出进行错误分析非常简单。如果我想在不同的方法之间切换,例如示例中的 Nelder-Mead 或 LM,我需要更改输出。更不用说 numdifftools 的整个设置,用于各种统计信息和错误分析,这些设置已经内置在 lmfit 中。我确实喜欢矢量化,肯定会清理它,但我对回归测试的目的是什么有点困惑?
0赞 Reinderien 7/25/2023
回归测试的目的是什么?- 这样,当我改变机具站时,我可以保证等效性(你也应该这样做)。
0赞 Reinderien 7/25/2023
如果我想在不同的方法之间切换,例如示例中的 Nelder-Mead 或 LM,我需要更改输出 - 哪个输出?更改方法非常简单,只需将一个字符串参数更改为 。minimize
0赞 samman 7/25/2023
最小化没有最小二乘法(如 Levenberg-Marquardt)。我需要更改为最小二乘法,并且必须更改格式。使用全局求解器(如双退火或盆地跳跃)也是如此(当然这些都是很小的更改,但有一个函数可以完成所有操作真的很方便)。lmfit 将最小化、全局求解器和最小二乘法全部包装为一个。相当干净和方便