使用 numpy 进行多项式外推

polynomial extrapolation using numpy

提问人:MRR 提问时间:6/14/2023 更新时间:6/16/2023 访问量:95

问:

我有一个信号,我希望先使用多项式拟合进行拟合,然后使用该多项式表达式进行外推。意思是,最初,我的 x 轴范围是 192.1-196THz。对于这个范围,我有相应的 y 轴值。但是我想绘制 191.325-196.125 THz 的信号,这意味着我需要预测原始 x 轴范围之外的 y 轴值。这是我的代码

import numpy as np
import matplotlib.pyplot as plt
import numpy.polynomial.polynomial as poly




y=[9.996651009,10.13327408,10.27003353,10.43754263,10.60523071,10.71136134,10.81753489,10.92299416,11.02853271,11.10093835,11.17349619,11.16565259,11.15787539,11.16191795,11.16626826,11.16087842,11.15561384,11.16940196,11.18346164,11.22434472,11.26537218,11.35981126,11.45427174,11.6102862,11.76633923,11.89058563,12.01486743,12.21133227,12.40782023,12.55682327,12.7059072,12.88910065,13.07234341,13.20139753,13.33048759,13.46960371,13.60875466,13.71956075,13.83042354,13.91157425,13.99277241,14.08548465,14.17824161,14.26091536,14.34364144,14.43504833,14.52651725,14.59548578,14.66449439,14.73751377,14.81058218,14.88687742,14.96322511,15.038278,15.11342222,15.1644832,15.21556914,15.31798775,15.42044285,15.51000031,15.59959896,15.68089263,15.76229354,15.86213633,15.96214484,16.0269976,16.0923806,16.16483146,16.23762336,16.24233076,16.24719576,16.26116812,16.27543692,16.28311024,16.29128992,16.23458771,16.17830522,16.12636211,16.07478778]

y.reverse()


# x axis for polyfit
i = 192.1e12
x=[]
while i <= 196e12:
  x.append(i)
  i += 50e9

plt.plot(x, y)


# x axis for extrapolation
i1= 191.325e12
x2=[]
while i1 <= 196.125e12:

  x2.append(i1)
  i1 += 50e9


z = poly.polyfit(x, y, 20)
f = poly.polyval(x, z)

plt.plot(x2, f(x2), linestyle='--')

问题:

1. 我收到的错误:“numpy.ndarray”对象不可调用。(第 37 行) 我找不到解决方法。

2. 如果我将超过 21 作为系数数,那么我会遇到以下问题:SVD 没有收敛于线性最小二乘法。(第 34 行) 我基本上不知道这个问题。

谁能给我一些想法? 谢谢。

numpy numpy-ndarray svd 收敛

评论

0赞 Nick ODell 6/14/2023
If I put more than 21 as the number of coefficients then I run into the following problem: SVD did not converge有没有充分的理由期望一个真正高的多项式来提供良好的拟合?我敢打赌,一旦你稍微移出已知值的区域,多项式就会发疯。
0赞 MRR 6/15/2023
@NickODell多项式确实会发疯。但是,我确实想知道我一般收到的这条错误消息,并且我还想比较不同多项式拟合阶数的精度。

答:

2赞 jared 6/14/2023 #1

外推是不准确的,但你可以使用 scipy.interpolate.interp1d 来做到这一点,方法是告诉它不要给出边界误差 () 并通过外推 () 填充值。在我的代码中,我使用了三次样条拟合。此外,利用 numpy 函数生成数组。bounds_error=Falsefill_value="extrapolation"x

import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d

plt.close("all")


y = [9.996651009, 10.13327408, 10.27003353, 10.43754263, 10.60523071, 
     10.71136134, 10.81753489, 10.92299416, 11.02853271, 11.10093835, 
     11.17349619, 11.16565259, 11.15787539, 11.16191795, 11.16626826, 
     11.16087842, 11.15561384, 11.16940196, 11.18346164, 11.22434472, 
     11.26537218, 11.35981126, 11.45427174, 11.6102862, 11.76633923, 
     11.89058563, 12.01486743, 12.21133227, 12.40782023, 12.55682327, 
     12.7059072, 12.88910065, 13.07234341, 13.20139753, 13.33048759, 
     13.46960371, 13.60875466, 13.71956075, 13.83042354, 13.91157425, 
     13.99277241, 14.08548465, 14.17824161, 14.26091536, 14.34364144, 
     14.43504833, 14.52651725, 14.59548578, 14.66449439, 14.73751377, 
     14.81058218, 14.88687742, 14.96322511, 15.038278, 15.11342222, 
     15.1644832, 15.21556914, 15.31798775, 15.42044285, 15.51000031, 
     15.59959896, 15.68089263, 15.76229354, 15.86213633, 15.96214484, 
     16.0269976, 16.0923806, 16.16483146, 16.23762336, 16.24233076, 
     16.24719576, 16.26116812, 16.27543692, 16.28311024, 16.29128992, 
     16.23458771, 16.17830522, 16.12636211, 16.07478778]
y = np.array(y)[::-1]

step = 50e9
x = np.arange(192.1e12, 196e12+step, step)
x2 = np.arange(191.325e12, 196.125e12+step, step)


f = interp1d(x, y, kind="cubic", bounds_error=False, fill_value="extrapolate")

fig, ax = plt.subplots()
ax.plot(x, y, ".", label="data")
ax.plot(x2, f(x2), "--", label="fit")
ax.legend()
ax.set_xlabel("x")
ax.set_ylabel("y")
fig.show()

enter image description here


编辑:如果您确实需要多项式拟合,请使用 .这将返回一个可以计算的函数,与此不同的是,该函数仅返回值(因此您得到的错误)。np.poly1dpolyval

import numpy as np
import matplotlib.pyplot as plt

plt.close("all")

y = [9.996651009, 10.13327408, 10.27003353, 10.43754263, 10.60523071, 
     10.71136134, 10.81753489, 10.92299416, 11.02853271, 11.10093835, 
     11.17349619, 11.16565259, 11.15787539, 11.16191795, 11.16626826, 
     11.16087842, 11.15561384, 11.16940196, 11.18346164, 11.22434472, 
     11.26537218, 11.35981126, 11.45427174, 11.6102862, 11.76633923, 
     11.89058563, 12.01486743, 12.21133227, 12.40782023, 12.55682327, 
     12.7059072, 12.88910065, 13.07234341, 13.20139753, 13.33048759, 
     13.46960371, 13.60875466, 13.71956075, 13.83042354, 13.91157425, 
     13.99277241, 14.08548465, 14.17824161, 14.26091536, 14.34364144, 
     14.43504833, 14.52651725, 14.59548578, 14.66449439, 14.73751377, 
     14.81058218, 14.88687742, 14.96322511, 15.038278, 15.11342222, 
     15.1644832, 15.21556914, 15.31798775, 15.42044285, 15.51000031, 
     15.59959896, 15.68089263, 15.76229354, 15.86213633, 15.96214484, 
     16.0269976, 16.0923806, 16.16483146, 16.23762336, 16.24233076, 
     16.24719576, 16.26116812, 16.27543692, 16.28311024, 16.29128992, 
     16.23458771, 16.17830522, 16.12636211, 16.07478778]
y = np.array(y)[::-1]

step = 50e9
x = np.arange(192.1e12, 196e12+step, step)
x2 = np.arange(191.325e12, 196.125e12+step, step)

z = np.polyfit(x, y, 20)
p = np.poly1d(z)

fig, ax = plt.subplots()
ax.plot(x, y, ".", label="data")
ax.plot(x2, p(x2), "--", label="fit")
ax.legend()
ax.set_xlabel("x")
ax.set_ylabel("y")
fig.show()

enter image description here


编辑:使用更新的方法,我们使用 5 阶多项式拟合得到类似的结果。np.polynomial.Polynomial

import numpy as np
import matplotlib.pyplot as plt
from numpy.polynomial import Polynomial

plt.close("all")

y = [9.996651009, 10.13327408, 10.27003353, 10.43754263, 10.60523071, 
     10.71136134, 10.81753489, 10.92299416, 11.02853271, 11.10093835, 
     11.17349619, 11.16565259, 11.15787539, 11.16191795, 11.16626826, 
     11.16087842, 11.15561384, 11.16940196, 11.18346164, 11.22434472, 
     11.26537218, 11.35981126, 11.45427174, 11.6102862, 11.76633923, 
     11.89058563, 12.01486743, 12.21133227, 12.40782023, 12.55682327, 
     12.7059072, 12.88910065, 13.07234341, 13.20139753, 13.33048759, 
     13.46960371, 13.60875466, 13.71956075, 13.83042354, 13.91157425, 
     13.99277241, 14.08548465, 14.17824161, 14.26091536, 14.34364144, 
     14.43504833, 14.52651725, 14.59548578, 14.66449439, 14.73751377, 
     14.81058218, 14.88687742, 14.96322511, 15.038278, 15.11342222, 
     15.1644832, 15.21556914, 15.31798775, 15.42044285, 15.51000031, 
     15.59959896, 15.68089263, 15.76229354, 15.86213633, 15.96214484, 
     16.0269976, 16.0923806, 16.16483146, 16.23762336, 16.24233076, 
     16.24719576, 16.26116812, 16.27543692, 16.28311024, 16.29128992, 
     16.23458771, 16.17830522, 16.12636211, 16.07478778]
y = np.array(y)[::-1]

step = 50e9
x = np.arange(192.1e12, 196e12+step, step)
x2 = np.arange(191.325e12, 196.125e12+step, step)

p = Polynomial.fit(x, y, 5)

fig, ax = plt.subplots()
ax.plot(x, y, ".", label="data")
ax.plot(x2, p(x2), "--", label="fit")
ax.legend()
ax.set_xlabel("x")
ax.set_ylabel("y")
fig.show()

enter image description here

评论

0赞 MRR 6/14/2023
谢谢你的详细解释。但是,文档 (numpy.org/doc/stable/reference/routines.polynomials.html) 明确指出要避免使用 np.polyfit、np.polyval 和 np.poly1d,而只使用 new(er) 包。这就是为什么我试图避免np.poly1d。
0赞 jared 6/14/2023
我已经更新了我的答案,以包含使用 .np.polynomial.Polynomial
0赞 jared 6/15/2023
现在@MRR这回答了您的问题?
0赞 MRR 6/15/2023
感谢您的修改回答。它回答了我的第一个问题。但是,我正在等待第二个答案。
0赞 jared 6/16/2023
根据文档,“请注意,当多项式的阶数很大时,拟合多项式系数本质上是不良条件的。它建议使用来改善拟合度,根据我的测试,拟合度实际上有所改善,但外推更差。x - x.mean()
0赞 Nick ODell 6/16/2023 #2

的确,多项式会发疯。但是,我确实想知道我一般收到的此错误消息,并且我还想比较不同多项式拟合阶数的精度

花点时间思考多项式拟合代码在拟合多项式时将具有哪些中间值是很有用的。您的 X 值约为 1.9 * 10^14。如果你取它的 22 次方,你得到数字 2.8 * 10^314。这大于最大可表示的双精度浮点数

有趣的是,即使你的 X^20 项适合浮点数的范围,它似乎仍然受到浮点精度问题的困扰。以下是它适合 20 度多项式的系数:

>>> print(z)
[ 7.01389718e+008 -7.23272471e-006 -6.18262251e-021  1.28113394e-034
  6.59034262e-049 -5.97742101e-066 -1.75197282e-077 -9.00617222e-092
  1.16972981e-106  3.58691274e-120 -9.24694178e-135  0.00000000e+000
  0.00000000e+000  0.00000000e+000  0.00000000e+000  0.00000000e+000
  0.00000000e+000  0.00000000e+000  0.00000000e+000  0.00000000e+000
  0.00000000e+000]

换句话说,它对 X^10 以外的所有事物的拟合系数为零。这是由于浮点精度问题而发生的。

如何解决这个问题?你可以从机器学习从业者那里获得灵感,并标准化你的输入。“标准化”是指减去平均值并除以输入的标准差。由于这是线性变换,因此在数学上等同于没有标准化的拟合,同时避免了溢出问题。当我这样做时,它为 X^22 选择的系数是 -0.11,相当于 -5.3 * 10^-316 没有标准化。如果没有标准化,这个数字就不能表示为浮点数。

这是我使用的代码:

import numpy as np
import matplotlib.pyplot as plt
import numpy.polynomial.polynomial as poly
from sklearn.preprocessing import StandardScaler




y=[9.996651009,10.13327408,10.27003353,10.43754263,10.60523071,10.71136134,10.81753489,10.92299416,11.02853271,11.10093835,11.17349619,11.16565259,11.15787539,11.16191795,11.16626826,11.16087842,11.15561384,11.16940196,11.18346164,11.22434472,11.26537218,11.35981126,11.45427174,11.6102862,11.76633923,11.89058563,12.01486743,12.21133227,12.40782023,12.55682327,12.7059072,12.88910065,13.07234341,13.20139753,13.33048759,13.46960371,13.60875466,13.71956075,13.83042354,13.91157425,13.99277241,14.08548465,14.17824161,14.26091536,14.34364144,14.43504833,14.52651725,14.59548578,14.66449439,14.73751377,14.81058218,14.88687742,14.96322511,15.038278,15.11342222,15.1644832,15.21556914,15.31798775,15.42044285,15.51000031,15.59959896,15.68089263,15.76229354,15.86213633,15.96214484,16.0269976,16.0923806,16.16483146,16.23762336,16.24233076,16.24719576,16.26116812,16.27543692,16.28311024,16.29128992,16.23458771,16.17830522,16.12636211,16.07478778]

y.reverse()


# x axis for polyfit
i = 192.1e12
x=[]
while i <= 196e12:
    x.append(i)
    i += 50e9

plt.plot(x, y)


# x axis for extrapolation
i1= 191.325e12
x2=[]
while i1 <= 196.125e12:

    x2.append(i1)
    i1 += 50e9

x = np.array(x)
x2 = np.array(x2)

scaler = StandardScaler()
scaler.fit(x.reshape(-1, 1))
x_rescaled = scaler.transform(x.reshape(-1, 1))
x2_rescaled = scaler.transform(x2.reshape(-1, 1))

z = poly.polyfit(x_rescaled.reshape(-1), y, 20)
f = poly.polyval(x2_rescaled.reshape(-1), z)

plt.plot(x2, f, linestyle='--')
plt.ylim([7, 20])

20th degree poly

我应该提一下,对 20 次多项式进行外推在您拟合的区域之外可能毫无用处。这种程度的多项式对微小的变化极为敏感。

例如,我添加了 0.01 级的随机噪声,然后重新拟合多项式。我重复了这个过程九次。从视觉上看,您几乎看不到噪声,但噪声对多项式拟合有巨大影响。得到的多项式中有一半指向上,一半指向下。

unreliable polynomials

参见 为什么不鼓励使用高阶多项式进行回归?

评论

0赞 MRR 6/17/2023
感谢您提供这些信息丰富的见解。但我有点好奇为什么外推部分(在我的拟合区域之外)有时会上升,有时下降——为什么我没有得到一致的模式?
0赞 Nick ODell 6/17/2023
@MRR 我添加的随机噪声最终与多项式的高阶元素具有非零相关性。这是一个很小的相关性,有时是负的,有时是正的,但它不是零。因此,通过改变高阶多项式系数,可以使多项式拟合稍微好一些。改变高阶多项式系数在数据中间的影响很小,但在拟合区域之外的影响很大。