提问人:Cary H 提问时间:11/17/2023 最后编辑:Cary H 更新时间:11/17/2023 访问量:53
如何在列表中查找相似的数字序列并返回索引偏移量
How to find a similar sequence of numbers in a list and return the index offset
问:
给定 2 个相似但不相同的列表,我如何找到一个列表与另一个列表匹配的偏移量?这些列表是相互重叠的黑白图像中单列像素的像素值,因此它们的值会很接近,但并不完全相同。我根据数值把这些数字分成了 3 个“箱”(0,1,2)。我正在使用 Python。在下面的列表中,列表 A 比列表 B 早 44 个位置(像素)开始。索引 44 之后的数字“接近”但不完全相同。预期输出为 44。
列表
A [1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
B [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
初始代码
import matplotlib.pyplot as plt
import numpy as np
A = [1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
B = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
def plot_this(aa, bb, idx, rating):
    """Overlay two sequences on one figure and show it.

    aa, bb: sequences to draw, labelled "A" and "B". Both are plotted
        against the positions 0..len(aa)-1, so bb must be as long as aa.
    idx, rating: only used to build the figure title "<idx> = <rating>".
    """
    positions = list(range(len(aa)))
    fig, ax = plt.subplots(figsize=(8, 4), layout='constrained')
    # Draw on the axes object directly rather than via the pyplot state machine.
    for series, name in ((aa, "A"), (bb, "B")):
        ax.plot(positions, series, label=name)
    ax.set_title(" = ".join((str(idx), str(rating))))
    ax.legend()
    plt.show()
# Standalone plotting demo: draws sin(x) for 200 evenly spaced x values in
# [0, 2*pi]. It does not read A or B and nothing below depends on x or y.
x = np.linspace(0, 2 * np.pi, 200)
y = np.sin(x)
fig, ax = plt.subplots()
ax.plot(x, y)
plt.show()
# Slide A against B one position at a time and score each offset by how many
# overlapping elements are exactly equal.
A1 = list(A)
B1 = list(B)
score_dict = {}
for i in range(len(A)):
    # At offset i, A1 holds A[i:] and B1 holds B with its last i items removed.
    score = sum(a == b for a, b in zip(A1, B1))
    bad = len(A1) - score
    if bad >= score:
        # Mismatches are at least as common as matches: score the offset as 0.
        score_dict[i] = 0
    else:
        # The net match count is divided by the full length of A, not by the
        # shrinking overlap, so large offsets can never reach a rating of 1.
        rating = (score - bad) / len(A)
        score_dict[i] = rating
        # NOTE(review): nesting reconstructed from a paste without
        # indentation; the check lives here so `rating` is always bound.
        if rating > 0.51:
            plot_this(A1, B1, i, rating)
    # Advance to the next offset by trimming the front of A and the back of B.
    A1.pop(0)
    B1.pop(-1)
for kk, vv in score_dict.items():
    print(f"index {kk} Score {vv}")
更新的代码
import matplotlib.pyplot as plt
import numpy as np
A = [1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
B = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
def plot_this(aa, bb, idx, rating):
    """Overlay two sequences on one figure and show it.

    aa, bb: sequences to draw, labelled "A" and "B". Both are plotted
        against the positions 0..len(aa)-1, so bb must be as long as aa.
    idx, rating: only used to build the figure title "<idx> = <rating>".
    """
    positions = list(range(len(aa)))
    fig, ax = plt.subplots(figsize=(8, 4), layout='constrained')
    # Draw on the axes object directly rather than via the pyplot state machine.
    for series, name in ((aa, "A"), (bb, "B")):
        ax.plot(positions, series, label=name)
    ax.set_title(" = ".join((str(idx), str(rating))))
    ax.legend()
    plt.show()
# Slide A against B and score each offset with the Pearson correlation of the
# overlapping parts; the offset with the highest coefficient is reported.
A1 = list(A)
B1 = list(B)
score_dict = {}
# Only try the first two thirds of the possible offsets.
offset_count = int(2 * len(A1) / 3)
for i in range(offset_count):
    # corrcoef returns a 2x2 matrix; element [0, 1] is the A-vs-B coefficient.
    # NOTE(review): presumably this is NaN when either overlap is constant,
    # which would make the max() below unreliable - confirm for other data.
    ranking = np.corrcoef(A1, B1)[0, 1]
    score_dict[i] = ranking
    if ranking > 0.9:
        plot_this(A1, B1, i, ranking)
    # Trim the front of A and the back of B instead of using numpy.roll(), so
    # values never wrap around. Use numpy.roll() and go all the way around if
    # the shift direction is not known.
    A1.pop(0)
    B1.pop(-1)
for kk, vv in score_dict.items():
    print(f"index {kk} Score {vv}")
# Key (offset) whose correlation score is largest.
high_score = max(score_dict, key=score_dict.get)
print(high_score)
答:
2赞
Cornelia
11/17/2023
#1
使用 numpy 可以吗?如果可以,我建议看一下如何用 numpy.roll() 移动第一个数组,并用 numpy.corrcoef() 计算两个数组之间的相关性。如果相关系数接近 1,则两个数组之间有很好的一致性。
评论
0赞
Cary H
11/17/2023
我现在会检查一下。Numpy 会很棒。
1赞
Cary H
11/17/2023
我使用 numpy.corrcoef() 来计算 2 个数组的相关性,效果很好。我没有使用 numpy.roll(),因为我不希望末尾的数据滚动到数组的前面。取而代之的是,我弹出了数组“A”的开头和数组“B”的结尾,以沿着索引移动数组。
1赞
Cornelia
11/17/2023
好的,我明白了。你甚至不必弹出它。只需使用 A[i:] 和 B[:-i]。
1赞
Cary H
11/17/2023
我将在更大的实现中使用 numpy.roll(),因为我不知道以哪种方式对齐数组,我将一直“滚动”。谢谢!
评论