在 pandas 中搜索多个浮点列,并在最匹配值的新列中返回结果

search multiple float columns in pandas and return the result in a new column of the value that is the closest match

提问人:Paul Corcoran 提问时间:9/30/2022 更新时间:10/2/2022 访问量:164

问:

import pandas as pd
import requests
from bs4 import BeautifulSoup
import re

#default scrap time frame is 12 hours and anytime date range.

url = 'https://www.betexplorer.com/odds-movements/soccer/'
soup = BeautifulSoup(requests.get(url).content)
matches = soup.find_all("td", class_="table-main__tt")
best_odds = soup.find_all("td",class_ ="bestbet-odd")
BestOddsBookie = soup.find_all("td",class_ ="bestbet-logo")

data = []
for m in soup.select('#odds-movements tr:has(.table-main__tt)'):
    data.append({
        'match':m.a.text,
        'time':m.span.text,
        'odds':[o.get('data-odd') for o in m.select('a[data-odd]')],
        'bestOddsBookie':[m.get('title') for m in BestOddsBookie]
    })

best_odds = [m.get('data-odd') for m in best_odds]
bookieTitles = [m.get('title') for m in BestOddsBookie]

urls = []
for match in matches:
    link = 'https://www.betexplorer.com' + match.a.get('href')
    urls.append(link)

data = pd.DataFrame(data,columns=['match','time','odds','BestOdds','BestOddsBookie'])
data['BestOdds'] = best_odds
data['BestOdds'] = data['BestOdds'].astype(float)
data['BestOddsBookie'] = bookieTitles
data['odds'] = data['odds'].apply(lambda x: list(pd.unique(x)))
data['HomeOdds'] = data.odds.apply(lambda x: x[0]).astype(float)
data['DrawOdds'] = data.odds.apply(lambda x: x[1]).astype(float)
data['AwayOdds'] = data.odds.apply(lambda x: x[2]).astype(float)
data['url'] = urls
data['Home%'] = 1/data['HomeOdds']
data['Draw%'] = 1/data['DrawOdds']
data['Away%'] = 1/data['AwayOdds']
data['BestOdds%'] = 1/data['BestOdds']
#data['BestOddsDifference%'] = data['BestOdds'] -
data = data[['match','time','HomeOdds','DrawOdds','AwayOdds','BestOdds','Home%','Draw%','Away%','BestOdds%','BestOddsBookie','url']]

上面的代码包含主/平局/客队的百分比值。我想创建一个新的列 data['ClosestMatch'],它使用 BestOdds% 列搜索主场/平局/客场% 获胜列的每一行和子行(绝对)。

结果应该是最接近匹配的值,我已经搜索了很多答案,但似乎没有一个适用于这个 pandas 数据帧。从本质上讲,它是一个类似于 excel 中的 vlookup 函数。

蟒蛇 熊猫 numpy

评论


答:

1赞 Laurent 10/2/2022 #1

使用您提供的数据帧:

import pandas as pd

df = pd.DataFrame({'match': ['Juventus W - Pomigliano W', 'Brusaporto - Desenzano', 'Fiorentina U19 - Verona U19', 'Yangon Utd - Rakhine United', 'West Brom - Luton', 'Eintracht Frankfurt W - Werder Bremen W', 'Gondomar - Penafiel', 'Brea - Mutilvera', 'Teplice B - Jablonec B', 'Zalaegerszegi II - Tatabanya', 'Altach U21 - Wolfurt', 'Csikszereda M. Ciuc - Metaloglobus Bucharest', 'Antoniana - Sportivo Las Parejas', 'HSL Derthona - Gozzano', 'Debrecen II - Fuzesgyarmati SK', 'Hradec Kralove B - Kolin', 'Molde 2 - Floro', 'Leviatan - Montaneses', 'Jagiellonia - Korona Kielce', 'Pero Pinheiro - Ferreiras', 'Samorin - Zilina B', 'Minaj - Dyn. Kyiv', 'EG El Palmar CF - La Union', 'Dak Lak - Long An', 'FUS Rabat - Maghreb Fez', 'Aubstadt - Bayern II', 'Xerez CD - Pozoblanco'], 'time': ['12:30', '15:00', '13:00', '11:00', '16:00', '13:01', '12:00', '16:30', '10:00', '11:00', '15:30', '10:00', '00:00', '15:00', '11:00', '10:15', '14:00', '19:00', '15:00', '16:00', '10:30', '12:00', '17:30', '10:30', '17:00', '14:00', '12:00'], 'HomeOdds': [1.03, 4.84, 1.63, 1.04, 1.87, 1.11, 9.33, 2.84, 2.26, 5.18, 1.37, 1.53, 1.39, 2.07, 3.01, 1.81, 2.1, 7.09, 1.79, 1.74, 1.38, 11.39, 3.27, 3.57, 2.16, 2.5, 1.92], 'DrawOdds': [12.44, 3.65, 3.91, 9.72, 3.53, 7.73, 5.15, 2.92, 3.58, 4.18, 5.18, 3.75, 3.88, 3.24, 3.67, 3.58, 4.38, 4.76, 3.64, 3.36, 4.48, 5.33, 3.16, 3.09, 2.76, 3.44, 3.13], 'AwayOdds': [29.44, 1.58, 4.26, 30.0, 4.03, 15.1, 1.27, 2.43, 2.44, 1.51, 5.36, 5.34, 7.07, 3.07, 1.97, 3.3, 2.42, 1.31, 4.21, 4.47, 6.11, 1.23, 2.0, 2.01, 3.56, 2.45, 3.66], 'BestOdds': [46.0, 1.6, 4.75, 41.0, 1.91, 16.29, 5.5, 3.0, 3.0, 1.62, 1.41, 5.8, 1.48, 3.4, 2.1, 2.1, 2.2, 8.0, 4.55, 4.76, 7.0, 13.58, 3.6, 3.8, 4.01, 2.53, 3.85], 'Home%': [0.970873786407767, 0.2066115702479339, 0.6134969325153374, 0.9615384615384615, 0.53475935828877, 0.9009009009009008, 0.10718113612004287, 0.35211267605633806, 0.4424778761061947, 0.19305019305019305, 0.7299270072992701, 0.6535947712418301, 0.7194244604316548, 0.48309178743961356, 0.33222591362126247, 0.5524861878453039, 0.47619047619047616, 0.14104372355430184, 0.5586592178770949, 0.5747126436781609, 0.7246376811594204, 0.08779631255487269, 0.3058103975535168, 0.2801120448179272, 0.4629629629629629, 0.4, 0.5208333333333334], 'Draw%': [0.08038585209003216, 0.273972602739726, 0.2557544757033248, 0.10288065843621398, 0.28328611898017, 0.129366106080207, 0.1941747572815534, 0.3424657534246575, 0.27932960893854747, 0.23923444976076558, 0.19305019305019305, 0.26666666666666666, 0.2577319587628866, 0.30864197530864196, 0.2724795640326976, 0.27932960893854747, 0.22831050228310504, 0.21008403361344538, 0.2747252747252747, 0.2976190476190476, 0.2232142857142857, 0.18761726078799248, 0.3164556962025316, 0.3236245954692557, 0.3623188405797102, 0.29069767441860467, 0.3194888178913738], 'Away%': [0.033967391304347824, 0.6329113924050632, 0.2347417840375587, 0.03333333333333333, 0.24813895781637715, 0.06622516556291391, 0.7874015748031495, 0.4115226337448559, 0.4098360655737705, 0.6622516556291391, 0.18656716417910446, 0.18726591760299627, 0.14144271570014144, 0.32573289902280134, 0.5076142131979695, 0.30303030303030304, 0.4132231404958678, 0.7633587786259541, 0.23752969121140144, 0.2237136465324385, 0.1636661211129296, 0.8130081300813008, 0.5, 0.49751243781094534, 0.2808988764044944, 0.4081632653061224, 0.27322404371584696], 'BestOdds%': [0.021739130434782608, 0.625, 0.21052631578947367, 0.024390243902439025, 0.5235602094240838, 0.061387354205033766, 0.18181818181818182, 0.3333333333333333, 0.3333333333333333, 0.6172839506172839, 0.7092198581560284, 0.1724137931034483, 0.6756756756756757, 0.29411764705882354, 0.47619047619047616, 0.47619047619047616, 0.45454545454545453, 0.125, 0.21978021978021978, 0.21008403361344538, 0.14285714285714285, 0.07363770250368189, 0.2777777777777778, 0.2631578947368421, 0.24937655860349128, 0.3952569169960475, 0.2597402597402597], 'BestOddsBookie': ['Betfair', 'Interwetten', 'BetVictor', 'Betfair', 'William Hill', 'Pinnacle', 'ComeOn', 'Unibet', 'Betfair', 'Betfair', '10Bet', 'Unibet', 'Unibet', 'Betway', 'Unibet', 'William Hill', 'BetVictor', 'Betfair', 'Unibet', 'Pinnacle', 'Unibet', 'Pinnacle', 'Betfair', 'Betfair', 'Pinnacle', 'Pinnacle', 'ComeOn'], 'url': ['https://www.betexplorer.com/soccer/italy/serie-a-women/juventus-pomigliano/QJ5QYNdk/', 'https://www.betexplorer.com/soccer/italy/serie-d-group-b/brusaporto-desenzano-calvina/6Nfh5QLd/', 'https://www.betexplorer.com/soccer/italy/primavera-1/fiorentina-verona/21tMntMN/', 'https://www.betexplorer.com/soccer/myanmar/national-league/yangon-united-rakhine-united/AJFE9PuB/', 'https://www.betexplorer.com/soccer/england/championship/west-brom-luton/nLI4lK3H/', 'https://www.betexplorer.com/soccer/germany/bundesliga-women/eintracht-frankfurt-werder-bremen/Y1W1OFSK/', 'https://www.betexplorer.com/soccer/portugal/taca-de-portugal/gondomar-penafiel/bFxjOfB0/', 'https://www.betexplorer.com/soccer/spain/segunda-rfef-group-2/brea-mutilvera/dKDnjACl/', 'https://www.betexplorer.com/soccer/czech-republic/cfl-group-b/teplice-jablonec/63SO9WFN/', 'https://www.betexplorer.com/soccer/hungary/nb-iii-west/zalaegerszegi-tatabanya/ttRVat7A/', 'https://www.betexplorer.com/soccer/austria/regionalliga-west-vorarlberg/altach-fc-wolfurt/KvKyrn7K/', 'https://www.betexplorer.com/soccer/romania/liga-2/miercurea-ciuc-metaloglobus-bucharest/KdD5FXNc/', 'https://www.betexplorer.com/soccer/argentina/torneo-federal/antoniana-sportivo-las-parejas/EVSCz12F/', 'https://www.betexplorer.com/soccer/italy/serie-d-group-a/hsl-derthona-gozzano/KYG0ofJO/', 'https://www.betexplorer.com/soccer/hungary/nb-iii-east/debrecen-fuzesgyarmati/KCPlF2RB/', 'https://www.betexplorer.com/soccer/czech-republic/cfl-group-b/hradec-kralove-kolin/jy4e4Awo/', 'https://www.betexplorer.com/soccer/norway/division-3-group-2/molde-floro/67hGnReE/', 'https://www.betexplorer.com/soccer/mexico/liga-premier-serie-a/leviatan-montaneses/z179xiMm/', 'https://www.betexplorer.com/soccer/poland/ekstraklasa/jagiellonia-korona-kielce/GhviXwuP/', 'https://www.betexplorer.com/soccer/portugal/taca-de-portugal/pero-pinheiro-ferreiras/4jnly26n/', 'https://www.betexplorer.com/soccer/slovakia/2-liga/samorin-zilina/UytRix9L/', 'https://www.betexplorer.com/soccer/ukraine/premier-league/minaj-dynamo-kyiv/voA6ahaQ/', 'https://www.betexplorer.com/soccer/spain/tercera-rfef-group-13/e-g-el-palmar-la-union/vkKKPPvP/', 'https://www.betexplorer.com/soccer/vietnam/v-league-2/dak-lak-long-an/WtrbcKZ9/', 'https://www.betexplorer.com/soccer/morocco/botola-pro/fus-rabat-maghreb-fez/KOPF7CuB/', 'https://www.betexplorer.com/soccer/germany/regionalliga-bayern/tsv-aubstadt-bayern/WUuFtZx2/', 'https://www.betexplorer.com/soccer/spain/tercera-rfef-group-10/xerez-cd-pozoblanco/vqiNtz7A/']})

这里有一种方法可以做到这一点:

# Shorter version of the dataframe (optional step)
df = df[["match", "Home%", "Draw%", "Away%", "BestOdds%"]]

# Add new column with the closest match
df["ClosestMatch"] = df.apply(
    lambda x: min(
        abs(x["Home%"] - x["BestOdds%"]),
        abs(x["Draw%"] - x["BestOdds%"]),
        abs(x["Away%"] - x["BestOdds%"]),
    ),
    axis=1,
)
print(df)
# Output
             match     Home%     Draw%     Away%  BestOdds%  ClosestMatch
0   Juventus W ...  0.970874  0.080386  0.033967   0.021739      0.012228
1   Brusaporto ...  0.206612  0.273973  0.632911   0.625000      0.007911
2   Fiorentina ...  0.613497  0.255754  0.234742   0.210526      0.024215
3   Yangon Utd ...  0.961538  0.102881  0.033333   0.024390      0.008943
4   West Brom -...  0.534759  0.283286  0.248139   0.523560      0.011199
5   Eintracht F...  0.900901  0.129366  0.066225   0.061387      0.004838
6   Gondomar - ...  0.107181  0.194175  0.787402   0.181818      0.012357
7   Brea - Muti...  0.352113  0.342466  0.411523   0.333333      0.009132
8   Teplice B -...  0.442478  0.279330  0.409836   0.333333      0.054004
9   Zalaegersze...  0.193050  0.239234  0.662252   0.617284      0.044968
10  Altach U21 ...  0.729927  0.193050  0.186567   0.709220      0.020707
11  Csikszereda...  0.653595  0.266667  0.187266   0.172414      0.014852
12  Antoniana -...  0.719424  0.257732  0.141443   0.675676      0.043749
13  HSL Derthon...  0.483092  0.308642  0.325733   0.294118      0.014524
14  Debrecen II...  0.332226  0.272480  0.507614   0.476190      0.031424
15  Hradec Kral...  0.552486  0.279330  0.303030   0.476190      0.076296
16  Molde 2 - F...  0.476190  0.228311  0.413223   0.454545      0.021645
17  Leviatan - ...  0.141044  0.210084  0.763359   0.125000      0.016044
18  Jagiellonia...  0.558659  0.274725  0.237530   0.219780      0.017749
19  Pero Pinhei...  0.574713  0.297619  0.223714   0.210084      0.013630
20  Samorin - Z...  0.724638  0.223214  0.163666   0.142857      0.020809
21  Minaj - Dyn...  0.087796  0.187617  0.813008   0.073638      0.014159
22  EG El Palma...  0.305810  0.316456  0.500000   0.277778      0.028033
23  Dak Lak - L...  0.280112  0.323625  0.497512   0.263158      0.016954
24  FUS Rabat -...  0.462963  0.362319  0.280899   0.249377      0.031522
25  Aubstadt - ...  0.400000  0.290698  0.408163   0.395257      0.004743
26  Xerez CD - ...  0.520833  0.319489  0.273224   0.259740      0.013484