提问人:Paul Corcoran 提问时间:9/30/2022 更新时间:10/2/2022 访问量:164
在 pandas 中搜索多个浮点列,并在最匹配值的新列中返回结果
search multiple float columns in pandas and return the result in a new column of the value that is the closest match
问:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
#default scrap time frame is 12 hours and anytime date range.
url = 'https://www.betexplorer.com/odds-movements/soccer/'
soup = BeautifulSoup(requests.get(url).content)
matches = soup.find_all("td", class_="table-main__tt")
best_odds = soup.find_all("td",class_ ="bestbet-odd")
BestOddsBookie = soup.find_all("td",class_ ="bestbet-logo")
data = []
for m in soup.select('#odds-movements tr:has(.table-main__tt)'):
data.append({
'match':m.a.text,
'time':m.span.text,
'odds':[o.get('data-odd') for o in m.select('a[data-odd]')],
'bestOddsBookie':[m.get('title') for m in BestOddsBookie]
})
best_odds = [m.get('data-odd') for m in best_odds]
bookieTitles = [m.get('title') for m in BestOddsBookie]
urls = []
for match in matches:
link = 'https://www.betexplorer.com' + match.a.get('href')
urls.append(link)
data = pd.DataFrame(data,columns=['match','time','odds','BestOdds','BestOddsBookie'])
data['BestOdds'] = best_odds
data['BestOdds'] = data['BestOdds'].astype(float)
data['BestOddsBookie'] = bookieTitles
data['odds'] = data['odds'].apply(lambda x: list(pd.unique(x)))
data['HomeOdds'] = data.odds.apply(lambda x: x[0]).astype(float)
data['DrawOdds'] = data.odds.apply(lambda x: x[1]).astype(float)
data['AwayOdds'] = data.odds.apply(lambda x: x[2]).astype(float)
data['url'] = urls
data['Home%'] = 1/data['HomeOdds']
data['Draw%'] = 1/data['DrawOdds']
data['Away%'] = 1/data['AwayOdds']
data['BestOdds%'] = 1/data['BestOdds']
#data['BestOddsDifference%'] = data['BestOdds'] -
data = data[['match','time','HomeOdds','DrawOdds','AwayOdds','BestOdds','Home%','Draw%','Away%','BestOdds%','BestOddsBookie','url']]
上面的代码包含主/平局/客队的百分比值。我想创建一个新的列 data['ClosestMatch'],它使用 BestOdds% 列搜索主场/平局/客场% 获胜列的每一行和子行(绝对)。
结果应该是最接近匹配的值,我已经搜索了很多答案,但似乎没有一个适用于这个 pandas 数据帧。从本质上讲,它是一个类似于 excel 中的 vlookup 函数。
答:
1赞
Laurent
10/2/2022
#1
使用您提供的数据帧:
import pandas as pd
df = pd.DataFrame({'match': ['Juventus W - Pomigliano W', 'Brusaporto - Desenzano', 'Fiorentina U19 - Verona U19', 'Yangon Utd - Rakhine United', 'West Brom - Luton', 'Eintracht Frankfurt W - Werder Bremen W', 'Gondomar - Penafiel', 'Brea - Mutilvera', 'Teplice B - Jablonec B', 'Zalaegerszegi II - Tatabanya', 'Altach U21 - Wolfurt', 'Csikszereda M. Ciuc - Metaloglobus Bucharest', 'Antoniana - Sportivo Las Parejas', 'HSL Derthona - Gozzano', 'Debrecen II - Fuzesgyarmati SK', 'Hradec Kralove B - Kolin', 'Molde 2 - Floro', 'Leviatan - Montaneses', 'Jagiellonia - Korona Kielce', 'Pero Pinheiro - Ferreiras', 'Samorin - Zilina B', 'Minaj - Dyn. Kyiv', 'EG El Palmar CF - La Union', 'Dak Lak - Long An', 'FUS Rabat - Maghreb Fez', 'Aubstadt - Bayern II', 'Xerez CD - Pozoblanco'], 'time': ['12:30', '15:00', '13:00', '11:00', '16:00', '13:01', '12:00', '16:30', '10:00', '11:00', '15:30', '10:00', '00:00', '15:00', '11:00', '10:15', '14:00', '19:00', '15:00', '16:00', '10:30', '12:00', '17:30', '10:30', '17:00', '14:00', '12:00'], 'HomeOdds': [1.03, 4.84, 1.63, 1.04, 1.87, 1.11, 9.33, 2.84, 2.26, 5.18, 1.37, 1.53, 1.39, 2.07, 3.01, 1.81, 2.1, 7.09, 1.79, 1.74, 1.38, 11.39, 3.27, 3.57, 2.16, 2.5, 1.92], 'DrawOdds': [12.44, 3.65, 3.91, 9.72, 3.53, 7.73, 5.15, 2.92, 3.58, 4.18, 5.18, 3.75, 3.88, 3.24, 3.67, 3.58, 4.38, 4.76, 3.64, 3.36, 4.48, 5.33, 3.16, 3.09, 2.76, 3.44, 3.13], 'AwayOdds': [29.44, 1.58, 4.26, 30.0, 4.03, 15.1, 1.27, 2.43, 2.44, 1.51, 5.36, 5.34, 7.07, 3.07, 1.97, 3.3, 2.42, 1.31, 4.21, 4.47, 6.11, 1.23, 2.0, 2.01, 3.56, 2.45, 3.66], 'BestOdds': [46.0, 1.6, 4.75, 41.0, 1.91, 16.29, 5.5, 3.0, 3.0, 1.62, 1.41, 5.8, 1.48, 3.4, 2.1, 2.1, 2.2, 8.0, 4.55, 4.76, 7.0, 13.58, 3.6, 3.8, 4.01, 2.53, 3.85], 'Home%': [0.970873786407767, 0.2066115702479339, 0.6134969325153374, 0.9615384615384615, 0.53475935828877, 0.9009009009009008, 0.10718113612004287, 0.35211267605633806, 0.4424778761061947, 0.19305019305019305, 0.7299270072992701, 0.6535947712418301, 0.7194244604316548, 0.48309178743961356, 0.33222591362126247, 0.5524861878453039, 0.47619047619047616, 0.14104372355430184, 0.5586592178770949, 0.5747126436781609, 0.7246376811594204, 0.08779631255487269, 0.3058103975535168, 0.2801120448179272, 0.4629629629629629, 0.4, 0.5208333333333334], 'Draw%': [0.08038585209003216, 0.273972602739726, 0.2557544757033248, 0.10288065843621398, 0.28328611898017, 0.129366106080207, 0.1941747572815534, 0.3424657534246575, 0.27932960893854747, 0.23923444976076558, 0.19305019305019305, 0.26666666666666666, 0.2577319587628866, 0.30864197530864196, 0.2724795640326976, 0.27932960893854747, 0.22831050228310504, 0.21008403361344538, 0.2747252747252747, 0.2976190476190476, 0.2232142857142857, 0.18761726078799248, 0.3164556962025316, 0.3236245954692557, 0.3623188405797102, 0.29069767441860467, 0.3194888178913738], 'Away%': [0.033967391304347824, 0.6329113924050632, 0.2347417840375587, 0.03333333333333333, 0.24813895781637715, 0.06622516556291391, 0.7874015748031495, 0.4115226337448559, 0.4098360655737705, 0.6622516556291391, 0.18656716417910446, 0.18726591760299627, 0.14144271570014144, 0.32573289902280134, 0.5076142131979695, 0.30303030303030304, 0.4132231404958678, 0.7633587786259541, 0.23752969121140144, 0.2237136465324385, 0.1636661211129296, 0.8130081300813008, 0.5, 0.49751243781094534, 0.2808988764044944, 0.4081632653061224, 0.27322404371584696], 'BestOdds%': [0.021739130434782608, 0.625, 0.21052631578947367, 0.024390243902439025, 0.5235602094240838, 0.061387354205033766, 0.18181818181818182, 0.3333333333333333, 0.3333333333333333, 0.6172839506172839, 0.7092198581560284, 0.1724137931034483, 0.6756756756756757, 0.29411764705882354, 0.47619047619047616, 0.47619047619047616, 0.45454545454545453, 0.125, 0.21978021978021978, 0.21008403361344538, 0.14285714285714285, 0.07363770250368189, 0.2777777777777778, 0.2631578947368421, 0.24937655860349128, 0.3952569169960475, 0.2597402597402597], 'BestOddsBookie': ['Betfair', 'Interwetten', 'BetVictor', 'Betfair', 'William Hill', 'Pinnacle', 'ComeOn', 'Unibet', 'Betfair', 'Betfair', '10Bet', 'Unibet', 'Unibet', 'Betway', 'Unibet', 'William Hill', 'BetVictor', 'Betfair', 'Unibet', 'Pinnacle', 'Unibet', 'Pinnacle', 'Betfair', 'Betfair', 'Pinnacle', 'Pinnacle', 'ComeOn'], 'url': ['https://www.betexplorer.com/soccer/italy/serie-a-women/juventus-pomigliano/QJ5QYNdk/', 'https://www.betexplorer.com/soccer/italy/serie-d-group-b/brusaporto-desenzano-calvina/6Nfh5QLd/', 'https://www.betexplorer.com/soccer/italy/primavera-1/fiorentina-verona/21tMntMN/', 'https://www.betexplorer.com/soccer/myanmar/national-league/yangon-united-rakhine-united/AJFE9PuB/', 'https://www.betexplorer.com/soccer/england/championship/west-brom-luton/nLI4lK3H/', 'https://www.betexplorer.com/soccer/germany/bundesliga-women/eintracht-frankfurt-werder-bremen/Y1W1OFSK/', 'https://www.betexplorer.com/soccer/portugal/taca-de-portugal/gondomar-penafiel/bFxjOfB0/', 'https://www.betexplorer.com/soccer/spain/segunda-rfef-group-2/brea-mutilvera/dKDnjACl/', 'https://www.betexplorer.com/soccer/czech-republic/cfl-group-b/teplice-jablonec/63SO9WFN/', 'https://www.betexplorer.com/soccer/hungary/nb-iii-west/zalaegerszegi-tatabanya/ttRVat7A/', 'https://www.betexplorer.com/soccer/austria/regionalliga-west-vorarlberg/altach-fc-wolfurt/KvKyrn7K/', 'https://www.betexplorer.com/soccer/romania/liga-2/miercurea-ciuc-metaloglobus-bucharest/KdD5FXNc/', 'https://www.betexplorer.com/soccer/argentina/torneo-federal/antoniana-sportivo-las-parejas/EVSCz12F/', 'https://www.betexplorer.com/soccer/italy/serie-d-group-a/hsl-derthona-gozzano/KYG0ofJO/', 'https://www.betexplorer.com/soccer/hungary/nb-iii-east/debrecen-fuzesgyarmati/KCPlF2RB/', 'https://www.betexplorer.com/soccer/czech-republic/cfl-group-b/hradec-kralove-kolin/jy4e4Awo/', 'https://www.betexplorer.com/soccer/norway/division-3-group-2/molde-floro/67hGnReE/', 'https://www.betexplorer.com/soccer/mexico/liga-premier-serie-a/leviatan-montaneses/z179xiMm/', 'https://www.betexplorer.com/soccer/poland/ekstraklasa/jagiellonia-korona-kielce/GhviXwuP/', 'https://www.betexplorer.com/soccer/portugal/taca-de-portugal/pero-pinheiro-ferreiras/4jnly26n/', 'https://www.betexplorer.com/soccer/slovakia/2-liga/samorin-zilina/UytRix9L/', 'https://www.betexplorer.com/soccer/ukraine/premier-league/minaj-dynamo-kyiv/voA6ahaQ/', 'https://www.betexplorer.com/soccer/spain/tercera-rfef-group-13/e-g-el-palmar-la-union/vkKKPPvP/', 'https://www.betexplorer.com/soccer/vietnam/v-league-2/dak-lak-long-an/WtrbcKZ9/', 'https://www.betexplorer.com/soccer/morocco/botola-pro/fus-rabat-maghreb-fez/KOPF7CuB/', 'https://www.betexplorer.com/soccer/germany/regionalliga-bayern/tsv-aubstadt-bayern/WUuFtZx2/', 'https://www.betexplorer.com/soccer/spain/tercera-rfef-group-10/xerez-cd-pozoblanco/vqiNtz7A/']})
这里有一种方法可以做到这一点:
# Shorter version of the dataframe (optional step)
df = df[["match", "Home%", "Draw%", "Away%", "BestOdds%"]]
# Add new column with the closest match
df["ClosestMatch"] = df.apply(
lambda x: min(
abs(x["Home%"] - x["BestOdds%"]),
abs(x["Draw%"] - x["BestOdds%"]),
abs(x["Away%"] - x["BestOdds%"]),
),
axis=1,
)
print(df)
# Output
match Home% Draw% Away% BestOdds% ClosestMatch
0 Juventus W ... 0.970874 0.080386 0.033967 0.021739 0.012228
1 Brusaporto ... 0.206612 0.273973 0.632911 0.625000 0.007911
2 Fiorentina ... 0.613497 0.255754 0.234742 0.210526 0.024215
3 Yangon Utd ... 0.961538 0.102881 0.033333 0.024390 0.008943
4 West Brom -... 0.534759 0.283286 0.248139 0.523560 0.011199
5 Eintracht F... 0.900901 0.129366 0.066225 0.061387 0.004838
6 Gondomar - ... 0.107181 0.194175 0.787402 0.181818 0.012357
7 Brea - Muti... 0.352113 0.342466 0.411523 0.333333 0.009132
8 Teplice B -... 0.442478 0.279330 0.409836 0.333333 0.054004
9 Zalaegersze... 0.193050 0.239234 0.662252 0.617284 0.044968
10 Altach U21 ... 0.729927 0.193050 0.186567 0.709220 0.020707
11 Csikszereda... 0.653595 0.266667 0.187266 0.172414 0.014852
12 Antoniana -... 0.719424 0.257732 0.141443 0.675676 0.043749
13 HSL Derthon... 0.483092 0.308642 0.325733 0.294118 0.014524
14 Debrecen II... 0.332226 0.272480 0.507614 0.476190 0.031424
15 Hradec Kral... 0.552486 0.279330 0.303030 0.476190 0.076296
16 Molde 2 - F... 0.476190 0.228311 0.413223 0.454545 0.021645
17 Leviatan - ... 0.141044 0.210084 0.763359 0.125000 0.016044
18 Jagiellonia... 0.558659 0.274725 0.237530 0.219780 0.017749
19 Pero Pinhei... 0.574713 0.297619 0.223714 0.210084 0.013630
20 Samorin - Z... 0.724638 0.223214 0.163666 0.142857 0.020809
21 Minaj - Dyn... 0.087796 0.187617 0.813008 0.073638 0.014159
22 EG El Palma... 0.305810 0.316456 0.500000 0.277778 0.028033
23 Dak Lak - L... 0.280112 0.323625 0.497512 0.263158 0.016954
24 FUS Rabat -... 0.462963 0.362319 0.280899 0.249377 0.031522
25 Aubstadt - ... 0.400000 0.290698 0.408163 0.395257 0.004743
26 Xerez CD - ... 0.520833 0.319489 0.273224 0.259740 0.013484
评论