提问人:ProjectJords 提问时间:11/6/2023 更新时间:11/6/2023 访问量:38
连接时出现 Pandas 错误“值长度与索引长度不匹配”
Pandas error "Length of values does not match length of index" when concatenating
问:
我弹出了这个“值的长度与索引的长度不匹配”错误并破坏了我的代码。
这里的代码是遍历字典,拉出参数和路径,发送到 api 以更新路径对象中的信息。
我的代码在第一次迭代后失败,我不知道为什么。
代码如下:
import dydx3 as dy
import pandas as pd
import time
import os
dir = os.getcwd()
client = dy.Client(
host = 'https://api.dydx.exchange'
)
markets_1m = {'BTC-USD':f'{dir}\\markets\\BTC-USD_1MIN.csv',
'ETH-USD':f'{dir}\\markets\\ETH-USD_1MIN.csv',
'LINK-USD':f'{dir}\\markets\\LINK-USD_1MIN.csv',
'SOL-USD':f'{dir}\\markets\\SOL-USD_1MIN.csv',
'ATOM-USD':f'{dir}\\markets\\ATOM-USD_1MIN.csv',
'XMR-USD':f'{dir}\\markets\\XMR-USD_1MIN.csv',
'RUNE-USD':f'{dir}\\markets\\RUNE-USD_1MIN.csv',
}
def get_price_data_1m():
try:
dataframe = pd.DataFrame()
StartedAt = []
UpdatedAt = []
Market = []
Resolution = []
Low = []
High = []
Open = []
Close = []
BaseTokenVolume = []
Trades = []
UsdVolume = []
StartingOpenInterest = []
for ticker, path in markets_1m.items():
candles = client.public.get_candles(
market = ticker,
resolution ='1MIN',
)
for i in candles.data['candles']:
startedAt = i['startedAt']
updatedAt = i['updatedAt']
market = i['market']
resolution = i['resolution']
low = i['low']
high = i['high']
open = i['open']
close = i['close']
baseTokenVolume = i['baseTokenVolume']
trades = i['trades']
usdVolume = i['usdVolume']
startingOpenInterest = i['startingOpenInterest']
StartedAt.append(startedAt)
UpdatedAt.append(updatedAt)
Market.append(market)
Resolution.append(resolution)
Low.append(low)
High.append(high)
Open.append(open)
Close.append(close)
BaseTokenVolume.append(baseTokenVolume)
Trades.append(trades)
UsdVolume.append(usdVolume)
StartingOpenInterest.append(startingOpenInterest)
dataframe['startedAt'] = [value for value in StartedAt]
dataframe['updatedAt'] = [value for value in UpdatedAt]
dataframe['market'] = [value for value in Market]
dataframe['resolution'] = [value for value in Resolution]
dataframe['open'] = [value for value in Low]
dataframe['high'] = [value for value in High]
dataframe['low'] = [value for value in Open]
dataframe['close'] = [value for value in Close]
dataframe['baseTokenVolume'] = [value for value in BaseTokenVolume]
dataframe['trades'] = [value for value in Trades]
dataframe['usdVolume'] = [value for value in UsdVolume]
dataframe['startingOpenInterest'] = [value for value in StartingOpenInterest]
price_history = pd.read_csv(f'{path}') #input csv path
combined = pd.concat([price_history.reset_index(drop=True), dataframe.reset_index(drop=True)], join='outer', ignore_index=True, axis=0)
combined.drop_duplicates(subset=['startedAt'], keep='first', inplace=True)
combined = combined.sort_values(by=['startedAt'])
combined.to_csv(f'{dir}\\markets\\{ticker}_1MIN.csv', index=False)
print("end of iteration")
except Exception as e:
print(f'{e}')
我试图重置两个数据帧中的索引长度并忽略 concat 函数中的索引,但没有奏效。 我还尝试在主循环中使用 try/except 中的 continue 来抑制错误,这也产生了相同的错误。
我应该发生的事情是,该函数应该提取数据,将该数据附加到现有数据帧中,删除任何重复项,按“startedAt”列对数据帧进行排序,然后保存到相同的路径。
当我尝试这样做时,只会正确执行“markets_1m”字典中第一项的函数,然后在其余所有项上失败。
关于为什么它只对第一个有效或为什么会出现错误的任何想法?
谢谢
答:
0赞
ProjectJords
11/6/2023
#1
我想通了。
问题在于,为存储新数据而组装的列表在每次迭代中都没有被清除,因为它们存储在主函数循环之外。
正确的代码将列表存储在主循环下,因此在每次新迭代开始时都会清除它们。
0赞
Parfait
11/6/2023
#2
避免维护许多临时列表的簿记。使用定义的方法和列表/字典推导式进行 API 流程和数据帧构建:
def get_api_data(ticker):
"""
Call API and return select values in list of dicts.
"""
candles = client.public.get_candles(
market = ticker,
resolution ='1MIN',
)
api_data = [
{
"ticker": ticker,
"startedAt": i['startedAt'],
"updatedAt": i['updatedAt'],
"market": i['market'],
"resolution": i['resolution'],
"low": i['low'],
"high": i['high'],
"open": i['open'],
"close": i['close'],
"baseTokenVolume": i['baseTokenVolume'],
"trades": i['trades'],
"usdVolume": i['usdVolume'],
"startingOpenInterest": i['startingOpenInterest']
}
for i in candles.data['candles']
]
return api_data
def build_price_data_1m_frames():
"""
Build and return dict of data frames.
Export each ticker historical and current data to CSVs.
"""
try:
dydx_dfs = {
ticker: pd.concat(
[
pd.read_csv(path).assign(ticker = ticker),
pd.DataFrame(get_api_data(ticker))
],
ignore_index=True
).drop_duplicates(
subset=['startedAt'], keep='first'
).sort_values(
by=['startedAt']
).reset_index(drop=True)
for ticker, path in markets_1m.items()
}
for tkr, df in dydx.times():
df.to_csv(os.path.join(dir, "markets", tkr, "_1MIN.csv"))
except Exception as e:
print(e)
return None
return dydx_dfs
评论