提问人:sampeterson 提问时间:12/9/2022 最后编辑:Laurentsampeterson 更新时间:12/11/2022 访问量:69
将包含多个嵌套列表的字典转换为 pandas 数据帧
Convert dictionary containing multiple nested lists to a pandas dataframe
问:
我有一本字典,看起来像这样:
my_dict = {
"end_time": "2022-10-21T20:00:00",
"time_samples": [
"2022-10-21T15:00:00+00:00",
"2022-10-21T16:00:00+00:00",
"2022-10-21T17:00:00+00:00",
"2022-10-21T18:00:00+00:00",
"2022-10-21T19:00:00+00:00",
],
"groupings": [
{
"grouping": "day_of_week",
"groupings": ["day_of_week"],
"grouping_value": "5",
"grouping_values": [5],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
17.533546325878596,
17.19327731092437,
13.44502617801047,
11.5010395010395,
9.649484536082475,
],
}
],
},
{
"grouping": "device_type",
"groupings": ["device_type"],
"grouping_value": "bicycle",
"grouping_values": ["bicycle"],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
2.3736263736263736,
0.5454545454545454,
None,
None,
None,
],
}
],
},
{
"grouping": "device_type",
"groupings": ["device_type"],
"grouping_value": "moped",
"grouping_values": ["moped"],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
19.160377358490564,
18.88888888888889,
14.799076212471132,
12.702640642939151,
10.669714285714285,
],
}
],
},
{
"grouping": "geo_fence",
"groupings": ["geo_fence"],
"grouping_value": "/geo_features/1448150377",
"grouping_values": ["/geo_features/1448150377"],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
17.533546325878596,
17.19327731092437,
13.44502617801047,
11.5010395010395,
9.649484536082475,
],
}
],
},
{
"grouping": "hour_of_day",
"groupings": ["hour_of_day"],
"grouping_value": "17",
"grouping_values": [17],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [17.533546325878596, None, None, None, None],
}
],
},
{
"grouping": "hour_of_day",
"groupings": ["hour_of_day"],
"grouping_value": "18",
"grouping_values": [18],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [None, 17.19327731092437, None, None, None],
}
],
},
{
"grouping": "hour_of_day",
"groupings": ["hour_of_day"],
"grouping_value": "19",
"grouping_values": [19],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [None, None, 13.44502617801047, None, None],
}
],
},
{
"grouping": "hour_of_day",
"groupings": ["hour_of_day"],
"grouping_value": "20",
"grouping_values": [20],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [None, None, None, 11.5010395010395, None],
}
],
},
{
"grouping": "hour_of_day",
"groupings": ["hour_of_day"],
"grouping_value": "21",
"grouping_values": [21],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [None, None, None, None, 9.649484536082475],
}
],
},
{
"grouping": "no_grouping",
"groupings": ["no_grouping"],
"grouping_value": "all",
"grouping_values": ["all"],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
17.533546325878596,
17.19327731092437,
13.44502617801047,
11.5010395010395,
9.649484536082475,
],
}
],
},
{
"grouping": "provider",
"groupings": ["provider"],
"grouping_value": "/providers/12",
"grouping_values": ["/providers/12"],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
15.849056603773583,
16.383561643835616,
13.254545454545454,
10.914027149321267,
10.232142857142858,
],
}
],
},
{
"grouping": "provider",
"groupings": ["provider"],
"grouping_value": "/providers/180",
"grouping_values": ["/providers/180"],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
2.3736263736263736,
0.5454545454545454,
None,
None,
None,
],
}
],
},
{
"grouping": "provider",
"groupings": ["provider"],
"grouping_value": "/providers/19",
"grouping_values": ["/providers/19"],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
22.471698113207548,
21.464788732394368,
16.394366197183096,
14.545454545454547,
11.1288056206089,
],
}
],
},
],
}
我想将这个字典转换为一个 pandas 数据帧,其中键中的时间戳作为一列,键中的值(按键分组)作为其他列(以及列名),如下所示:time_samples
values
grouping_value
grouping_value
time_samples 5 bicycle ...
2022-10-21T15:00:00+00:00 17.533546325878596 2.3736263736263736 ...
2022-10-21T16:00:00+00:00 17.19327731092437 0.5454545454545454 ...
2022-10-21T17:00:00+00:00 13.44502617801047 None ...
2022-10-21T18:00:00+00:00 11.5010395010395 None ...
2022-10-21T19:00:00+00:00 9.649484536082475 None ...
我是这样开始的,但很难继续。有什么想法吗?
df = pd.json_normalize(response_dict)
df['groupings'].explode().apply(pd.Series)
答:
1赞
Laurent
12/11/2022
#1
以下是使用 Pandas concat 的一种方法:
import pandas as pd
df = pd.concat(
[
pd.DataFrame({"time_samples": my_dict["time_samples"]}),
pd.DataFrame(
{
grouping["grouping_value"]: grouping["metrics"][0]["values"]
for grouping in my_dict["groupings"]
}
),
],
axis=1,
)
然后:
print(df)
# Output
time_samples 5 bicycle moped \
0 2022-10-21T15:00:00+00:00 17.533546 2.373626 19.160377
1 2022-10-21T16:00:00+00:00 17.193277 0.545455 18.888889
2 2022-10-21T17:00:00+00:00 13.445026 NaN 14.799076
3 2022-10-21T18:00:00+00:00 11.501040 NaN 12.702641
4 2022-10-21T19:00:00+00:00 9.649485 NaN 10.669714
/geo_features/1448150377 17 18 19 20 \
0 17.533546 17.533546 NaN NaN NaN
1 17.193277 NaN 17.193277 NaN NaN
2 13.445026 NaN NaN 13.445026 NaN
3 11.501040 NaN NaN NaN 11.50104
4 9.649485 NaN NaN NaN NaN
21 all /providers/12 /providers/180 /providers/19
0 NaN 17.533546 15.849057 2.373626 22.471698
1 NaN 17.193277 16.383562 0.545455 21.464789
2 NaN 13.445026 13.254545 NaN 16.394366
3 NaN 11.501040 10.914027 NaN 14.545455
4 9.649485 9.649485 10.232143 NaN 11.128806
评论
my_dict.update({'time_samples': str(my_dict['time_samples'])})
df = pd.json_normalize(my_dict, record_path='groupings', meta=['end_time', 'time_samples'], max_level=0)