提问人:lostwanderer 提问时间:1/15/2022 更新时间:1/15/2022 访问量:471
DataFrame 中的 Python 数字格式
Python Number Formatting in DataFrame
问:
我正在整理一个 p 值表(即下面代码中的 var_sig),但无法让数字显示为小数点后 4 位。我尝试了 map 函数,但它返回了错误消息“Unknown format code 'f' for object of type 'str'”(未知格式代码 'f',对象类型为 'str')。然后我尝试将 pval 包装在 float() 中,但这同样返回了错误。如何修复以下代码?
# Small example data set; the call to glm_freq further down uses 'n' as the
# exposure column, 'c' as the response and 'n_log' as the offset.
insdf = pd.DataFrame(
    {
        'n': [500, 1200, 100, 400, 500, 300],
        'c': [42, 37, 1, 101, 73, 14],
        'car': ['small', 'medium', 'large', 'small', 'medium', 'large'],
        'age': [1, 1, 1, 2, 2, 2],
    }
)
# Natural log of 'n', appended as the last column.
insdf = insdf.assign(n_log=np.log(insdf['n']))

# Columns handed to glm_freq as the rating variables.
ratingvar = ['car', 'age']
def _train_glm(hf, family, predictors, response, offset):
    """Fit and return an H2O GLM with lambda_=0 and p-values enabled."""
    model = H2OGeneralizedLinearEstimator(
        family=family,
        lambda_=0,
        compute_p_values=True,
        offset_column=offset,
    )
    model.train(predictors, response, training_frame=hf)
    return model


def _final_neg_log_likelihood(model):
    """Return the last 'negative_log_likelihood' value in the scoring history."""
    return model.scoring_history()['negative_log_likelihood'].iloc[-1]


def glm_freq(df, family, ratingvar, exposure, response, offset):
    """Fit a GLM on ``df`` and return one p-value per rating variable.

    The full model is trained on every column in ``ratingvar``.  Each
    variable is then dropped in turn, the reduced model is refitted, and
    ``2 * (reduced_nll - full_nll)`` is compared against a chi-square
    distribution with ``(number of unique values - 1)`` degrees of freedom.

    Args:
        df: pandas DataFrame holding the predictors, exposure, response and
            offset columns.
        family: GLM family name passed through to the H2O estimator.
        ratingvar: list of predictor column names; each is converted to a
            categorical column before fitting.
        exposure: column name summed per level to order the levels.
        response: name of the response column.
        offset: name of the offset column.

    Returns:
        DataFrame with columns ``'ratingvar'`` and ``'pval'``; ``'pval'``
        holds strings formatted to four decimal places.
    """
    h2o.no_progress()
    hf = h2o.H2OFrame(df)

    # first convert to categorical variables
    print('***Rebasing levels***')
    for r in ratingvar:
        hf[r] = hf[r].asfactor()
        # then re-basing on level with most exposure
        exposure_by_level = pd.pivot_table(
            df, values=[exposure], index=[r], aggfunc='sum'
        )
        rebase = exposure_by_level.sort_values(
            exposure, ascending=False
        ).index.to_list()
        rebase_list = [str(x) for x in rebase]
        # NOTE(review): h2o documents set_levels as replacing level labels
        # aligned with the existing domain, not as reordering them -- confirm
        # this really rebases (relevel may be the intended call).
        hf[r] = hf[r].set_levels(levels=rebase_list)

    print('***Fitting GLM***')
    glm_model = _train_glm(hf, family, ratingvar, response, offset)

    print('***Printing model diagnostics***')
    print('Model AIC is: ', glm_model.aic())
    print('Model Residual Deviance is: ', glm_model.residual_deviance())
    llf = _final_neg_log_likelihood(glm_model)
    print('Log Likelihood is: ', llf)

    print('***Calculating Type III Statistics***')
    # Rows are collected in a list and the frame is built once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0.
    rows = []
    for p in ratingvar:
        remainder_pred = ratingvar.copy()
        remainder_pred.remove(p)
        glm_model_tmp = _train_glm(hf, family, remainder_pred, response, offset)
        llvar = _final_neg_log_likelihood(glm_model_tmp)
        d_freedom = df[p].unique().shape[0] - 1
        print('Degree of freedom of ', str(p), ' is ', d_freedom)
        chisqstat = 2 * (llvar - llf)
        print('Log Likelihood of model without ', str(p), ' is ', llvar)
        print('Chi-sq stat of ', str(p), ' is ', chisqstat)
        # sf is the survival function (1 - cdf) evaluated directly, which
        # avoids cancellation when the p-value is very small.
        pval = stats.chi2.sf(chisqstat, d_freedom)
        rows.append({'ratingvar': p, 'pval': pval})

    var_sig = pd.DataFrame(rows, columns=['ratingvar', 'pval'])
    # Format exactly once, after every p-value is known.  Formatting inside
    # the loop re-applied '{:.4f}' to values that were already strings and
    # raised "Unknown format code 'f' for object of type 'str'".
    var_sig['pval'] = var_sig['pval'].map('{:.4f}'.format)
    return var_sig
# Run the analysis on the example data, naming each argument explicitly.
glm_freq(
    df=insdf,
    family='poisson',
    ratingvar=ratingvar,
    exposure='n',
    response='c',
    offset='n_log',
)
答: 暂无答案
评论