提问人:TY00 提问时间:11/9/2023 更新时间:11/9/2023 访问量:58
如何在 Python 中检查每行代码的执行时间和数据量
How to check the execution time and amount of data for each line of code in Python
问:
我正在尝试在 FaaS 上以最佳方式拆分 Python 代码以缩短响应时间。
为了在最佳位置拆分代码,我需要每行的执行时间和每行所依赖的数据大小。 有没有合适的方法可以获得这两个?
我使用的环境是,
- Azure Functions
- VS Code
- Python 编程模型 v2
顺便说一句,可能没有必要拆分它们,但我将尝试将下面代码中的活动函数拆分为多个函数。
import azure.functions as func
import azure.durable_functions as df
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.datasets import fetch_california_housing # Dataset
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error # MSE(Mean Squared Error)
from sklearn.preprocessing import StandardScaler
app = df.DFApp(http_auth_level=func.AuthLevel.ANONYMOUS)
### client function ###
@app.route(route="orchestrators/client_function")
@app.durable_client_input(client_name="client")
async def client_function(req: func.HttpRequest, client: df.DurableOrchestrationClient) -> func.HttpResponse:
    """Start the durable orchestration and return a status-check response."""
    # Kick off the orchestrator with an empty input payload.
    orchestration_id = await client.start_new("orchestrator", None, {})
    # NOTE(review): the response produced by this call is discarded and a
    # fresh status-check response is returned below — confirm intended.
    await client.wait_for_completion_or_create_check_status_response(req, orchestration_id)
    return client.create_check_status_response(req, orchestration_id)
### orchestrator function ###
@app.orchestration_trigger(context_name="context")
def orchestrator(context: df.DurableOrchestrationContext) -> str:
    """Run the single analysis activity; its result is not propagated."""
    yield context.call_activity("origin_analysis", '')
    return "finished"
### activity function ###
@app.blob_output(arg_name="outputblob", path="newblob/test.txt", connection="BlobStorageConnection")
@app.activity_trigger(input_name="blank")
def origin_analysis(blank: str, outputblob: func.Out[str]):
    """Fit plain / Ridge / Lasso regressions on the California housing data.

    Returns the string form of a DataFrame comparing train/test MSE and the
    coefficient of determination of the three models.

    NOTE(review): the ``outputblob`` binding is declared but never written
    (no ``outputblob.set(...)``) — confirm whether a blob output is wanted.
    """
    # --- prepare data ---
    california_housing = fetch_california_housing()
    exp_data = pd.DataFrame(california_housing.data, columns=california_housing.feature_names)
    tar_data = pd.DataFrame(california_housing.target, columns=['HousingPrices'])
    data = pd.concat([exp_data, tar_data], axis=1)
    # Delete anomalous (capped) values.
    data = data[data['HouseAge'] != 52]
    data = data[data['HousingPrices'] != 5.00001]
    # Create derived variables.
    data['Household'] = data['Population'] / data['AveOccup']
    data['AllRooms'] = data['AveRooms'] * data['Household']
    data['AllBedrms'] = data['AveBedrms'] * data['Household']

    ### simple regression analysis ###
    exp_var = 'MedInc'
    tar_var = 'HousingPrices'
    # Remove outliers above the 95th percentile.  (The original applied the
    # identical filter twice; the second pass was a no-op and was removed.)
    q_95 = data['MedInc'].quantile(0.95)
    data = data[data['MedInc'] < q_95]
    # Split into explanatory and objective variables, then fit.
    X = data[[exp_var]]
    y = data[[tar_var]]
    model = LinearRegression()
    model.fit(X, y)

    ### multiple regression analysis ###
    exp_vars = ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
    tar_var = 'HousingPrices'
    # Remove outliers per explanatory variable.
    for exp_var in exp_vars:
        q_95 = data[exp_var].quantile(0.95)
        data = data[data[exp_var] < q_95]
    X = data[exp_vars]
    y = data[[tar_var]]
    # Split into training and test data.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    # Standardize using statistics learned from the training data only.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = pd.DataFrame(scaler.transform(X_train), columns=exp_vars)
    X_test_scaled = scaler.transform(X_test)
    # Plain multiple linear regression.
    model = LinearRegression()
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_train_scaled)          # training predictions
    y_test_pred = model.predict(X_test_scaled)      # test predictions
    mse_train = mean_squared_error(y_train, y_pred)
    mse_test = mean_squared_error(y_test, y_test_pred)
    # Ridge regression.
    ridge = Ridge(alpha=1.0)
    ridge.fit(X_train_scaled, y_train)
    ridge_y_pred = ridge.predict(X_train_scaled)
    # Partial regression coefficients (kept for inspection/logging).
    ridge_w = pd.DataFrame(ridge.coef_.T, index=exp_vars, columns=['Ridge'])
    for xi, wi in zip(exp_vars, ridge.coef_[0]):
        print('{0:7s}: {1:6.3f}'.format(xi, wi))
    ridge_mse_train = mean_squared_error(y_train, ridge_y_pred)
    ridge_y_test_pred = ridge.predict(X_test_scaled)
    ridge_mse_test = mean_squared_error(y_test, ridge_y_test_pred)
    # Lasso regression.
    lasso = Lasso(alpha=1.0)
    lasso.fit(X_train_scaled, y_train)
    lasso_y_pred = lasso.predict(X_train_scaled)
    lasso_w = pd.Series(index=exp_vars, data=lasso.coef_)
    lasso_mse_train = mean_squared_error(y_train, lasso_y_pred)
    # (The original re-ran scaler.transform(X_test) here; X_test_scaled is
    # identical and is reused instead.)
    lasso_y_pred_test = lasso.predict(X_test_scaled)
    lasso_mse_test = mean_squared_error(y_test, lasso_y_pred_test)
    # Compare accuracy with and without regularization.
    data = {'Training data MSE': [mse_train, ridge_mse_train, lasso_mse_train],
            'Test Data MSE': [mse_test, ridge_mse_test, lasso_mse_test],
            'coefficient of determination': [model.score(X_test_scaled, y_test), ridge.score(X_test_scaled, y_test), lasso.score(X_test_scaled, y_test)]}
    df_mse = pd.DataFrame(data=data, index=['multiple regression', 'Ridge regression', 'Lasso regression'])
    return str(df_mse)
答:
1赞
SiddheshDesai
12/4/2023
#1
您可以使用 time 模块来检查代码中代码块的执行时间,我已经导入了 time 模块并对您的代码进行了如下更改:-
我的function_app.py
import azure.functions as func
import azure.durable_functions as df
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.datasets import fetch_california_housing # Dataset
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error # MSE(Mean Squared Error)
from sklearn.preprocessing import StandardScaler
import sys
import cProfile
import time
app = df.DFApp(http_auth_level=func.AuthLevel.ANONYMOUS)
### client function ###
@app.route(route="orchestrators/client_function")
@app.durable_client_input(client_name="client")
async def client_function(req: func.HttpRequest, client: df.DurableOrchestrationClient) -> func.HttpResponse:
    """Start the durable orchestration and return a status-check response."""
    orchestration_id = await client.start_new("orchestrator", None, {})
    # NOTE(review): the response produced by this call is discarded and a
    # fresh status-check response is returned below — confirm intended.
    await client.wait_for_completion_or_create_check_status_response(req, orchestration_id)
    return client.create_check_status_response(req, orchestration_id)
### orchestrator function ###
@app.orchestration_trigger(context_name="context")
def orchestrator(context: df.DurableOrchestrationContext) -> str:
    """Run the single analysis activity; its result is not propagated."""
    yield context.call_activity("origin_analysis", '')
    return "finished"
### activity function ###
@app.blob_output(arg_name="outputblob", path="newblob/test.txt", connection="BlobStorageConnection")
@app.activity_trigger(input_name="blank")
def origin_analysis(blank: str, outputblob: func.Out[str]):
    """Fit plain / Ridge / Lasso regressions and report wall-clock time and
    dataset memory footprint.

    Returns the string form of a DataFrame comparing train/test MSE and the
    coefficient of determination of the three models.

    NOTE(review): the ``outputblob`` binding is declared but never written
    (no ``outputblob.set(...)``) — confirm whether a blob output is wanted.
    """
    start_time = time.time()
    # --- prepare data ---
    california_housing = fetch_california_housing()
    exp_data = pd.DataFrame(california_housing.data, columns=california_housing.feature_names)
    tar_data = pd.DataFrame(california_housing.target, columns=['HousingPrices'])
    data = pd.concat([exp_data, tar_data], axis=1)
    # Delete anomalous (capped) values.
    data = data[data['HouseAge'] != 52]
    data = data[data['HousingPrices'] != 5.00001]
    # Create derived variables.
    data['Household'] = data['Population'] / data['AveOccup']
    data['AllRooms'] = data['AveRooms'] * data['Household']
    data['AllBedrms'] = data['AveBedrms'] * data['Household']

    ### simple regression analysis ###
    exp_var = 'MedInc'
    tar_var = 'HousingPrices'
    # Remove outliers above the 95th percentile.  (The original applied the
    # identical filter twice; the second pass was a no-op and was removed.)
    q_95 = data['MedInc'].quantile(0.95)
    data = data[data['MedInc'] < q_95]
    X = data[[exp_var]]
    y = data[[tar_var]]
    model = LinearRegression()
    model.fit(X, y)

    ### multiple regression analysis ###
    exp_vars = ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
    tar_var = 'HousingPrices'
    # Remove outliers per explanatory variable.
    for exp_var in exp_vars:
        q_95 = data[exp_var].quantile(0.95)
        data = data[data[exp_var] < q_95]
    X = data[exp_vars]
    y = data[[tar_var]]
    # Split into training and test data.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    # Standardize using statistics learned from the training data only.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = pd.DataFrame(scaler.transform(X_train), columns=exp_vars)
    X_test_scaled = scaler.transform(X_test)
    # Plain multiple linear regression.
    model = LinearRegression()
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_train_scaled)
    y_test_pred = model.predict(X_test_scaled)
    mse_train = mean_squared_error(y_train, y_pred)
    mse_test = mean_squared_error(y_test, y_test_pred)
    # Ridge regression.
    ridge = Ridge(alpha=1.0)
    ridge.fit(X_train_scaled, y_train)
    ridge_y_pred = ridge.predict(X_train_scaled)
    ridge_w = pd.DataFrame(ridge.coef_.T, index=exp_vars, columns=['Ridge'])
    for xi, wi in zip(exp_vars, ridge.coef_[0]):
        print('{0:7s}: {1:6.3f}'.format(xi, wi))
    ridge_mse_train = mean_squared_error(y_train, ridge_y_pred)
    ridge_y_test_pred = ridge.predict(X_test_scaled)
    ridge_mse_test = mean_squared_error(y_test, ridge_y_test_pred)
    # Lasso regression.
    lasso = Lasso(alpha=1.0)
    lasso.fit(X_train_scaled, y_train)
    lasso_y_pred = lasso.predict(X_train_scaled)
    lasso_w = pd.Series(index=exp_vars, data=lasso.coef_)
    lasso_mse_train = mean_squared_error(y_train, lasso_y_pred)
    # X_test_scaled is reused instead of re-running scaler.transform(X_test).
    lasso_y_pred_test = lasso.predict(X_test_scaled)
    lasso_mse_test = mean_squared_error(y_test, lasso_y_pred_test)
    # Measure the dataset size BEFORE 'data' is rebound to the summary dict
    # below.  (The original called sys.getsizeof on the summary dict, which
    # is shallow and measured the wrong object.)
    data_size = int(data.memory_usage(deep=True).sum())
    # Compare accuracy with and without regularization.
    data = {'Training data MSE': [mse_train, ridge_mse_train, lasso_mse_train],
            'Test Data MSE': [mse_test, ridge_mse_test, lasso_mse_test],
            'coefficient of determination': [model.score(X_test_scaled, y_test), ridge.score(X_test_scaled, y_test), lasso.score(X_test_scaled, y_test)]}
    df_mse = pd.DataFrame(data=data, index=['multiple regression', 'Ridge regression', 'Lasso regression'])
    end_time = time.time()
    execution_time = end_time - start_time
    print(f"Execution time: {execution_time} seconds")
    print(f"Size of 'data': {data_size} bytes")
    return str(df_mse)
# test = cProfile.run(origin_analysis)
# print(test)
输出:-
您还可以使用 cProfile 来获取每个代码块的执行时间,以下是使用 cProfile 的function_app.py
代码:-
function_app.py:-
import azure.functions as func
import azure.durable_functions as df
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.datasets import fetch_california_housing # Dataset
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error # MSE(Mean Squared Error)
from sklearn.preprocessing import StandardScaler
import sys
import cProfile
import time
app = df.DFApp(http_auth_level=func.AuthLevel.ANONYMOUS)
### client function ###
@app.route(route="orchestrators/client_function")
@app.durable_client_input(client_name="client")
async def client_function(req: func.HttpRequest, client: df.DurableOrchestrationClient) -> func.HttpResponse:
    """Start the durable orchestration and return a status-check response."""
    orchestration_id = await client.start_new("orchestrator", None, {})
    # NOTE(review): the response produced by this call is discarded and a
    # fresh status-check response is returned below — confirm intended.
    await client.wait_for_completion_or_create_check_status_response(req, orchestration_id)
    return client.create_check_status_response(req, orchestration_id)
### orchestrator function ###
@app.orchestration_trigger(context_name="context")
def orchestrator(context: df.DurableOrchestrationContext) -> str:
    """Run the single analysis activity; its result is not propagated."""
    yield context.call_activity("origin_analysis", '')
    return "finished"
### activity function ###
@app.blob_output(arg_name="outputblob", path="newblob/test.txt", connection="BlobStorageConnection")
@app.activity_trigger(input_name="blank")
def origin_analysis(blank: str, outputblob: func.Out[str]):
    """Fit plain / Ridge / Lasso regressions under a cProfile session and
    report wall-clock time and dataset memory footprint.

    Returns the string form of a DataFrame comparing train/test MSE and the
    coefficient of determination of the three models.

    NOTE(review): the ``outputblob`` binding is declared but never written
    (no ``outputblob.set(...)``) — confirm whether a blob output is wanted.
    """
    start_time = time.time()
    # Profile the WHOLE activity body.  (The original enabled the profiler
    # only around the final summary-dict construction, so the printed stats
    # covered almost none of the work.)
    profiler = cProfile.Profile()
    profiler.enable()
    # --- prepare data ---
    california_housing = fetch_california_housing()
    exp_data = pd.DataFrame(california_housing.data, columns=california_housing.feature_names)
    tar_data = pd.DataFrame(california_housing.target, columns=['HousingPrices'])
    data = pd.concat([exp_data, tar_data], axis=1)
    # Delete anomalous (capped) values.
    data = data[data['HouseAge'] != 52]
    data = data[data['HousingPrices'] != 5.00001]
    # Create derived variables.
    data['Household'] = data['Population'] / data['AveOccup']
    data['AllRooms'] = data['AveRooms'] * data['Household']
    data['AllBedrms'] = data['AveBedrms'] * data['Household']

    ### simple regression analysis ###
    exp_var = 'MedInc'
    tar_var = 'HousingPrices'
    # Remove outliers above the 95th percentile.  (The original applied the
    # identical filter twice; the second pass was a no-op and was removed.)
    q_95 = data['MedInc'].quantile(0.95)
    data = data[data['MedInc'] < q_95]
    X = data[[exp_var]]
    y = data[[tar_var]]
    model = LinearRegression()
    model.fit(X, y)

    ### multiple regression analysis ###
    exp_vars = ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
    tar_var = 'HousingPrices'
    # Remove outliers per explanatory variable.
    for exp_var in exp_vars:
        q_95 = data[exp_var].quantile(0.95)
        data = data[data[exp_var] < q_95]
    X = data[exp_vars]
    y = data[[tar_var]]
    # Split into training and test data.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    # Standardize using statistics learned from the training data only.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = pd.DataFrame(scaler.transform(X_train), columns=exp_vars)
    X_test_scaled = scaler.transform(X_test)
    # Plain multiple linear regression.
    model = LinearRegression()
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_train_scaled)
    y_test_pred = model.predict(X_test_scaled)
    mse_train = mean_squared_error(y_train, y_pred)
    mse_test = mean_squared_error(y_test, y_test_pred)
    # Ridge regression.
    ridge = Ridge(alpha=1.0)
    ridge.fit(X_train_scaled, y_train)
    ridge_y_pred = ridge.predict(X_train_scaled)
    ridge_w = pd.DataFrame(ridge.coef_.T, index=exp_vars, columns=['Ridge'])
    for xi, wi in zip(exp_vars, ridge.coef_[0]):
        print('{0:7s}: {1:6.3f}'.format(xi, wi))
    ridge_mse_train = mean_squared_error(y_train, ridge_y_pred)
    ridge_y_test_pred = ridge.predict(X_test_scaled)
    ridge_mse_test = mean_squared_error(y_test, ridge_y_test_pred)
    # Lasso regression.
    lasso = Lasso(alpha=1.0)
    lasso.fit(X_train_scaled, y_train)
    lasso_y_pred = lasso.predict(X_train_scaled)
    lasso_w = pd.Series(index=exp_vars, data=lasso.coef_)
    lasso_mse_train = mean_squared_error(y_train, lasso_y_pred)
    # X_test_scaled is reused instead of re-running scaler.transform(X_test).
    lasso_y_pred_test = lasso.predict(X_test_scaled)
    lasso_mse_test = mean_squared_error(y_test, lasso_y_pred_test)
    # Measure the dataset size BEFORE 'data' is rebound to the summary dict
    # below.  (The original called sys.getsizeof on the summary dict, which
    # is shallow and measured the wrong object.)
    data_size = int(data.memory_usage(deep=True).sum())
    # Compare accuracy with and without regularization.
    data = {'Training data MSE': [mse_train, ridge_mse_train, lasso_mse_train],
            'Test Data MSE': [mse_test, ridge_mse_test, lasso_mse_test],
            'coefficient of determination': [model.score(X_test_scaled, y_test), ridge.score(X_test_scaled, y_test), lasso.score(X_test_scaled, y_test)]}
    df_mse = pd.DataFrame(data=data, index=['multiple regression', 'Ridge regression', 'Lasso regression'])
    profiler.disable()
    profiler.print_stats(sort='cumtime')
    end_time = time.time()
    execution_time = end_time - start_time
    print(f"Execution time: {execution_time} seconds")
    print(f"Size of 'data': {data_size} bytes")
    return str(df_mse)
输出:-
评论