Initialization of a python function from C++ code takes too long

Asked by: Lukas Kretschmann  Asked: 11/8/2023  Last edited by: quamrana  Updated: 11/8/2023  Views: 43

Q:

I have a C++ script in which I load a Python file and call a function to evaluate a graph neural network. The function is used to evaluate data inside an event loop, so it is executed for every event. My problem is that the initialization takes very long: the first time I evaluate the model in the Python code it takes ~8 seconds, but if I run it again right away it only takes 0.005 seconds, which is great. What can I do to speed up this initialization, since I need to loop over millions of events?

Here is the C++ code:

//arachne
#include "ArachneWupperFlow.hh"
#include "pic.h"

//std/sys
#include <iostream>
#include <stdio.h>
#include <sys/stat.h>

//python
#define PY_SSIZE_T_CLEAN
#include <Python.h>

//root
#include "TSystem.h"

ArachneWupperFlow::ArachneWupperFlow()
{
  runWupperFlow = false; //flag to run WupperFlow
  runMode = "DNN"; //Possible run-modes are DNN or GNN
  modelFilepath = ""; //path to ONNX model
  num_inputs = -999; //number of input variables

  nnoutbins = 100; //number of nnout-bins
  nnout = -999;    //nnout variable
  nvar = -1;       //nvar     
  GNNnvar = -1;    //number of elements in the GNNarray per event
  num_classes = 1; 

  name = "";
  train_tree_name = "";
}

ArachneWupperFlow::~ArachneWupperFlow()
{

}

void ArachneWupperFlow::setUpWupperFlow()
{
  if (runMode == "GNN") {
    std::cout<<"Running in mode GNN. Preparing everything for interference of the GNN model."<<std::endl;

    //Print Logo
    print_WupperFlow("GNN Evaluator");

    //Initialize the python objects
    PyObject *pName, *pModule;//, *pFunc;
    PyObject *pArgs, *pValue;
    PyObject* GNNList = PyList_New(0);

    //Initialize the interpreter
    Py_Initialize();

    //Load the python module
    pName = PyUnicode_DecodeFSDefault("scripts.python.WupperFlowEvaluator");

    //Import the function from the python module
    pModule = PyImport_Import(pName);
    Py_DECREF(pName);

    if (pModule != NULL) {
      //Obtain the function from the python module
      //pFunc is a new reference
      pFunc = PyObject_GetAttrString(pModule, "main");

      if (pFunc && PyCallable_Check(pFunc)) {
        //Retrieve the arguments to be passed to the function
        pArgs = PyTuple_New(2);

        //Convert the C++ std::vector with the GNN input-vars to a python list
        int size = GNNnvar;
        if (size == -1) {
          std::cout<<"Please provide the number of elements in the GNNarray with the GNNnvar parameter!"<<std::endl;
        } else {
          //Fill the python list with the GNN input-vars
          for (int i=0; i<size; i++) {
            PyObject *item = PyFloat_FromDouble(2.0);
            PyList_Append(GNNList, item); //PyList_Append does not steal the reference
            Py_DECREF(item);
          }
        }
        //Fill the arguments for the python function, GNN input-list and event number
        PyTuple_SetItem(pArgs, 0, GNNList);
        PyTuple_SetItem(pArgs, 1, PyLong_FromLong(345654323));

        //Call the function and get the output
        pValue = PyObject_CallObject(pFunc, pArgs);
        //PyTuple_SetItem stole the reference to GNNList, so releasing pArgs also releases the list
        Py_DECREF(pArgs);
        if (pValue != NULL) {
          //Get the output score of the GNN inference
          nnout = PyFloat_AsDouble(pValue);
          Py_DECREF(pValue);
        } else {
          //Do not release pFunc or pModule here: pFunc is reused in evaluate() and pModule is released below
          PyErr_Print();
          fprintf(stderr,"Call failed\n");
        }
      }
      else {
        if (PyErr_Occurred()) {
          PyErr_Print();
        }
        fprintf(stderr, "Cannot find function \"%s\"\n", "main");
      }
      // Py_XDECREF(pFunc);
      Py_DECREF(pModule);
    } else {
      PyErr_Print();
      fprintf(stderr, "Failed to load \"%s\"\n", "scripts.python.WupperFlowEvaluator");
    }



  }
}

double ArachneWupperFlow::evaluate(float* inputarray, ULong_t event_number)
{
  if (runMode == "DNN") {
    
  } else if (runMode == "GNN") {
    PyObject *pArgs, *pValue;
    PyObject* GNNList = PyList_New(0);
    // Retrieve the arguments to be passed to the function
    pArgs = PyTuple_New(2);

    //Convert the C++ std::vector with the GNN input-vars to a python list
    int size = GNNnvar;
    if (size == -1) {
      std::cout<<"Please provide the number of elements in the GNNarray with the GNNnvar parameter!"<<std::endl;
    } else {
      //Fill the python list with the GNN input-vars
      for (int i=0; i<size; i++) {
        PyObject *item = PyFloat_FromDouble(inputarray[i]);
        PyList_Append(GNNList, item); //PyList_Append does not steal the reference
        Py_DECREF(item);
      }
    }
    //Fill the arguments for the python function, GNN input-list and event number
    PyTuple_SetItem(pArgs, 0, GNNList);
    PyTuple_SetItem(pArgs, 1, PyLong_FromUnsignedLong(event_number)); //event_number is an unsigned ULong_t

    //Call the function and get the output
    pValue = PyObject_CallObject(pFunc, pArgs);
    //PyTuple_SetItem stole the reference to GNNList, so releasing pArgs also releases the list
    Py_DECREF(pArgs);
    if (pValue != NULL) {
      //Get the output score of the GNN inference
      nnout = PyFloat_AsDouble(pValue);
      Py_DECREF(pValue);
    } else {
      // Py_DECREF(pFunc);
      // Py_DECREF(pModule);
      PyErr_Print();
      fprintf(stderr,"Call failed\n");
    }

    
  }

  //Return the output score
  std::cout<<nnout<<std::endl;
  return nnout;
}

Here is the Python part:

import os
import numpy as np
import pandas as pd
import sonnet as snt
import tensorflow as tf
from graph_nets import utils_tf
from graph_nets import graphs
from graph_nets import modules


def calc_dphi_array(phi1,phi2):
  ...

def make_graph(event):
  ...

class MyMLP(snt.Module):
  @tf.function()
  def __init__(self,latent_size,num_layers,dropout,activation):
    super(MyMLP, self).__init__(name=None)
    self.mlp = snt.nets.MLP([latent_size] * num_layers, activate_final=True, dropout_rate=dropout, w_init = None, b_init = None, activation = activation)
    self.ln = snt.LayerNorm(axis=-1, create_scale=True, create_offset=False)
    self.use_dropout = (dropout != 0)

  @tf.function()
  def __call__(self, inputs):
    if self.use_dropout:
      outputs = self.mlp(inputs, is_training=False)
    else:
      outputs = self.mlp(inputs)
    outputs = self.ln(outputs)
    return outputs

class OutputMLP(snt.Module):
  @tf.function()
  def __init__(self, global_output_size = 1, latent_size=64, dropout=0.05, activation=tf.nn.leaky_relu):
    super(OutputMLP, self).__init__(name=None)
    self.mlp = snt.nets.MLP([latent_size, global_output_size],
                            name='global_output', dropout_rate = dropout, w_init = None, b_init = None, activation = activation)
    self.use_dropout = (dropout != 0)

  @tf.function()
  def __call__(self, inputs):
    if self.use_dropout:
      outputs = self.mlp(inputs, is_training=False)
    else:
      outputs = self.mlp(inputs)
    outputs = tf.sigmoid(outputs)
    return outputs
 
def make_mlp_model(latent_size=64,num_layers=4,dropout=0.05,activation=tf.nn.leaky_relu):
  return MyMLP(latent_size,num_layers,dropout,activation)

class MLPGraphIndependent(snt.Module):
  """GraphIndependent with MLP edge, node, and global models."""
  @tf.function()
  def __init__(self):
    super(MLPGraphIndependent, self).__init__(name="MLPGraphIndependent")
    self._network = modules.GraphIndependent(
      edge_model_fn=make_mlp_model,
      node_model_fn=make_mlp_model,
      global_model_fn=make_mlp_model)

  @tf.function()
  def __call__(self, inputs):
    return self._network(inputs)

class OutputTransform(snt.Module):
  @tf.function()
  def __init__(self):
    super(OutputTransform, self).__init__(name="OutputTransform")
    self._network = modules.GraphIndependent(
      edge_model_fn = None,
      node_model_fn = None,
      global_model_fn = OutputMLP)
    
  @tf.function()
  def __call__(self, inputs):
    return self._network(inputs)

class MLPGraphNetwork(snt.Module):
  """GraphIndependent with MLP edge, node, and global models."""
  @tf.function()
  def __init__(self):
    super(MLPGraphNetwork, self).__init__(name="MLPGraphNetwork")
    self._network = modules.GraphNetwork(
      edge_model_fn=make_mlp_model,
      node_model_fn=make_mlp_model,
      global_model_fn=make_mlp_model)

  @tf.function()
  def __call__(self, inputs):
    return self._network(inputs)

class MLPAttentionNetwork(snt.Module):
  """SelfAttention with MLP edge, node, and global models."""
  @tf.function()
  def __init__(self):
    super(MLPAttentionNetwork, self).__init__(name="MLPAttentionNetwork")
    self._attn = modules.SelfAttention()

  @tf.function()
  def __call__(self, inputs):
    nodes = inputs.nodes
    return self._attn(nodes,nodes,nodes,inputs)
  
class GeneralClassifier(snt.Module):
    @tf.function()
    def __init__(self):
        super(GeneralClassifier, self).__init__(name="GeneralClassifier")
        self._encoder = MLPGraphIndependent()
        self._core    = MLPGraphNetwork()
        self._decoder = MLPGraphIndependent()
        # Transforms the outputs into appropriate shapes.
        self._output_transform = OutputTransform()

    @tf.function()
    def __call__(self, input_op, num_processing_steps):
      latent = self._encoder(input_op)
      latent0 = latent
      
      output_ops = []
      for _ in range(num_processing_steps):
        core_input = utils_tf.concat([latent0, latent], axis=1)
        latent = self._core(core_input)
        
      decoded_op = self._decoder(latent)
      output_ops.append(self._output_transform(decoded_op))
      return output_ops

class AttentionClassifier(snt.Module):
    @tf.function()
    def __init__(self):
        super(AttentionClassifier, self).__init__(name="AttentionClassifier")          
        self._encoder = MLPGraphIndependent()
        self._attn    = MLPAttentionNetwork()
        self._core    = MLPGraphNetwork()
        self._decoder = MLPGraphIndependent()
        # Transforms the outputs into appropriate shapes.
        self._output_transform = OutputTransform()

    @tf.function()
    def __call__(self, input_op, num_processing_steps):
      latent = self._encoder(input_op)
      latent0 = latent
      
      output_ops = []
      for _ in range(num_processing_steps):
        core_input = utils_tf.concat([latent0, latent], axis=1)
        latent = self._core( self._attn(core_input))
        
      decoded_op = self._decoder(latent)
      output_ops.append(self._output_transform(decoded_op))
      return output_ops


"""
Main executable for evaluating WupperFlow GNNs in the Arachne event-loop
""" 
def main(input_list, event_number):
  ...

      import time
      start = time.time()
      output = model(input_graphs_ntuple, nprocsteps)
      end = time.time()
      print("Eval time: "+str(end-start))
      start = time.time()
      output = model(input_graphs_ntuple, nprocsteps)
      end = time.time()
      print("Eval time: "+str(end-start))

  return output[0][4].numpy()[0][0]

I left out the parts that are not a problem. The main issue is that the call output = model(input_graphs_ntuple, nprocsteps) takes so long, because the initialization takes some time. I call it twice here because I noticed that the second execution is much faster that way, but in the end I only want to call it once.

I have already tried some optimizations of the tf code, but that does not help enough; the main problem is the initialization.
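
For context on why the second call is so much faster: the first call to a tf.function-decorated model triggers graph tracing and compilation (on top of general TensorFlow startup), while later calls with the same input signature reuse the compiled graph. Below is a minimal, self-contained sketch of that behaviour; model_step and the tensor shapes are made-up stand-ins for illustration, not part of WupperFlowEvaluator:

import time
import tensorflow as tf

@tf.function
def model_step(x):
  # Stand-in for the GNN forward pass; only here to show the one-time tracing cost
  return tf.reduce_sum(tf.nn.leaky_relu(tf.matmul(x, x, transpose_b=True)))

dummy = tf.random.normal((128, 64))

for label in ("first call (traces and compiles)", "second call (reuses the trace)"):
  start = time.time()
  model_step(dummy)
  print(label, time.time() - start)

This is why a single warm-up call with dummy inputs at setup time (which setUpWupperFlow already attempts) can move the cost out of the per-event loop, provided the later calls do not retrace, i.e. the input dtypes and shapes stay consistent.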

python tensorflow initialization python-c-api graph-neural-network

Comments

1 upvote  roganjosh  11/8/2023
One approach is to have a middleman script that is basically a server which already has the imports loaded. I'm not sure there is a way to overcome this overhead in a script that cold-loads everything each time, but that's the hope!
0 upvotes  Lukas Kretschmann  11/8/2023
@roganjosh Do you have any reference (link?) or example for me? I have already thought about this, but I don't really know how to go about it. If you have some reference or example, it would be great if you could share more details! Thanks a lot for the suggestion :)
0 upvotes  roganjosh  11/8/2023
You could do this easily with a flask server, but you would need a serialize/deserialize step to pass a JSON payload between the two. Unless you have large data, passing it over localhost should be minimal overhead compared to your 8 seconds. The model itself (as long as it is static) can be loaded globally at app startup.

A: No answers yet