C++ and Python give different results

Dmitry · August 11, 2021, 10:14am

I’m trying to create a Python module in C++ that uses a model which works well in Python. But in C++ it gives bad results on the same machine.

Python code:

# Load the compiled library and build a graph executor on the OpenCL device.
device = tvm.opencl()
compiled = tvm.runtime.load_module("lib/deploy_lib.tar.so")
executor = graph_executor.GraphModule(compiled["default"](device))

# Feed the array `a` as 'input', run the graph and fetch output 0 as a NumPy array.
executor.set_input('input', tvm.nd.array(a))
executor.run()
masks = executor.get_output(0).numpy()

# Sort indices along axis 0 by descending value (argsort of the negated data)
# and keep the first six; `mask` holds the index of the largest value per position.
negated = masks[0]*-1
best = negated.argsort(axis=0)[:6, :, :]
mask = best[0]

C++ code:

class Segmentator  
{  
        private:  
            std::vector<int64_t> input_shape;  
            std::vector<int64_t> output_shape;  

            DLDevice dev;
            tvm::runtime::Module module;
            tvm::runtime::Module gmod;
            tvm::runtime::PackedFunc _set_input;
            tvm::runtime::PackedFunc _get_output;
            tvm::runtime::PackedFunc _run;

            tvm::runtime::NDArray input;
            tvm::runtime::NDArray output;
            boost::python::numpy::ndarray output_np;
        public:
            Segmentator(std::string filename, boost::python::list input_shape,
                        boost::python::list output_shape, 
                        const boost::python::numpy::ndarray& dummy_output) :
            output_np(dummy_output)
            {
              this->input_shape = std::vector<int64_t>(4);
              this->output_shape = std::vector<int64_t>(4);

              for (int i = 0; i< 4; i++)
              {
                this->input_shape[i] = int64_t(boost::python::extract<int64_t>(input_shape[i]));
                this->output_shape[i] = int64_t(boost::python::extract<int64_t>(output_shape[i]));
              }
              // load in the library
              dev = DLDevice{kDLOpenCL, 0};
              module = tvm::runtime::Module::LoadFromFile(filename);
              // create the graph executor module
              gmod = module.GetFunction("default")(dev);
              _set_input = gmod.GetFunction("set_input");
              _get_output = gmod.GetFunction("get_output");
              _run = gmod.GetFunction("run");

              input = tvm::runtime::NDArray::Empty(this->input_shape, 
                                                    DLDataType{kDLFloat, 32, 1}, dev);

              output = tvm::runtime::NDArray::Empty(this->output_shape, 
                                                    DLDataType{kDLFloat, 32, 1}, dev);
            };
            void set_input(boost::python::numpy::ndarray input_)
            {
                                  this->input.CopyFromBytes(input_.get_data(),
                                  this->input_shape[0]*this->input_shape[1]*
                                  this->input_shape[2]*this->input_shape[3]*4);
                                  _set_input("input", this->input);
            };

            void run()
            {
              _run();
            };

            boost::python::numpy::ndarray get_output()
            { 
              _get_output(0, output);
              

              output.CopyToBytes(output_np.get_data(), output_shape[0]*
                                                       output_shape[1]*
                                                       output_shape[2]*
                                                       output_shape[3]*
                                                       4);
              return output_np;
            };
    };

And Python code which uses it:

import segmentator
...

# Build the C++-backed wrapper; `b` is the preallocated array that receives the output.
# The instance gets its own name so it does not rebind the imported module.
seg = segmentator.Segmentator('./lib/deploy_lib.tar.so', [1, 3, 224, 224], [1, 264, 224, 224], b)
seg.set_input(a)
seg.run()
masks = seg.get_output()
# Sort indices along axis 0 by descending value and keep the first six.
best = (masks[0]*-1).argsort(axis=0)[:6, :, :]
mask = best[0]

I checked input_ in set_input. It’s the same in Python and C++, so it’s not an issue with the conversion of numpy.ndarray to boost::python::numpy::ndarray.

Dmitry · August 11, 2021, 11:09am

I was slightly wrong. If I read the data back into a new array like this:

boost::python::numpy::ndarray test_np = boost::python::numpy::zeros(boost::python::make_tuple(this->input_shape[0],
                                                                            this->input_shape[1],
                                                                            this->input_shape[2],
                                                                            this->input_shape[3]),
                                                  boost::python::numpy::dtype::get_builtin<float>());


this->input.CopyToBytes(test_np.get_data(),
                              this->input_shape[0]*this->input_shape[1]*
                              this->input_shape[2]*this->input_shape[3]*4);
std::cerr<<boost::python::extract<char const *>(boost::python::str(test_np))<<std::endl;

It is not equal to the input. But I still don’t know what I did wrong.

Dmitry · August 11, 2021, 11:55am

TVMSynchronize(kDLOpenCL, 0, NULL);
Doesn’t work for me either.

Dmitry · August 11, 2021, 1:16pm

The problem was in the data format of the multidimensional np.ndarray and boost::python::numpy::ndarray. Running the same code with ‘a.reshape((-1))’ instead of just ‘a’ solved the problem.