TVMError: ValueError while quantizing a TensorFlow model with TVM

shalu · August 19, 2020, 11:36am

Hi,

While quantizing a TensorFlow model in TVM (both with and without data-aware calibration), I am getting this error:

File "/home/quest/Downloads/tvm2/python/tvm/relay/quantize/_partition.py", line 137, in mul_partition_generic
    raise ValueError
TVMError: ValueError

I am using TF 2.3 with the latest version of TVM. Below is the code used:

# Path to the serialized TensorFlow GraphDef (.pb) that is read and parsed further below.
model_path ='/home/quest/facenet-Task/facenet/src/VGG_face2_model_inference/freeze_facenet.pb'



   
def prewhiten(x):
    """Center ``x`` on its mean and scale it by its standard deviation.

    The standard deviation used for scaling is floored at
    ``1 / sqrt(x.size)``, so a constant input is scaled by a finite factor
    instead of being divided by zero.

    Args:
        x: numpy array (it must expose ``.size``).

    Returns:
        Array of the same shape as ``x`` holding the normalized values.
    """
    centered = x - np.mean(x)
    # Lower bound for the divisor; depends only on the number of elements.
    floor = 1.0 / np.sqrt(x.size)
    scale = 1 / np.maximum(np.std(x), floor)
    return centered * scale
  
    
import tensorflow as tf
# Use the TF1-style API namespace when it exists, otherwise fall back to the
# plain ``tf`` module. ``tf.compat.v1`` is resolved by attribute access, so a
# missing namespace normally surfaces as AttributeError rather than
# ImportError; both are handled so the fallback can actually trigger.
try:
    tf_compat_v1 = tf.compat.v1
except (ImportError, AttributeError):
    tf_compat_v1 = tf

#tflite_model_file =  '/home/quest/facenet-Task/facenet/src/VGG_face2_model_inference/my_facenet.tflite'

#tflite_model_buf = open(tflite_model_file, "rb").read()



        
# NOTE(review): count2 is not referenced anywhere else in the visible script.
count2=0
# When True, the export step further below passes fcompile=None instead of
# ndk.create_shared.
local_demo = True
# Disabled TFLite loading path, kept as an unused string literal. It would
# build tflite_model from tflite_model_buf via tflite.Model, falling back to
# tflite.Model.Model on AttributeError.
'''try:
    import tflite
    tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0)
    print(tflite_model)
except AttributeError:
    import tflite.Model
    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)
'''



   
# Compile the model with relay
# ----------------------------

# Name and shape of the model's input tensor; both are passed to
# relay.frontend.from_tensorflow below via its `shape` argument.
input_tensor = "input"
# presumably (batch, height, width, channels) — confirm against the model
input_shape = (1,160, 160, 3)
# NOTE(review): input_dtype is only used by the disabled from_tflite call below.
input_dtype = "float32"
# Forwarded as the `layout` argument of relay.frontend.from_tensorflow.
layout=None
from tvm import relay, transform
# Disabled TFLite import path (the script uses from_tensorflow instead):
#mod, params = relay.frontend.from_tflite(tflite_model,
                                       #shape_dict={input_tensor: input_shape},
                                      #dtype_dict={input_tensor: input_dtype})


# Read the serialized GraphDef from model_path and parse it.
with tf_compat_v1.gfile.GFile(model_path, 'rb') as f:
    graph_def = tf_compat_v1.GraphDef()
    graph_def.ParseFromString(f.read())
    # NOTE(review): `graph` is not used afterwards in the visible script.
    graph = tf.import_graph_def(graph_def, name='')
    # Call the utility to import the graph definition into default graph.
    # NOTE(review): tf_testing is not imported in the visible script;
    # presumably tvm.relay.testing.tf — confirm.
    graph_def = tf_testing.ProcessGraphDefParam(graph_def)
    # Add shapes to the graph.
    with tf_compat_v1.Session() as sess:
        # 'embeddings' is presumably the model's output node name — confirm.
        graph_def = tf_testing.AddShapesToGraphDef(sess, 'embeddings')
 

                             
# Convert the TensorFlow GraphDef through the Relay frontend, giving the
# input shape by tensor name; the result is unpacked into mod and params.
mod, params = relay.frontend.from_tensorflow(graph_def,
                                             layout=layout,
                                             shape={input_tensor: input_shape})
 
img_dir ="/home/quest/Downloads/Facene_Real_time_face_ecognition/preproces_vgg_face2_dataset/images_for_mtcnn" 

def rep_data_gen():
    """Yield calibration batches for data-aware quantization.

    Walks ``img_dir`` (one level of sub-directories, each holding image
    files), prewhitens every readable image, resizes it to 160x160 and
    yields it as a float32 numpy array with a leading batch dimension of 1.

    Compared with the earlier version of this generator:

    * items are keyed by ``input_tensor`` (the graph's input name) instead
      of the hard-coded ``'data'``, because the calibration batches are fed
      to the compiled graph by input name;
    * values are plain numpy arrays rather than ``tf.Tensor`` objects;
    * files that ``cv2.imread`` cannot decode (it returns ``None``) and
      entries of ``img_dir`` that are not directories are skipped instead
      of raising;
    * images are processed lazily, so nothing is read after the requested
      number of batches has been produced.

    Yields:
        dict: ``{input_tensor: array}`` where ``array`` is float32 with
        shape ``(1, 160, 160, channels)``.
    """
    width = 160
    height = 160
    # Number of single-image batches to yield (the original took
    # ``BATCH_SIZE = 1`` batches from a dataset batched by 1).
    num_batches = 1
    produced = 0
    for person in os.listdir(img_dir):
        person_dir = os.path.join(img_dir, person)
        if not os.path.isdir(person_dir):
            continue
        for fname in os.listdir(person_dir):
            img = cv2.imread(os.path.join(person_dir, fname))
            if img is None:
                # Unreadable or non-image file.
                continue
            img = cv2.resize(prewhiten(img), (width, height))
            img = img.astype(np.float32)
            yield {input_tensor: img[np.newaxis, ...]}
            produced += 1
            if produced >= num_batches:
                return
        
batch_size =1


def quantize(mod, params, data_aware):
    """Quantize a Relay module with ``relay.quantize``.

    Args:
        mod: Relay module to quantize.
        params: Parameters passed alongside ``mod``.
        data_aware: If true, calibrate with ``calibrate_mode='kl_divergence'``
            and pass ``rep_data_gen()`` as the dataset; otherwise use
            ``calibrate_mode='global_scale'`` with no dataset.

    Returns:
        The value returned by ``relay.quantize.quantize``.
    """
    print('Quantize mode', relay.quantize.qconfig())
    print(rep_data_gen())

    # Both branches share global_scale=8.0; only the calibration mode and
    # the optional dataset differ.
    if data_aware:
        calibrate_mode = 'kl_divergence'
        extra_kwargs = {'dataset': rep_data_gen()}
    else:
        calibrate_mode = 'global_scale'
        extra_kwargs = {}

    with relay.quantize.qconfig(calibrate_mode=calibrate_mode, global_scale=8.0):
        quantized = relay.quantize.quantize(mod, params, **extra_kwargs)
    return quantized
# Quantize without calibration data (data_aware=False selects the
# 'global_scale' branch of quantize()).
mod = quantize(mod, params, data_aware=False)


# Build the quantized module for the "llvm" target
target = "llvm"
#target = "cuda"
with transform.PassContext(opt_level=3):
    lib = relay.build(mod, target, params=params)


# Export the compiled library as a shared object.
lib_fname = "./tvm_models/vgg_pb_tf2_3.so"
# NOTE(review): ndk is not imported in the visible script; it is only
# evaluated when local_demo is False.
fcompile = ndk.create_shared if not local_demo else None
lib.export_library(lib_fname, fcompile)
# Execute on TVM
# --------------
import tvm
# NOTE(review): te is imported but not used in the visible script.
from tvm import te
from tvm.contrib import graph_runtime as runtime

# Create a runtime executor module
module = runtime.GraphModule(lib['default'](tvm.cpu()))
#module = runtime.GraphModule(lib['default'](tvm.gpu()))

Please comment on this issue. Thank you.

zhanghaohit · August 20, 2020, 1:27am

I think your model may contain a multiply whose rhs is a QPartitionExpr while its lhs is not. You can try adding the following code after line 131 (in `mul_partition_generic` of `_partition.py`) to see whether it passes:

     if rhs_cond:                                                                                                                                                                                                                                                                                                                                                          
         return QPartitionExpr(_forward_op(ref_call, [lhs, rhs]))

shalu · August 20, 2020, 9:03am

@zhanghaohit
I have added the code, but now I am getting this error:

File "/home/quest/Downloads/tvm2/python/tvm/relay/quantize/_annotate.py", line 224, in multiply_rewrite
    raise ValueError
TVMError: ValueError

Thank you

zhanghaohit · August 24, 2020, 2:55am

I think it is the same problem here: the annotate pass (`multiply_rewrite` in `_annotate.py`) would also have to support the case where rhs_kind is not None while lhs_kind is None.