How to get the shape of tensors generated by tvm

kk2048 · March 2, 2022, 4:16am

Hi!

I’m trying to use TVM autoscheduler to generate cuda source code and use it elsewhere. But I don’t know how to get the shape of some tensors built by TVM.

I successfully get the source code of my workload, but the result is in the form of 3 different kernels.(it looks like this)


extern "C" __global__ void __launch_bounds__(32) myop_kernel2(float* __restrict__ output_node, float* __restrict__ extracted_reduction) {...}

extern "C" __global__ void __launch_bounds__(240) myop_kernel0(float* __restrict__ input_node, float* __restrict__ mid_node) {...}

extern "C" __global__ void __launch_bounds__(32) myop_kernel1(float* __restrict__ extracted_reduction, float* __restrict__ mid_node) {...}

To launch these kernels, the shapes of extracted_reduction and mid_node are necessary(When call cudaMalloc). But I have no idea how to get these information.

It seems that such a module contains an llvm module and some cuda modules, and the llvm module is build with an IRmodule. But I can’t find any useful tool to get these information from either llvm module nor IRmodule.

Here is how I tried to solve this problem myself(but failed):

I have tried to read the llvm source code of the same module, it looks like this.(here are the first part and last part of this llvm module)


%0 = type { double }

%1 = type { i8*, %2, i32, %3, i64*, i64*, i64 }

; Function Attrs: noinline

define internal fastcc i32 @myop_compute_(%0* noalias, i8* noalias align 128, i32* noalias, i8* noalias align 128, i32) unnamed_addr #1 {

entry:

  %5 = alloca i8*, align 8

  %6 = alloca i8*, align 8

... ...

  %52 = load i8*, i8** @.tvm_func.myop_kernel2, align 8

... ...

handle_init_end16:                                ; preds = %call_end14, %call_end18

  %58 = phi i8* [ %52, %call_end14 ], [ %62, %call_end18 ]

  %59 = load i32 (i8*, %0*, i32*, i32, %0*, i32*)*, i32 (i8*, %0*, i32*, i32, %0*, i32*)** @__TVMFuncCall, align 8, !tbaa !20

  %60 = call i32 %59(i8* %58, %0* nonnull %0, i32* nonnull %2, i32 4, %0* nonnull %20, i32* nonnull %21)

It seemes that this llvm module loads myop_kernel2 into %52 and %58. It calls __TVMFuncCall and uses %58 as first argument. I read the definition of TVMFuncCall


int TVMFuncCall(TVMFunctionHandle func, TVMValue* args, int* arg_type_codes, int num_args, TVMValue* ret_val, int* ret_type_code)

It seems that the second argument of TVMFuncCall TVMValue* args should be the argument passed to func(which is @.tvm_func.myop_kernel2 here). But the llvm source code uses %0* nonnull %0 as second argument. The %0* represents double* because of %0 = type { double } I gauss.

But I don’t know what does nonnull %0 refer to. I guess it refers to the frist argument of myop_compute_(define i32 @myop_compute_(%0* noalias, ..., does it mean %0* noalias %0 by default? )

If that is correct, %0 comes from .sub in the caller of myop_compute_.


define dllexport i32 @myop(i8* noalias nocapture readonly, i8* noalias nocapture readonly, i32, i8* noalias nocapture readnone, i8* noalias nocapture readnone, i8* noalias nocapture readnone) local_unnamed_addr !dbg !5 {

entry:

... ...

  %7 = alloca [7 x %0], align 8, !dbg !18

  %.sub = getelementptr inbounds [7 x %0], [7 x %0]* %7, i64 0, i64 0

... ...

  %172 = load i8*, i8** @.tvm_func.__tvm_set_device, align 8, !dbg !18

... ...

  %181 = load i32 (i8*, %0*, i32*, i32, %0*, i32*)*, i32 (i8*, %0*, i32*, i32, %0*, i32*)** @__TVMFuncCall, align 8, !dbg !18, !tbaa !20

  %182 = call i32 %181(i8* %172, %0* nonnull %.sub, i32* nonnull %.sub56, i32 2, %0* nonnull %179, i32* nonnull %180), !dbg !18

... ...

  %185 = call fastcc i32 @myop_compute_(%0* nonnull %.sub, i8* %30, i32* nonnull %.sub56, i8* %22, i32 %28), !dbg !18

So .sub should be a [7 * double*] array allocaed locally and initialized by @.tvm_func.__tvm_set_device(maybe?) But I can’t get anything useful in __tvm_set_device so I have no idea how to reach my goal now.

complete llvm source code is below

%0 = type { double }

%1 = type { i8*, %2, i32, %3, i64*, i64*, i64 }

%2 = type { i32, i32 }

%3 = type { i8, i8, i16 }

@.str = private constant [140 x i8] c"Assert fail: (num_args == 2), winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0: num_args should be 2\00", align 1

@.tvm_func.__tvm_set_device = internal unnamed_addr global i8* null, align 8

@.str.22 = private constant [17 x i8] c"__tvm_set_device\00", align 1

@__TVMBackendAllocWorkspace = linkonce dllexport local_unnamed_addr global i8* (i32, i32, i64, i32, i32)* null, align 8

@.tvm_func.winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0_kernel0 = internal unnamed_addr global i8* null, align 8

@.str.23 = private constant [96 x i8] c"winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0_kernel0\00", align 1

@.tvm_func.winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0_kernel1 = internal unnamed_addr global i8* null, align 8

@.str.24 = private constant [96 x i8] c"winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0_kernel1\00", align 1

@.tvm_func.winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0_kernel2 = internal unnamed_addr global i8* null, align 8

@.str.25 = private constant [96 x i8] c"winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0_kernel2\00", align 1

@__TVMBackendFreeWorkspace = linkonce dllexport local_unnamed_addr global i32 (i32, i32, i8*)* null, align 8

@tvm_main = weak dllexport local_unnamed_addr constant [88 x i8] c"winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0\00", align 1

@llvm.global_ctors = appending global [0 x { i32, void (), i8 }] zeroinitializer

define dllexport i32 @winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0(i8* noalias nocapture readonly, i8* noalias nocapture readonly, i32, i8* noalias nocapture readnone, i8* noalias nocapture readnone, i8* noalias nocapture readnone) local_unnamed_addr !dbg !5 {

entry:

call void @llvm.dbg.value(metadata i8* %0, metadata !12, metadata !DIExpression()), !dbg !18

call void @llvm.dbg.value(metadata i8* %1, metadata !13, metadata !DIExpression()), !dbg !18

call void @llvm.dbg.value(metadata i32 %2, metadata !14, metadata !DIExpression()), !dbg !18

call void @llvm.dbg.value(metadata i8* %3, metadata !15, metadata !DIExpression()), !dbg !18

call void @llvm.dbg.value(metadata i8* %4, metadata !16, metadata !DIExpression()), !dbg !18

call void @llvm.dbg.value(metadata i8* %5, metadata !17, metadata !DIExpression()), !dbg !18

%6 = alloca i8*, align 8, !dbg !18

%7 = alloca [7 x %0], align 8, !dbg !18

%8 = alloca [7 x i32], align 4, !dbg !18

%.sub56 = getelementptr inbounds [7 x i32], [7 x i32]* %8, i64 0, i64 0

%.sub = getelementptr inbounds [7 x %0], [7 x %0]* %7, i64 0, i64 0

%9 = icmp eq i32 %2, 2, !dbg !18

br i1 %9, label %assert_end, label %assert_fail, !dbg !18, !prof !19

assert_fail: ; preds = %entry

%10 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %10(i8* getelementptr inbounds ([140 x i8], [140 x i8]* @.str, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end: ; preds = %entry

%11 = bitcast i8* %0 to %1**, !dbg !18

%12 = load %1*, %1** %11, align 8, !dbg !18

%13 = bitcast i8* %1 to i32*, !dbg !18

%14 = load i32, i32* %13, align 4, !dbg !18, !tbaa !23

%15 = getelementptr inbounds i8, i8* %0, i64 8, !dbg !18

%16 = bitcast i8* %15 to %1**, !dbg !18

%17 = load %1*, %1** %16, align 8, !dbg !18

%18 = getelementptr inbounds i8, i8* %1, i64 4, !dbg !18

%19 = bitcast i8* %18 to i32*, !dbg !18

%20 = load i32, i32* %19, align 4, !dbg !18, !tbaa !37

%21 = getelementptr inbounds %1, %1* %12, i64 0, i32 0, !dbg !18

%22 = load i8*, i8** %21, align 8, !dbg !18

%ptrint = ptrtoint i8* %22 to i64, !dbg !18

%maskedptr = and i64 %ptrint, 127, !dbg !18

%maskcond = icmp eq i64 %maskedptr, 0, !dbg !18

tail call void @llvm.assume(i1 %maskcond), !dbg !18

%23 = getelementptr inbounds %1, %1* %12, i64 0, i32 4, !dbg !18

%24 = load i64*, i64** %23, align 8, !dbg !18

%25 = getelementptr inbounds %1, %1* %12, i64 0, i32 5, !dbg !18

%26 = load i64*, i64** %25, align 8, !dbg !18

%27 = getelementptr inbounds %1, %1* %12, i64 0, i32 1, i32 1, !dbg !18

%28 = load i32, i32* %27, align 4, !dbg !18

%29 = getelementptr inbounds %1, %1* %17, i64 0, i32 0, !dbg !18

%30 = load i8*, i8** %29, align 8, !dbg !18

%ptrint1 = ptrtoint i8* %30 to i64, !dbg !18

%maskedptr2 = and i64 %ptrint1, 127, !dbg !18

%maskcond3 = icmp eq i64 %maskedptr2, 0, !dbg !18

tail call void @llvm.assume(i1 %maskcond3), !dbg !18

%31 = getelementptr inbounds %1, %1* %17, i64 0, i32 4, !dbg !18

%32 = load i64*, i64** %31, align 8, !dbg !18

%33 = getelementptr inbounds %1, %1* %17, i64 0, i32 5, !dbg !18

%34 = load i64*, i64** %33, align 8, !dbg !18

switch i32 %14, label %assert_fail4 [

  i32 13, label %assert_end5

  i32 7, label %assert_end5

  i32 4, label %assert_end5

  i32 3, label %assert_end5

], !dbg !18

assert_fail4: ; preds = %assert_end

%35 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %35(i8* getelementptr inbounds ([215 x i8], [215 x i8]* @.str.1, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end5: ; preds = %assert_end, %assert_end, %assert_end, %assert_end

switch i32 %20, label %assert_fail6 [

  i32 13, label %assert_end7

  i32 7, label %assert_end7

  i32 4, label %assert_end7

  i32 3, label %assert_end7

], !dbg !18

assert_fail6: ; preds = %assert_end5

%36 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %36(i8* getelementptr inbounds ([215 x i8], [215 x i8]* @.str.2, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end7: ; preds = %assert_end5, %assert_end5, %assert_end5, %assert_end5

%37 = getelementptr inbounds %1, %1* %12, i64 0, i32 2, !dbg !18

%38 = load i32, i32* %37, align 4, !dbg !18

%39 = icmp eq i32 %38, 4, !dbg !18

br i1 %39, label %assert_end11, label %assert_fail8, !dbg !18, !prof !19

assert_fail8: ; preds = %assert_end7

%40 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %40(i8* getelementptr inbounds ([85 x i8], [85 x i8]* @.str.3, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end11: ; preds = %assert_end7

%41 = getelementptr inbounds %1, %1* %12, i64 0, i32 3, i32 2, !dbg !18

%42 = load i16, i16* %41, align 2, !dbg !18

%43 = icmp eq i16 %42, 1, !dbg !18

%44 = getelementptr inbounds %1, %1* %12, i64 0, i32 3, i32 1, !dbg !18

%45 = load i8, i8* %44, align 1, !dbg !18

%46 = icmp eq i8 %45, 32, !dbg !18

%47 = getelementptr inbounds %1, %1* %12, i64 0, i32 3, i32 0, !dbg !18

%48 = load i8, i8* %47, align 1, !dbg !18

%49 = icmp eq i8 %48, 2, !dbg !18

%50 = and i1 %46, %49, !dbg !18

%51 = and i1 %43, %50, !dbg !18

br i1 %51, label %assert_end13, label %assert_fail12, !dbg !18, !prof !19

assert_fail12: ; preds = %assert_end11

%52 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %52(i8* getelementptr inbounds ([198 x i8], [198 x i8]* @.str.4, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end13: ; preds = %assert_end11

%53 = load i64, i64* %24, align 8, !dbg !18, !tbaa !39

%54 = trunc i64 %53 to i32, !dbg !18

%55 = icmp eq i32 %54, 5, !dbg !18

br i1 %55, label %assert_end15, label %assert_fail14, !dbg !18, !prof !19

assert_fail14: ; preds = %assert_end13

%56 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %56(i8* getelementptr inbounds ([124 x i8], [124 x i8]* @.str.5, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end15: ; preds = %assert_end13

%57 = getelementptr inbounds i64, i64* %24, i64 1, !dbg !18

%58 = load i64, i64* %57, align 8, !dbg !18, !tbaa !53

%59 = trunc i64 %58 to i32, !dbg !18

%60 = icmp eq i32 %59, 3, !dbg !18

br i1 %60, label %assert_end17, label %assert_fail16, !dbg !18, !prof !19

assert_fail16: ; preds = %assert_end15

%61 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %61(i8* getelementptr inbounds ([124 x i8], [124 x i8]* @.str.6, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end17: ; preds = %assert_end15

%62 = getelementptr inbounds i64, i64* %24, i64 2, !dbg !18

%63 = load i64, i64* %62, align 8, !dbg !18, !tbaa !55

%64 = trunc i64 %63 to i32, !dbg !18

%65 = icmp eq i32 %64, 128, !dbg !18

br i1 %65, label %assert_end19, label %assert_fail18, !dbg !18, !prof !19

assert_fail18: ; preds = %assert_end17

%66 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %66(i8* getelementptr inbounds ([128 x i8], [128 x i8]* @.str.7, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end19: ; preds = %assert_end17

%67 = getelementptr inbounds i64, i64* %24, i64 3, !dbg !18

%68 = load i64, i64* %67, align 8, !dbg !18, !tbaa !58

%69 = trunc i64 %68 to i32, !dbg !18

%70 = icmp eq i32 %69, 128, !dbg !18

br i1 %70, label %assert_end21, label %assert_fail20, !dbg !18, !prof !19

assert_fail20: ; preds = %assert_end19

%71 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %71(i8* getelementptr inbounds ([128 x i8], [128 x i8]* @.str.8, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end21: ; preds = %assert_end19

%72 = icmp eq i64* %26, null, !dbg !18

br i1 %72, label %if_end, label %if_then, !dbg !18, !prof !60

if_then: ; preds = %assert_end21

%73 = load i64, i64* %26, align 8, !dbg !18, !tbaa !61

%74 = trunc i64 %73 to i32, !dbg !18

%75 = icmp eq i32 %74, 49152, !dbg !18

%76 = getelementptr inbounds i64, i64* %26, i64 1, !dbg !18

%77 = load i64, i64* %76, align 8, !dbg !18, !tbaa !75

%78 = trunc i64 %77 to i32, !dbg !18

%79 = icmp eq i32 %78, 16384, !dbg !18

%80 = getelementptr inbounds i64, i64* %26, i64 2, !dbg !18

%81 = load i64, i64* %80, align 8, !dbg !18, !tbaa !77

%82 = trunc i64 %81 to i32, !dbg !18

%83 = icmp eq i32 %82, 128, !dbg !18

%84 = getelementptr inbounds i64, i64* %26, i64 3, !dbg !18

%85 = load i64, i64* %84, align 8, !dbg !18, !tbaa !80

%86 = trunc i64 %85 to i32, !dbg !18

%87 = icmp eq i32 %86, 1, !dbg !18

%88 = and i1 %83, %87, !dbg !18

%89 = and i1 %79, %88, !dbg !18

%90 = and i1 %75, %89, !dbg !18

br i1 %90, label %if_end, label %assert_fail22, !dbg !18, !prof !19

if_end: ; preds = %assert_end21, %if_then

%91 = getelementptr inbounds %1, %1* %12, i64 0, i32 6, !dbg !18

%92 = load i64, i64* %91, align 8, !dbg !18

%93 = icmp eq i64 %92, 0, !dbg !18

br i1 %93, label %assert_end25, label %assert_fail24, !dbg !18, !prof !19

assert_fail22: ; preds = %if_then

%94 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %94(i8* getelementptr inbounds ([202 x i8], [202 x i8]* @.str.9, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_fail24: ; preds = %if_end

%95 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %95(i8* getelementptr inbounds ([163 x i8], [163 x i8]* @.str.10, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end25: ; preds = %if_end

%96 = getelementptr inbounds %1, %1* %12, i64 0, i32 1, i32 0, !dbg !18

%97 = load i32, i32* %96, align 4, !dbg !18

%98 = icmp eq i32 %97, 2, !dbg !18

br i1 %98, label %assert_end27, label %assert_fail26, !dbg !18, !prof !19

assert_fail26: ; preds = %assert_end25

%99 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %99(i8* getelementptr inbounds ([149 x i8], [149 x i8]* @.str.11, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end27: ; preds = %assert_end25

%100 = getelementptr inbounds %1, %1* %17, i64 0, i32 2, !dbg !18

%101 = load i32, i32* %100, align 4, !dbg !18

%102 = icmp eq i32 %101, 4, !dbg !18

br i1 %102, label %assert_end31, label %assert_fail28, !dbg !18, !prof !19

assert_fail28: ; preds = %assert_end27

%103 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %103(i8* getelementptr inbounds ([85 x i8], [85 x i8]* @.str.12, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end31: ; preds = %assert_end27

%104 = getelementptr inbounds %1, %1* %17, i64 0, i32 3, i32 2, !dbg !18

%105 = load i16, i16* %104, align 2, !dbg !18

%106 = icmp eq i16 %105, 1, !dbg !18

%107 = getelementptr inbounds %1, %1* %17, i64 0, i32 3, i32 1, !dbg !18

%108 = load i8, i8* %107, align 1, !dbg !18

%109 = icmp eq i8 %108, 32, !dbg !18

%110 = getelementptr inbounds %1, %1* %17, i64 0, i32 3, i32 0, !dbg !18

%111 = load i8, i8* %110, align 1, !dbg !18

%112 = icmp eq i8 %111, 2, !dbg !18

%113 = and i1 %109, %112, !dbg !18

%114 = and i1 %106, %113, !dbg !18

br i1 %114, label %assert_end33, label %assert_fail32, !dbg !18, !prof !19

assert_fail32: ; preds = %assert_end31

%115 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %115(i8* getelementptr inbounds ([198 x i8], [198 x i8]* @.str.13, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end33: ; preds = %assert_end31

%116 = load i64, i64* %32, align 8, !dbg !18, !tbaa !82

%117 = trunc i64 %116 to i32, !dbg !18

%118 = icmp eq i32 %117, 6, !dbg !18

br i1 %118, label %assert_end35, label %assert_fail34, !dbg !18, !prof !19

assert_fail34: ; preds = %assert_end33

%119 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %119(i8* getelementptr inbounds ([124 x i8], [124 x i8]* @.str.14, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end35: ; preds = %assert_end33

%120 = getelementptr inbounds i64, i64* %32, i64 1, !dbg !18

%121 = load i64, i64* %120, align 8, !dbg !18, !tbaa !96

%122 = trunc i64 %121 to i32, !dbg !18

%123 = icmp eq i32 %122, 6, !dbg !18

br i1 %123, label %assert_end37, label %assert_fail36, !dbg !18, !prof !19

assert_fail36: ; preds = %assert_end35

%124 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %124(i8* getelementptr inbounds ([124 x i8], [124 x i8]* @.str.15, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end37: ; preds = %assert_end35

%125 = getelementptr inbounds i64, i64* %32, i64 2, !dbg !18

%126 = load i64, i64* %125, align 8, !dbg !18, !tbaa !98

%127 = trunc i64 %126 to i32, !dbg !18

%128 = icmp eq i32 %127, 3, !dbg !18

br i1 %128, label %assert_end39, label %assert_fail38, !dbg !18, !prof !19

assert_fail38: ; preds = %assert_end37

%129 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %129(i8* getelementptr inbounds ([124 x i8], [124 x i8]* @.str.16, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end39: ; preds = %assert_end37

%130 = getelementptr inbounds i64, i64* %32, i64 3, !dbg !18

%131 = load i64, i64* %130, align 8, !dbg !18, !tbaa !101

%132 = trunc i64 %131 to i32, !dbg !18

%133 = icmp eq i32 %132, 5120, !dbg !18

br i1 %133, label %assert_end41, label %assert_fail40, !dbg !18, !prof !19

assert_fail40: ; preds = %assert_end39

%134 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %134(i8* getelementptr inbounds ([130 x i8], [130 x i8]* @.str.17, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end41: ; preds = %assert_end39

%135 = icmp eq i64* %34, null, !dbg !18

br i1 %135, label %if_end43, label %if_then42, !dbg !18, !prof !60

if_then42: ; preds = %assert_end41

%136 = load i64, i64* %34, align 8, !dbg !18, !tbaa !103

%137 = trunc i64 %136 to i32, !dbg !18

%138 = icmp eq i32 %137, 92160, !dbg !18

%139 = getelementptr inbounds i64, i64* %34, i64 1, !dbg !18

%140 = load i64, i64* %139, align 8, !dbg !18, !tbaa !117

%141 = trunc i64 %140 to i32, !dbg !18

%142 = icmp eq i32 %141, 15360, !dbg !18

%143 = getelementptr inbounds i64, i64* %34, i64 2, !dbg !18

%144 = load i64, i64* %143, align 8, !dbg !18, !tbaa !119

%145 = trunc i64 %144 to i32, !dbg !18

%146 = icmp eq i32 %145, 5120, !dbg !18

%147 = getelementptr inbounds i64, i64* %34, i64 3, !dbg !18

%148 = load i64, i64* %147, align 8, !dbg !18, !tbaa !122

%149 = trunc i64 %148 to i32, !dbg !18

%150 = icmp eq i32 %149, 1, !dbg !18

%151 = and i1 %146, %150, !dbg !18

%152 = and i1 %142, %151, !dbg !18

%153 = and i1 %138, %152, !dbg !18

br i1 %153, label %if_end43, label %assert_fail44, !dbg !18, !prof !19

if_end43: ; preds = %assert_end41, %if_then42

%154 = getelementptr inbounds %1, %1* %17, i64 0, i32 6, !dbg !18

%155 = load i64, i64* %154, align 8, !dbg !18

%156 = icmp eq i64 %155, 0, !dbg !18

br i1 %156, label %assert_end47, label %assert_fail46, !dbg !18, !prof !19

assert_fail44: ; preds = %if_then42

%157 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %157(i8* getelementptr inbounds ([203 x i8], [203 x i8]* @.str.18, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_fail46: ; preds = %if_end43

%158 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %158(i8* getelementptr inbounds ([163 x i8], [163 x i8]* @.str.19, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end47: ; preds = %if_end43

%159 = getelementptr inbounds %1, %1* %17, i64 0, i32 1, i32 0, !dbg !18

%160 = load i32, i32* %159, align 4, !dbg !18

%161 = icmp eq i32 %160, 2, !dbg !18

br i1 %161, label %assert_end49, label %assert_fail48, !dbg !18, !prof !19

assert_fail48: ; preds = %assert_end47

%162 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %162(i8* getelementptr inbounds ([149 x i8], [149 x i8]* @.str.20, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end49: ; preds = %assert_end47

%163 = getelementptr inbounds %1, %1* %17, i64 0, i32 1, i32 1, !dbg !18

%164 = load i32, i32* %163, align 4, !dbg !18

%165 = icmp eq i32 %28, %164, !dbg !18

br i1 %165, label %assert_end51, label %assert_fail50, !dbg !18, !prof !19

assert_fail50: ; preds = %assert_end49

%166 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !dbg !18, !tbaa !20

tail call void %166(i8* getelementptr inbounds ([155 x i8], [155 x i8]* @.str.21, i64 0, i64 0)), !dbg !18

ret i32 -1, !dbg !18

assert_end51: ; preds = %assert_end49

%167 = bitcast [7 x %0]* %7 to i64*, !dbg !18

store i64 2, i64* %167, align 8, !dbg !18

store i32 0, i32* %.sub56, align 4, !dbg !18, !tbaa !124

%168 = sext i32 %28 to i64, !dbg !18

%169 = getelementptr inbounds [7 x %0], [7 x %0]* %7, i64 0, i64 1, !dbg !18

%170 = bitcast %0* %169 to i64*, !dbg !18

store i64 %168, i64* %170, align 8, !dbg !18

%171 = getelementptr inbounds [7 x i32], [7 x i32]* %8, i64 0, i64 1, !dbg !18

store i32 0, i32* %171, align 4, !dbg !18, !tbaa !138

%172 = load i8*, i8** @.tvm_func.__tvm_set_device, align 8, !dbg !18

%173 = icmp eq i8* %172, null, !dbg !18

br i1 %173, label %handle_init, label %handle_init_end, !dbg !18, !prof !60

handle_init: ; preds = %assert_end51

%174 = load i8*, i8** @__tvm_module_ctx, align 8, !dbg !18, !tbaa !20

%175 = load i32 (i8*, i8*, i8**)*, i32 (i8*, i8*, i8**)** @__TVMBackendGetFuncFromEnv, align 8, !dbg !18, !tbaa !20

%176 = call i32 %175(i8* %174, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.22, i64 0, i64 0), i8** nonnull %6), !dbg !18

%177 = icmp eq i32 %176, 0, !dbg !18

br i1 %177, label %call_end, label %call_fail, !dbg !18, !prof !19

handle_init_end: ; preds = %assert_end51, %call_end

%178 = phi i8* [ %172, %assert_end51 ], [ %184, %call_end ], !dbg !18

%179 = getelementptr inbounds [7 x %0], [7 x %0]* %7, i64 0, i64 2, !dbg !18

%180 = getelementptr inbounds [7 x i32], [7 x i32]* %8, i64 0, i64 2, !dbg !18

%181 = load i32 (i8*, %0*, i32*, i32, %0*, i32*)*, i32 (i8*, %0*, i32*, i32, %0*, i32*)** @__TVMFuncCall, align 8, !dbg !18, !tbaa !20

%182 = call i32 %181(i8* %178, %0* nonnull %.sub, i32* nonnull %.sub56, i32 2, %0* nonnull %179, i32* nonnull %180), !dbg !18

%183 = icmp eq i32 %182, 0, !dbg !18

br i1 %183, label %call_end53, label %call_fail, !dbg !18, !prof !19

call_fail: ; preds = %call_end53, %handle_init_end, %handle_init

%merge = phi i32 [ %176, %handle_init ], [ %182, %handle_init_end ], [ %185, %call_end53 ]

ret i32 %merge, !dbg !18

call_end: ; preds = %handle_init

%184 = load i8*, i8** %6, align 8, !dbg !18

store i8* %184, i8** @.tvm_func.__tvm_set_device, align 8, !dbg !18

br label %handle_init_end, !dbg !18

call_end53: ; preds = %handle_init_end

%185 = call fastcc i32 @winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0_compute_(%0* nonnull %.sub, i8* %30, i32* nonnull %.sub56, i8* %22, i32 %28), !dbg !18

br label %call_fail

}

; Function Attrs: nounwind

declare void @llvm.assume(i1) #0

; Function Attrs: noinline

define internal fastcc i32 @winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0_compute_(%0* noalias, i8* noalias align 128, i32* noalias, i8* noalias align 128, i32) unnamed_addr #1 {

entry:

%5 = alloca i8*, align 8

%6 = alloca i8*, align 8

%7 = alloca i8*, align 8

%8 = load i8* (i32, i32, i64, i32, i32)*, i8* (i32, i32, i64, i32, i32)** @__TVMBackendAllocWorkspace, align 8, !tbaa !20

%9 = tail call i8* %8(i32 2, i32 %4, i64 2211840, i32 2, i32 32)

%ptrint = ptrtoint i8* %9 to i64

%maskedptr = and i64 %ptrint, 127

%maskcond = icmp eq i64 %maskedptr, 0

tail call void @llvm.assume(i1 %maskcond)

%10 = icmp eq i8* %9, null

br i1 %10, label %if_then, label %if_end, !prof !19

if_then: ; preds = %call_end20, %if_end22, %handle_init_end16, %handle_init15, %handle_init_end10, %handle_init9, %handle_init_end, %handle_init, %if_end, %entry

%merge = phi i32 [ -1, %if_end ], [ -1, %entry ], [ %28, %handle_init ], [ %34, %handle_init_end ], [ %43, %handle_init9 ], [ %47, %handle_init_end10 ], [ %56, %handle_init15 ], [ %60, %handle_init_end16 ], [ -1, %call_end20 ], [ %., %if_end22 ]

ret i32 %merge

if_end: ; preds = %entry

%11 = load i8* (i32, i32, i64, i32, i32)*, i8* (i32, i32, i64, i32, i32)** @__TVMBackendAllocWorkspace, align 8, !tbaa !20

%12 = tail call i8* %11(i32 2, i32 %4, i64 983040, i32 2, i32 32)

%ptrint1 = ptrtoint i8* %12 to i64

%maskedptr2 = and i64 %ptrint1, 127

%maskcond3 = icmp eq i64 %maskedptr2, 0

tail call void @llvm.assume(i1 %maskcond3)

%13 = icmp eq i8* %12, null

br i1 %13, label %if_then, label %if_end5, !prof !19

if_end5: ; preds = %if_end

%14 = bitcast %0* %0 to i8**

store i8* %1, i8** %14, align 8

%15 = getelementptr inbounds %0, %0* %0, i64 1

%16 = bitcast %0* %15 to i8**

store i8* %9, i8** %16, align 8

%17 = getelementptr inbounds %0, %0* %0, i64 2

%18 = bitcast %0* %17 to <2 x i64>*

store <2 x i64> <i64 128, i64 240>, <2 x i64>* %18, align 8

%19 = bitcast i32* %2 to <4 x i32>*

store <4 x i32> <i32 3, i32 3, i32 0, i32 0>, <4 x i32>* %19, align 4, !tbaa !140

%20 = getelementptr inbounds %0, %0* %0, i64 4

%21 = getelementptr inbounds i32, i32* %2, i64 4

store i32 0, i32* %21, align 4, !tbaa !141

%22 = bitcast %0* %20 to <2 x i64>*

store <2 x i64> <i64 240, i64 240>, <2 x i64>* %22, align 8

%23 = getelementptr inbounds i32, i32* %2, i64 5

store i32 0, i32* %23, align 4, !tbaa !145

%24 = load i8*, i8** @.tvm_func.winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0_kernel0, align 8

%25 = icmp eq i8* %24, null

br i1 %25, label %handle_init, label %handle_init_end, !prof !60

handle_init: ; preds = %if_end5

%26 = load i8*, i8** @__tvm_module_ctx, align 8, !tbaa !20

%27 = load i32 (i8*, i8*, i8**)*, i32 (i8*, i8*, i8**)** @__TVMBackendGetFuncFromEnv, align 8, !tbaa !20

%28 = call i32 %27(i8* %26, i8* getelementptr inbounds ([96 x i8], [96 x i8]* @.str.23, i64 0, i64 0), i8** nonnull %7)

%29 = icmp eq i32 %28, 0

br i1 %29, label %call_end, label %if_then, !prof !19

handle_init_end: ; preds = %if_end5, %call_end

%30 = phi i8* [ %24, %if_end5 ], [ %36, %call_end ]

%31 = getelementptr inbounds %0, %0* %0, i64 6

%32 = getelementptr inbounds i32, i32* %2, i64 6

%33 = load i32 (i8*, %0*, i32*, i32, %0*, i32*)*, i32 (i8*, %0*, i32*, i32, %0*, i32*)** @__TVMFuncCall, align 8, !tbaa !20

%34 = call i32 %33(i8* %30, %0* nonnull %0, i32* nonnull %2, i32 6, %0* nonnull %31, i32* nonnull %32)

%35 = icmp eq i32 %34, 0

br i1 %35, label %call_end8, label %if_then, !prof !19

call_end: ; preds = %handle_init

%36 = load i8*, i8** %7, align 8

store i8* %36, i8** @.tvm_func.winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0_kernel0, align 8

br label %handle_init_end

call_end8: ; preds = %handle_init_end

store i8* %12, i8** %14, align 8

store i8* %9, i8** %16, align 8

%37 = bitcast %0* %17 to <2 x i64>*

store <2 x i64> <i64 7680, i64 32>, <2 x i64>* %37, align 8

%38 = bitcast i32* %2 to <4 x i32>*

store <4 x i32> <i32 3, i32 3, i32 0, i32 0>, <4 x i32>* %38, align 4, !tbaa !140

%39 = load i8*, i8** @.tvm_func.winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0_kernel1, align 8

%40 = icmp eq i8* %39, null

br i1 %40, label %handle_init9, label %handle_init_end10, !prof !60

handle_init9: ; preds = %call_end8

%41 = load i8*, i8** @__tvm_module_ctx, align 8, !tbaa !20

%42 = load i32 (i8*, i8*, i8**)*, i32 (i8*, i8*, i8**)** @__TVMBackendGetFuncFromEnv, align 8, !tbaa !20

%43 = call i32 %42(i8* %41, i8* getelementptr inbounds ([96 x i8], [96 x i8]* @.str.24, i64 0, i64 0), i8** nonnull %6)

%44 = icmp eq i32 %43, 0

br i1 %44, label %call_end12, label %if_then, !prof !19

handle_init_end10: ; preds = %call_end8, %call_end12

%45 = phi i8* [ %39, %call_end8 ], [ %49, %call_end12 ]

%46 = load i32 (i8*, %0*, i32*, i32, %0*, i32*)*, i32 (i8*, %0*, i32*, i32, %0*, i32*)** @__TVMFuncCall, align 8, !tbaa !20

%47 = call i32 %46(i8* %45, %0* nonnull %0, i32* nonnull %2, i32 4, %0* nonnull %20, i32* nonnull %21)

%48 = icmp eq i32 %47, 0

br i1 %48, label %call_end14, label %if_then, !prof !19

call_end12: ; preds = %handle_init9

%49 = load i8*, i8** %6, align 8

store i8* %49, i8** @.tvm_func.winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0_kernel1, align 8

br label %handle_init_end10

call_end14: ; preds = %handle_init_end10

store i8* %3, i8** %14, align 8

store i8* %12, i8** %16, align 8

%50 = bitcast %0* %17 to <2 x i64>*

store <2 x i64> <i64 7680, i64 32>, <2 x i64>* %50, align 8

%51 = bitcast i32* %2 to <4 x i32>*

store <4 x i32> <i32 3, i32 3, i32 0, i32 0>, <4 x i32>* %51, align 4, !tbaa !140

%52 = load i8*, i8** @.tvm_func.winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0_kernel2, align 8

%53 = icmp eq i8* %52, null

br i1 %53, label %handle_init15, label %handle_init_end16, !prof !60

handle_init15: ; preds = %call_end14

%54 = load i8*, i8** @__tvm_module_ctx, align 8, !tbaa !20

%55 = load i32 (i8*, i8*, i8**)*, i32 (i8*, i8*, i8**)** @__TVMBackendGetFuncFromEnv, align 8, !tbaa !20

%56 = call i32 %55(i8* %54, i8* getelementptr inbounds ([96 x i8], [96 x i8]* @.str.25, i64 0, i64 0), i8** nonnull %5)

%57 = icmp eq i32 %56, 0

br i1 %57, label %call_end18, label %if_then, !prof !19

handle_init_end16: ; preds = %call_end14, %call_end18

%58 = phi i8* [ %52, %call_end14 ], [ %62, %call_end18 ]

%59 = load i32 (i8*, %0*, i32*, i32, %0*, i32*)*, i32 (i8*, %0*, i32*, i32, %0*, i32*)** @__TVMFuncCall, align 8, !tbaa !20

%60 = call i32 %59(i8* %58, %0* nonnull %0, i32* nonnull %2, i32 4, %0* nonnull %20, i32* nonnull %21)

%61 = icmp eq i32 %60, 0

br i1 %61, label %call_end20, label %if_then, !prof !19

call_end18: ; preds = %handle_init15

%62 = load i8*, i8** %5, align 8

store i8* %62, i8** @.tvm_func.winograd_convolution_op_float_i5_3_128_128_w64_3_3_3_o5_64_128_128_ws1_1_wd1_1_p1_1_op0_kernel2, align 8

br label %handle_init_end16

call_end20: ; preds = %handle_init_end16

%63 = load i32 (i32, i32, i8*)*, i32 (i32, i32, i8*)** @__TVMBackendFreeWorkspace, align 8, !tbaa !20

%64 = call i32 %63(i32 2, i32 %4, i8* nonnull %12)

%65 = icmp eq i32 %64, 0

br i1 %65, label %if_end22, label %if_then, !prof !60

if_end22: ; preds = %call_end20

%66 = load i32 (i32, i32, i8*)*, i32 (i32, i32, i8*)** @__TVMBackendFreeWorkspace, align 8, !tbaa !20

%67 = call i32 %66(i32 2, i32 %4, i8* nonnull %9)

%68 = icmp ne i32 %67, 0

%. = sext i1 %68 to i32

br label %if_then

}