Float8 regression

As I’m working on the uTVM tutorial with the latest from master, I’m now hitting the following:

  File "./micro_tflite.py", line 183, in <module>
    micro_mod = micro.create_micro_mod(c_mod, dev_config)
  File "/home/tgall/tvm/tvm/python/tvm/micro/base.py", line 212, in create_micro_mod
  File "/home/tgall/tvm/tvm/python/tvm/runtime/module.py", line 340, in export_library
    fcompile(file_name, files, **kwargs)
  File "/home/tgall/tvm/tvm/python/tvm/contrib/cc.py", line 153, in _fcompile
    compile_func(outputs, objects + add_files, options=all_options, **kwargs)
  File "/home/tgall/tvm/tvm/python/tvm/micro/base.py", line 295, in compile_func
    create_micro_lib(obj_path, src_path, lib_type, options, lib_src_paths=lib_src_paths)
  File "/home/tgall/tvm/tvm/python/tvm/micro/device/arm/stm32f746xx.py", line 87, in create_micro_lib
  File "/home/tgall/tvm/tvm/python/tvm/micro/device/base.py", line 166, in create_micro_lib_base
  File "/home/tgall/tvm/tvm/python/tvm/contrib/binutil.py", line 97, in run_cmd
    raise RuntimeError(msg)
RuntimeError: error while running command "arm-none-eabi-gcc -std=c11 -Wall -Wextra --pedantic -c -g -nostartfiles -nodefaultlibs -nostdlib -fdata-sections -ffunction-sections -I/home/tgall/tvm/tvm/include -I/home/tgall/tvm/tvm/3rdparty/dlpack/include -I/home/tgall/tvm/tvm/3rdparty/dmlc-core/include -O2 -mcpu=cortex-m7 -mlittle-endian -mfloat-abi=hard -mfpu=fpv5-sp-d16 -mthumb -ffast-math -gdwarf-5 -DARM_MATH_CM7 -D__FPU_PRESENT=1U -DARM_MATH_DSP -Wno-unused-variable -Wno-unused-parameter -I/home/tgall/st/STM32Cube_FW_F7_V1.16.0/Drivers/CMSIS -I/home/tgall/st/STM32Cube_FW_F7_V1.16.0/Drivers/CMSIS/Core/Include -I /home/tgall/tvm/tvm/include -I /home/tgall/tvm/tvm/3rdparty/dlpack/include -I /home/tgall/tvm/tvm/3rdparty/dmlc-core/include -I /home/tgall/tvm/tvm/python/tvm/micro/../../../src/runtime/micro/host_driven /tmp/tmpe7nlbq80/temp.c -o /tmp/tmpe7nlbq80/temp.o":
/tmp/tmpe7nlbq80/temp.c: In function 'fused_reshape':
/tmp/tmpe7nlbq80/temp.c:172:7: error: 'float8' undeclared (first use in this function); did you mean 'float'?
  172 |     ((float8*)((float*)T_reshape + ((ax1_outer * 8))))[0] = ((float8*)((float*)placeholder + ((ax1_outer * 8))))[0];
      |       ^~~~~~
      |       float
/tmp/tmpe7nlbq80/temp.c:172:7: note: each undeclared identifier is reported only once for each function it appears in
/tmp/tmpe7nlbq80/temp.c:172:14: error: expected expression before ')' token
  172 |     ((float8*)((float*)T_reshape + ((ax1_outer * 8))))[0] = ((float8*)((float*)placeholder + ((ax1_outer * 8))))[0];
      |              ^
/tmp/tmpe7nlbq80/temp.c:172:70: error: expected expression before ')' token
  172 |     ((float8*)((float*)T_reshape + ((ax1_outer * 8))))[0] = ((float8*)((float*)placeholder + ((ax1_outer * 8))))[0];
      |                                                                      ^


This small bit of Python also reproduces it:

import os
import pprint

import numpy as np
import tvm
from tvm.contrib import graph_runtime, util
from tvm import relay
import tvm.micro as micro

TARGET = 'c -device=micro_dev'

def relay_micro_build(func, dev_config, params=None):
    """Create a graph runtime module with a micro device context from a Relay function.

    Parameters
    ----------
    func : relay.Function
        function to compile

    dev_config : Dict[str, Any]
        MicroTVM config dict for the target device

    params : dict
        input parameters that do not change during inference

    Returns
    -------
    mod : tvm.runtime.Module
        graph runtime module for the target device
    """
    # disable_vectorize = tvm.target.build_config(disable_vectorize=True)
    # NOTE(review): stacking a separate relay.build_config on top of the new
    # PassContext does not propagate the options; combining everything into a
    # single PassContext avoids the float8 vectorization error:
    #   with tvm.transform.PassContext(opt_level=3,
    #                                  config={'tir.disable_vectorize': True},
    #                                  disabled_pass=['FuseOps']):
    disable_fusion = relay.build_config(disabled_pass={'FuseOps'})
    with tvm.transform.PassContext(opt_level=3, config={'tir.disable_vectorize': True}), disable_fusion:
        # Compile the Relay function down to a graph, a C module, and params.
        graph, c_mod, params = relay.build(func, target=TARGET, params=params)
    # Cross-compile the C module and load it onto the micro device.
    micro_mod = micro.create_micro_mod(c_mod, dev_config)
    ctx = tvm.micro_dev(0)
    mod = graph_runtime.create(graph, micro_mod, ctx)
    return mod

shape = (1024,)
dtype = 'float32'

# Construct Relay program: z = x * x + 1.
x = relay.var('x', relay.TensorType(shape=shape, dtype=dtype))
xx = relay.multiply(x, x)
z = relay.add(xx, relay.const(1.0))
func = relay.Function([x], z)

dev_config = micro.device.arm.stm32f746xx.generate_config("", 6666)

with micro.Session(dev_config) as sess:
  mod = relay_micro_build(func, dev_config)

  x_in = np.random.uniform(size=shape[0]).astype(dtype)

  # Run with `x_in` as the input.
  mod.set_input('x', x_in)
  mod.run()
  result = mod.get_output(0).asnumpy()

  expected = (x_in * x_in) + 1.0
  print(f'expected result:\t{expected}')
  print(f'μTVM result:\t\t{result}')
  # Count the elements that actually differ between the two arrays.
  print("differences in array : " + str((expected != result).sum()))

  rtol = 1e-05
  atol = 1e-05

  if np.allclose(expected, result, rtol, atol):
    print('Looks good!')
  else:
    print('Something\'s wrong!')

I was able to get this to build by combining the PassContexts (build_config is now PassContext, in a recent refactor) in your smaller snippet of Python. Change the following lines:

disable_fusion = relay.build_config(disabled_pass={'FuseOps'})
with tvm.transform.PassContext(opt_level=3, config={'tir.disable_vectorize': True}), disable_fusion:


to:

with tvm.transform.PassContext(opt_level=3, config={'tir.disable_vectorize': True}, disabled_pass=['FuseOps']):

We might need to update our test cases to reflect the recent refactor. Can you point me where you got the initial build_config snippet?

You are exactly right. Thank you.

The current tests don’t really exercise this. They should. I’ll add. (I’ve been working on a uTVM CI FWIW, nothing too fancy, simple and pragmatic)

I don’t think there is anything in tree currently. (I’ll grep around tho)