Date: 03.06.2025

Testing bare metal riscv executables with qemu

Over the last few months I have been working on my master's thesis. This includes running bare-metal binaries for RISC-V and checking the exit code to find out whether the result is correct. I wanted to share the script and sources because I didn't find anything else like it during my web search. This is our main file; it only returns 0 to indicate success.
# main.c
int main() {
	return 0;
}
Since the binary does not run in userspace, we need to set up our own initialisation. It first initialises all the registers with zero. Then it calls the main function, which is defined in main.c. After that, we need to tell QEMU to terminate the application. We are using HTIF, which defines two addresses, fromhost and tohost. QEMU monitors these, and by writing to tohost we can tell it to terminate with the written value as the exit code.
After main returns, we have the exit code in register a0. We cannot write the value directly to tohost. There isn't much documentation of HTIF, but the QEMU sources have a comment that shows what the bit pattern looks like. Therefore, we shift the exit code left by 1 and set the lowest bit to 1. Afterwards, we load the address of tohost into register x2 and store the value of a0 into the lower 32 bits. Then we store 0 into the upper 32 bits. It is crucially important to write the full 64 bits. It is also crucially important to write the lower bits first and then the upper bits. I once had a compiler bug which reordered the writes, and Johannes Zottele discovered that QEMU then ignores the write. QEMU didn't terminate and was stuck in an infinite loop. It took quite some time to figure that out.
# init.s
# Bare-metal RISC-V start-up code (GNU as syntax).
#
# _start zeroes the integer registers, sets up the stack from the
# linker-provided __stack_* symbols, calls main, and finally reports main's
# return value to the host through the HTIF `tohost` word defined below.
.section .text.init;

.extern  main
.type    main, @function
.global _start

_start:
    # Start from a known state: clear the integer registers.
    # fp is the ABI alias of s0, so s0 is cleared by `li fp, 0`.
    li ra, 0
    # NOTE(review): gp is left at 0 although the linker script provides
    # __global_pointer$ -- confirm that no linked code relies on gp-relative
    # addressing.
    li gp, 0
    li fp, 0

    li a0, 0
    li a1, 0
    li a2, 0
    li a3, 0
    li a4, 0
    li a5, 0
    li a6, 0
    li a7, 0

    li s1, 0
    li s2, 0
    li s3, 0
    li s4, 0
    li s5, 0
    li s6, 0
    li s7, 0
    li s8, 0
    li s9, 0
    li s10, 0
    li s11, 0

    li t0, 0
    li t1, 0
    li t2, 0
    li t3, 0
    li t4, 0
    li t5, 0
    li t6, 0

    li sp, 0
    li tp, 0

    # tp = __stack_start + (s0 << __stack_shift).
    # s0 is still zero at this point, so the offset is 0 and tp ends up equal
    # to __stack_start.
    # NOTE(review): s0 presumably stands in for a hart id in the templates this
    # code is based on -- confirm before using more than one hart.
    lui t0, %hi(__stack_shift)
    addi t0, t0, %lo(__stack_shift)
    la tp, __stack_start
    sll t0, s0, t0
    add tp, tp, t0

    # sp = tp + __stack_size, i.e. the upper end of the stack region.
    lui t0, %hi(__stack_size)
    addi t0, t0, %lo(__stack_size)
    add sp, tp, t0

    # Call main with ra (x1) as the link register; its result comes back in a0.
    jal x1, main

    # shutdown with exit code from main
    # Value written to tohost: (exit_code << 1) | 1.
    slli a0, a0, 1
    ori a0, a0, 1
    # x2 is sp; the stack is not needed any more, so it is reused as a pointer.
    la x2, tohost
    # Write the full 64 bits as two 32-bit stores, low word first and high
    # word second. The order matters: the host ignores the request otherwise.
    sw a0, 0(x2)
    sw x0, 4(x2)

    # Park the hart instead of falling through into whatever bytes follow, in
    # case the host does not stop execution immediately after the write.
1:
    j 1b

    # HTIF mailbox words, placed in their own section so the linker script can
    # position them; each is 64 bits wide and aligned to 64 bytes (2^6).
    .section .tohost, "aw", @progbits
    .align 6;
    .global tohost;
    tohost: .dword 0;
    .size tohost, 8;
    .align 6;
    .global fromhost;
    fromhost: .dword 0;
    .size fromhost, 8;
 
Finally, we have to link it with a linker script (for gcc).
/*
 * Linker script for the bare-metal image.
 *
 * Lays the image out from 0x80000000 with .text.init (which holds _start)
 * first, followed by the remaining code, data, the HTIF .tohost section and
 * the symbols init.s uses to set up the stack.
 */
OUTPUT_ARCH( "riscv" )
ENTRY(_start)

/* Based on: */
/* https://github.com/riscv/riscv-test-env/blob/master/p/link.ld */
/* https://github.com/ucb-bar/libgloss-htif/blob/master/util/htif.ld */
SECTIONS
{
  /* Start address of the image */
  . = 0x80000000;

  /* Define a symbol to mark the base address, useful for relocation */
  PROVIDE(__base = .);

  /* Start-up code from init.s; placed first so it sits at the base address */
  .text.init : { *(.text.init) }

  . = ALIGN(0x1000);
  .text.startup : { *(.text.startup) }

  . = ALIGN(0x1000);
  .text : { *(.text) }

  . = ALIGN(0x1000);
  .data : { *(.data) }
  .data.string : { *(.data.string)}

   /* NOTE(review): __global_pointer$ is provided here, but init.s leaves gp
      at 0 -- confirm that gp-relative addressing is never relied upon. */
   .got : ALIGN(4K) {
      PROVIDE(__global_pointer$ = . + 0x800);
      *(.got)
    }

    .sdata : ALIGN(4K) {
      *(.sdata)
    }

    .sbss : ALIGN(4K) {
      *(.sbss)
    }

    .bss : ALIGN(4K) {
      *(.bss)
      *(COMMON)
    }

  /* HTIF mailbox words (tohost / fromhost) defined in init.s */
  . = ALIGN(0x1000);
  .tohost : { *(.tohost) }

   . = ALIGN(0x1000);
    /* place Thread Control Block (TCB) at bottom of hart stack */
    /* Stack symbols read by init.s: sp is set to __stack_start + __stack_size.
       __stack_shift is log2 of the 16-byte-aligned stack size, rounded up. */
    PROVIDE (__stack_size = 64K);
    PROVIDE (__stack_start = .);
    PROVIDE (__stack_shift = LOG2CEIL( ALIGN (__stack_size, 0x10)));
    /* NOTE(review): the location counter is not advanced past the stack, so
       __ehdr_start and _end have the same address as __stack_start and no
       space is reserved for the stack in the image -- confirm this is
       intended if anything places data at _end. */
    __ehdr_start = .;
    _end = .;
}
 
Now you are ready to compile, link, and run it. I have a GCC toolchain in /opt/riscv so that I can use the standard library. Since I use it to evaluate my downstream LLVM compiler, I might have different needs than you. In my case, I use clang to compile to object code and then use gcc to link it. But you could also use gcc to produce your object files.
 # Assemble init.s and compile main.c into object files. Only -c is used:
 # adding -S would make clang stop at assembly output instead of producing
 # the object files the link step needs.
 clang --target=riscv64 -I/opt/riscv/riscv64-unknown-elf/include -O3 -c init.s -o init.o
 clang --target=riscv64 -I/opt/riscv/riscv64-unknown-elf/include -O3 -c main.c -o main.o
 # Link with gcc using our own start-up code (-nostartfiles) and linker script.
 /opt/riscv/bin/riscv64-unknown-elf-gcc -static -nostartfiles -Tlink.ld main.o init.o -o main
 
And now you can run it.
 # Run the linked image `main` on QEMU's spike machine without a graphical
 # window. As described above, QEMU terminates once the start-up code writes
 # to tohost; presumably the exit code can then be read with `echo $?`.
 qemu-system-riscv64 -L /opt/riscv/riscv64-unknown-elf -nographic -machine spike -bios main