diff --git a/core/src/elf2rom.rs b/core/src/elf2rom.rs
index f2c7b1e6..62c1ff8d 100644
--- a/core/src/elf2rom.rs
+++ b/core/src/elf2rom.rs
@@ -10,36 +10,49 @@ use elf::{
 };
 use std::error::Error;
 
-/// Executes the file conversion process
+/// Executes the ROM transpilation process: from ELF to Zisk
 pub fn elf2rom(elf_file: String) -> Result<ZiskRom, Box<dyn Error>> {
+    // Get all data from the ELF file copied to a memory buffer
     let elf_file_path = std::path::PathBuf::from(elf_file.clone());
     let file_data = std::fs::read(elf_file_path)?;
 
+    // Parse the ELF data
     let elf_bytes = ElfBytes::<AnyEndian>::minimal_parse(file_data.as_slice())?;
 
+    // Create an empty ZiskRom instance
     let mut rom: ZiskRom = ZiskRom { next_init_inst_addr: ROM_ENTRY, ..Default::default() };
 
+    // Iterate on the available section headers of the ELF parsed data
     if let Some(section_headers) = elf_bytes.section_headers() {
         for section_header in section_headers {
+            // Consider only the section headers that contain program data
             if section_header.sh_type == SHT_PROGBITS {
+                // Get the program section data as a vector of bytes
                 let (data_u8, _) = elf_bytes.section_data(&section_header)?;
                 let mut data = data_u8.to_vec();
 
+                // Remove extra bytes if length is not 4-bytes aligned
                 while data.len() % 4 != 0 {
                     data.pop();
                 }
 
+                // Get the section data address
                 let addr = section_header.sh_addr;
 
+                // If the data contains instructions, parse them as RISC-V instructions and add them
+                // to the ROM instructions, at the specified program address
                 if (section_header.sh_flags & SHF_EXECINSTR as u64) != 0 {
                     add_zisk_code(&mut rom, addr, &data);
                 }
 
+                // If the data is a writable memory section, add it to the ROM memory using Zisk
+                // copy instructions
                 if (section_header.sh_flags & SHF_WRITE as u64) != 0 &&
                     addr >= RAM_ADDR &&
                     addr + data.len() as u64 <= RAM_ADDR + RAM_SIZE
                 {
                     add_zisk_init_data(&mut rom, addr, &data);
+                // Otherwise, add it to the ROM as RO data
                 } else {
                     rom.ro_data.push(RoData::new(addr, data.len(), data));
                 }
@@ -47,12 +60,14 @@ pub fn elf2rom(elf_file: String) -> Result<ZiskRom, Box<dyn Error>> {
         }
     }
 
+    // Add the program setup, system call and program wrapup instructions
     add_entry_exit_jmp(&mut rom, elf_bytes.ehdr.e_entry);
 
     // Preprocess the ROM (experimental)
+    // Split the ROM instructions based on their address in order to get a better performance when
+    // searching for the instruction corresponding to the pc program address
     let mut max_rom_entry = 0;
     let mut max_rom_instructions = 0;
-
     let mut min_rom_na_unstructions = u64::MAX;
     let mut max_rom_na_unstructions = 0;
     for instruction in &rom.insts {
@@ -115,7 +130,8 @@ pub fn elf2rom(elf_file: String) -> Result<ZiskRom, Box<dyn Error>> {
     Ok(rom)
 }
 
-/// Executes the file conversion process, and saves result into a file
+/// Executes the ELF file data transpilation process into a Zisk ROM, and saves the result into a
+/// file.  The file format can be JSON, PIL-based or binary.
 pub fn elf2romfile(
     elf_file: String,
     rom_file: String,
diff --git a/core/src/inst_context.rs b/core/src/inst_context.rs
index 38dabf5a..d1adbf49 100644
--- a/core/src/inst_context.rs
+++ b/core/src/inst_context.rs
@@ -2,14 +2,28 @@ use crate::{Mem, ROM_ENTRY};
 
 /// ZisK instruction context data container, storing the state of the execution
 pub struct InstContext {
+    // Memory, including several read-only sections and one read-write section (input data)
+    // This memory is initialized before running the program with the input data, and modified by
+    // the program instructions during the execution.  The RW data that has not been previously
+    // written is read as zero
     pub mem: Mem,
+
+    // Current values of registers a, b, c, and flag
     pub a: u64,
     pub b: u64,
     pub c: u64,
     pub flag: bool,
+
+    // Value of sp register
     pub sp: u64,
+
+    // Value of ROM program execution address, i.e. program counter (pc)
     pub pc: u64,
+
+    // Current execution step: 0, 1, 2...
     pub step: u64,
+
+    // End flag, set to true only by the last instruction to execute
     pub end: bool,
 }
 
@@ -18,7 +32,7 @@ impl InstContext {
     /// RisK instruction context constructor
     pub fn new() -> InstContext {
         InstContext {
-            mem: Mem::new(),
+            mem: Mem::default(),
             a: 0,
             b: 0,
             c: 0,
@@ -29,6 +43,8 @@ impl InstContext {
             end: false,
         }
     }
+
+    /// Creates a human-readable string describing the instruction context, for debugging purposes
     pub fn to_text(&self) -> String {
         let s = format! {"a={:x} b={:x} c={:x} flag={} sp={} pc={} step={} end={}", self.a, self.b, self.c, self.flag, self.sp, self.pc, self.step, self.end};
         s
@@ -36,6 +52,7 @@ impl InstContext {
 }
 
 impl Default for InstContext {
+    /// Default instruction context constructor
     fn default() -> Self {
         Self::new()
     }
diff --git a/core/src/mem.rs b/core/src/mem.rs
index 5660b6df..4d022f2c 100644
--- a/core/src/mem.rs
+++ b/core/src/mem.rs
@@ -3,34 +3,38 @@ use crate::UART_ADDR;
 use crate::MemSection;
 
 /// Memory structure, containing several read sections and one single write section
+#[derive(Default)]
 pub struct Mem {
     pub read_sections: Vec<MemSection>,
     pub write_section: MemSection,
 }
 
-/// Default constructor for Mem structure
-impl Default for Mem {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-/// Memory structure implementation
 impl Mem {
-    /// Memory structue constructor
-    pub fn new() -> Mem {
-        Mem { read_sections: Vec::new(), write_section: MemSection::new() }
-    }
-
     /// Adds a read section to the memory structure
     pub fn add_read_section(&mut self, start: u64, buffer: &[u8]) {
+        // Check that the start address is aligned to 8 bytes
+        if (start & 0x07) != 0 {
+            panic!(
+                "Mem::add_read_section() got a start address={:x} not alligned to 8 bytes",
+                start
+            );
+        }
+
+        // Calculate the end address
         let end = start + buffer.len() as u64;
+
+        // Create a mem section with this data
         let mut mem_section = MemSection { start, end, buffer: buffer.to_owned() };
+
+        // Add zero-value bytes until the end address is aligned to 8 bytes
         while (mem_section.end) % 8 != 0 {
             mem_section.buffer.push(0);
             mem_section.end += 1;
         }
+
+        // Push the new read section to the read sections list
         self.read_sections.push(mem_section);
+
         /*println!(
             "Mem::add_read_section() start={:x}={} len={} end={:x}={}",
             start,
@@ -45,12 +49,21 @@ impl Mem {
     pub fn add_write_section(&mut self, start: u64, size: u64) {
         //println!("Mem::add_write_section() start={} size={}", start, size);
 
+        // Check that the start address is aligned to 8 bytes
+        if (start & 0x07) != 0 {
+            panic!(
+                "Mem::add_write_section() got a start address={:x} not alligned to 8 bytes",
+                start
+            );
+        }
+
         // Check the start address is not zero
         if start == 0 {
             panic!("Mem::add_write_section() got invalid start={}", start);
         }
 
-        // Check the write section address has been set before this call
+        // Check the write section address has not been set before this call, since only one write
+        // section is allowed
         if self.write_section.start != 0 {
             panic!(
                 "Mem::add_write_section() only one write section allowed, write_section.start={}",
@@ -67,10 +80,11 @@ impl Mem {
         self.write_section.buffer = mem;
     }
 
-    /// Read a u64 value from the memory read sections, based on the provided address and width
+    /// Reads a 1, 2, 4 or 8 bytes value from the memory read sections, based on the provided
+    /// address and width
     #[inline(always)]
     pub fn read(&self, addr: u64, width: u64) -> u64 {
-        // First try to read in the write section
+        // First try to read from the write section
         if (addr >= self.write_section.start) && (addr <= (self.write_section.end - width)) {
             // Calculate the read position
             let read_position: usize = (addr - self.write_section.start) as usize;
@@ -94,7 +108,8 @@ impl Mem {
             return value;
         }
 
-        // Search for the section that contains the address using binary search (dicothomic search)
+        // Search for the section that contains the address using binary search (dichotomic search).
+        // Read sections are ordered by start address to allow this search.
         let section = if let Ok(section) = self.read_sections.binary_search_by(|section| {
             if addr < section.start {
                 std::cmp::Ordering::Greater
@@ -109,7 +124,7 @@ impl Mem {
             panic!("Mem::read() section not found for addr: {} with width: {}", addr, width);
         };
 
-        // Calculate the read position
+        // Calculate the buffer relative read position
         let read_position: usize = (addr - section.start) as usize;
 
         // Read the requested data based on the provided width
diff --git a/core/src/mem_section.rs b/core/src/mem_section.rs
index 50803f90..1f6bafb0 100644
--- a/core/src/mem_section.rs
+++ b/core/src/mem_section.rs
@@ -1,21 +1,7 @@
-/// Memory section data, including a buffer vector, and start and end addresses
+/// Memory section data, including a buffer (a vector of bytes) and start and end addresses
+#[derive(Default)]
 pub struct MemSection {
     pub start: u64,
     pub end: u64,
     pub buffer: Vec<u8>,
 }
-
-/// Default constructor for MemSection structure
-impl Default for MemSection {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-/// Memory section structure implementation
-impl MemSection {
-    /// Memory section constructor
-    pub fn new() -> MemSection {
-        MemSection { start: 0, end: 0, buffer: Vec::new() }
-    }
-}
diff --git a/core/src/rom.rs b/core/src/rom.rs
deleted file mode 100644
index abad3d0d..00000000
--- a/core/src/rom.rs
+++ /dev/null
@@ -1,199 +0,0 @@
-use indexmap::IndexMap;
-use serde::{Deserialize, Deserializer};
-use serde_json::Value;
-
-use crate::ZiskOperator;
-
-#[allow(dead_code)]
-#[derive(Debug, Deserialize)]
-pub struct RomProgram {
-    #[serde(rename = "nextInitInstAddr")]
-    pub next_init_inst_addr: usize,
-    #[serde(rename = "insts")]
-    #[serde(deserialize_with = "deserialize_insts")]
-    pub insts: IndexMap<String, RomInstruction>,
-    #[serde(rename = "roData")]
-    pub ro_data: Vec<RomRoData>,
-}
-
-#[allow(dead_code)]
-#[derive(Debug, Deserialize)]
-pub struct RomInstruction {
-    pub paddr: u64,
-    #[serde(deserialize_with = "deserialize_bool")]
-    pub store_ra: bool,
-    #[serde(deserialize_with = "deserialize_bool")]
-    pub store_use_sp: bool,
-    pub store: RomStore,
-    pub store_offset: i64,
-    #[serde(deserialize_with = "deserialize_bool")]
-    pub set_pc: bool,
-    #[serde(deserialize_with = "deserialize_bool")]
-    pub set_sp: bool,
-    pub ind_width: u64,
-    pub inc_sp: i64,
-    #[serde(deserialize_with = "deserialize_bool")]
-    pub end: bool,
-    pub a_src: RomSrc,
-    pub a_use_sp_imm1: isize,
-    pub a_offset_imm0: isize,
-    pub b_src: RomSrc,
-    pub b_use_sp_imm1: isize,
-    pub b_offset_imm0: isize,
-    pub jmp_offset1: isize,
-    pub jmp_offset2: isize,
-    #[serde(deserialize_with = "deserialize_bool")]
-    pub is_external_op: bool,
-    pub op: ZiskOperator,
-    #[serde(rename = "opStr")]
-    pub op_str: String,
-    pub verbose: String,
-}
-
-#[derive(Debug)]
-pub enum RomStore {
-    StoreNone,
-    StoreMem,
-    StoreInd,
-}
-
-impl<'de> Deserialize<'de> for RomStore {
-    fn deserialize<D>(deserializer: D) -> Result<RomStore, D::Error>
-    where
-        D: Deserializer<'de>,
-    {
-        let value: u64 = Deserialize::deserialize(deserializer)?;
-        match value {
-            0 => Ok(RomStore::StoreNone),
-            1 => Ok(RomStore::StoreMem),
-            2 => Ok(RomStore::StoreInd),
-            _ => Err(serde::de::Error::custom("Invalid value for RomStore")),
-        }
-    }
-}
-
-#[derive(Debug)]
-pub enum RomSrc {
-    SrcC,
-    SrcMem,
-    SrcImm,
-    SrcStep,
-    SrcSp,
-    SrcInd,
-}
-
-impl<'de> Deserialize<'de> for RomSrc {
-    fn deserialize<D>(deserializer: D) -> Result<RomSrc, D::Error>
-    where
-        D: Deserializer<'de>,
-    {
-        let value: u64 = Deserialize::deserialize(deserializer)?;
-        match value {
-            0 => Ok(RomSrc::SrcC),
-            1 => Ok(RomSrc::SrcMem),
-            2 => Ok(RomSrc::SrcImm),
-            3 => Ok(RomSrc::SrcStep),
-            4 => Ok(RomSrc::SrcSp),
-            5 => Ok(RomSrc::SrcInd),
-            _ => Err(serde::de::Error::custom("Invalid value for RomSrc")),
-        }
-    }
-}
-
-#[allow(dead_code)]
-#[derive(Debug, Deserialize)]
-pub struct RomRoData {
-    pub start: usize,
-    pub data: RomRoData2,
-}
-
-#[allow(dead_code)]
-#[derive(Debug, Deserialize)]
-pub struct RomRoData2 {
-    #[serde(rename = "type")]
-    pub type_: RomRoDataType,
-    pub data: Vec<usize>,
-}
-
-#[derive(Debug, Deserialize)]
-pub enum RomRoDataType {
-    #[serde(rename = "Buffer")]
-    Buffer,
-}
-
-fn deserialize_insts<'de, D>(deserializer: D) -> Result<IndexMap<String, RomInstruction>, D::Error>
-where
-    D: Deserializer<'de>,
-{
-    let value: IndexMap<String, Value> = Deserialize::deserialize(deserializer)?;
-    value
-        .into_iter()
-        .map(|(k, v)| {
-            serde_json::from_value(v).map(|inst| (k, inst)).map_err(serde::de::Error::custom)
-        })
-        .collect()
-}
-
-fn deserialize_bool<'de, D>(deserializer: D) -> Result<bool, D::Error>
-where
-    D: Deserializer<'de>,
-{
-    let value: u8 = Deserialize::deserialize(deserializer)?;
-    match value {
-        0 => Ok(false),
-        1 => Ok(true),
-        _ => Err(serde::de::Error::custom("expected 0 or 1")),
-    }
-}
-
-#[allow(dead_code)]
-impl RomProgram {
-    pub fn from_file(file_path: &str) -> Result<RomProgram, std::io::Error> {
-        let path = std::path::Path::new(file_path);
-        if !path.exists() {
-            println!("File {} does not exist", file_path);
-            return Err(std::io::Error::new(std::io::ErrorKind::NotFound, "File not found"));
-        } else {
-            println!("File exists");
-        }
-        let file_contents = std::fs::read_to_string(file_path)?;
-
-        let parsed_json: RomProgram = serde_json::from_str(&file_contents)?;
-
-        Ok(parsed_json)
-    }
-
-    pub fn from_json(input_json: &str) -> Result<RomProgram, serde_json::Error> {
-        let parsed_json: RomProgram = serde_json::from_str(input_json)?;
-
-        Ok(parsed_json)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    // Test deserialization and parsing of JSON input
-    #[test]
-    fn test_parse_rom_json() {
-        let rom_program_json = RomProgram::from_file("./data/rom.json");
-        assert!(rom_program_json.is_ok());
-
-        // let rom_program = rom_program_json.unwrap();
-        // println!("{:?}", rom_program.insts);
-    }
-
-    // Test deserialization and parsing of JSON input with wrong fields
-    #[test]
-    fn test_parse_input_json_wrong_fields() {
-        let input_json = r#"
-            {
-                "wrong": "fields"
-            }
-        "#;
-
-        let rom_program_json = RomProgram::from_json(input_json);
-        assert!(rom_program_json.is_err());
-    }
-}
diff --git a/core/src/utils.rs b/core/src/utils.rs
index 003aab23..3edf5e25 100644
--- a/core/src/utils.rs
+++ b/core/src/utils.rs
@@ -3,47 +3,12 @@ use crate::{SRC_C, SRC_IMM, SRC_IND, SRC_MEM, SRC_STEP, STORE_IND, STORE_MEM, ST
 // #[cfg(feature = "sp")]
 // use crate::SRC_SP;
 
-/// Read a u64 value from the u8 vector at the specified position in little endian order
-#[inline(always)]
-pub fn read_u64_le(data: &[u8], index: usize) -> u64 {
-    u64::from_le_bytes(data[index..index + 8].try_into().unwrap())
-}
-
-/// Read a u32 value from the u8 vector at the specified position in little endian order
-#[inline(always)]
-pub fn read_u32_le(data: &[u8], index: usize) -> u32 {
-    u32::from_le_bytes(data[index..index + 4].try_into().unwrap())
-}
-
-/// Read a u16 value from the u8 vector at the specified position in little endian order
-#[inline(always)]
-pub fn read_u16_le(data: &[u8], index: usize) -> u16 {
-    u16::from_le_bytes(data[index..index + 2].try_into().unwrap())
-}
-
-/// Write a u64 value to the u8 vector at the specified position in little endian order
-#[inline(always)]
-pub fn write_u64_le(data: &mut [u8], index: usize, value: u64) {
-    data[index..index + 8].copy_from_slice(&value.to_le_bytes());
-}
-
-/// Write a u32 value to the u8 vector at the specified position in little endian order
-#[inline(always)]
-pub fn write_u32_le(data: &mut [u8], index: usize, value: u32) {
-    data[index..index + 4].copy_from_slice(&value.to_le_bytes());
-}
-
-/// Write a u16 value to the u8 vector at the specified position in little endian order
-#[inline(always)]
-pub fn write_u16_le(data: &mut [u8], index: usize, value: u16) {
-    data[index..index + 2].copy_from_slice(&value.to_le_bytes());
-}
-
 /// Converts a u8 vector into a u32 vector
+/// The length of the input vector must be a multiple of 4
 pub fn convert_vector(input: &[u8]) -> Vec<u32> {
     // Check that the input length is a multiple of 4
     let input_len = input.len();
-    if (input_len % 4) != 0 {
+    if (input_len & 0x03) != 0 {
         panic!("convert_vector() found input length={} not a multiple of 4", input.len());
     }
 
@@ -55,13 +20,14 @@ pub fn convert_vector(input: &[u8]) -> Vec<u32> {
 
     // For every output u32 data, calculate it based on input u8 data, in little endian order
     for i in 0..output_len {
-        output.push(read_u32_le(input, 4 * i));
+        output.push(u32::from_le_bytes(input[4 * i..4 * i + 4].try_into().unwrap()));
     }
 
     // Return the output u32 vector
     output
 }
 
+/// Returns a human-readable text that describes the source of an a or b register
 pub fn source_to_str(source: u64) -> &'static str {
     match source {
         SRC_C => "SRC_C",
@@ -75,6 +41,7 @@ pub fn source_to_str(source: u64) -> &'static str {
     }
 }
 
+/// Returns a human-readable text that describes a c register store destination
 pub fn store_to_str(store: u64) -> &'static str {
     match store {
         STORE_NONE => "STORE_NONE",
diff --git a/core/src/zisk_definitions.rs b/core/src/zisk_definitions.rs
index 1c568108..a0bc4a32 100644
--- a/core/src/zisk_definitions.rs
+++ b/core/src/zisk_definitions.rs
@@ -1,4 +1,4 @@
-// a and b registers source types
+/// a and b registers source types
 pub const SRC_C: u64 = 0;
 pub const SRC_MEM: u64 = 1;
 pub const SRC_IMM: u64 = 2;
@@ -7,31 +7,63 @@ pub const SRC_STEP: u64 = 3;
 // pub const SRC_SP: u64 = 4;
 pub const SRC_IND: u64 = 5;
 
-// c register store destination types
+/// c register store destination types
 pub const STORE_NONE: u64 = 0;
 pub const STORE_MEM: u64 = 1;
 pub const STORE_IND: u64 = 2;
 
-/* Memory map:
+/// # Memory map:
+///
+/// The Zisk processor memory stores data in little-endian format.
+/// The addressable memory space is divided into several regions:
+///
+/// ```ignore
+///  |--------------- ROM_ENTRY first instruction   (    0x1000)
+///  | calls program at ROM_ADDR, then returns
+///  | kind of a BIOS
+///  |--------------- ROM_EXIT last instruction     (0x10000000)
+///        ...
+///  |--------------- ROM_ADDR                      (0x80000000)
+///  | (rom program)
+///  |--------------- INPUT_ADDR                    (0x90000000)
+///  | (input data)
+///  |--------------- SYS_ADDR=RAM_ADDR=REG_FIRST   (0xa0000000)
+///  | SYS_ADDR = 32 registers
+///  | UART_ADDR = standard output
+///  |--------------- OUTPUT_ADDR                   (0xa0010000)
+///  | (output data)
+///  |--------------- AVAILABLE_MEM_ADDR            (0xa0020000)
+///  | (program memory)
+///  |---------------                               (0xb0000000)
+/// ```
+/// ## ROM_ENTRY / ROM_ADDR / ROM_EXIT
+/// The program will start executing at the program address ROM_ENTRY.
+/// The first instructions do the basic program setup, including writing the input data into memory,
+/// configuring the ecall (system call) program address, and configuring the program completion
+/// return address. After the program setup, the program counter jumps to ROM_ADDR, executing the
+/// actual program. During the execution, the program can make system calls that will jump to the
+/// configured ecall program address, and return once the task has completed.  The precompiles are
+/// implemented via ecall. After the program is completed, the program counter will jump to the
+/// configured return address, where the execution wrapup tasks will happen, including reading the
+/// output data from memory. The address before the last one will jump to ROM_EXIT, the last
+/// instruction of the execution. In general, setup and wrapup instructions are located in low
+/// addresses, while the actual program instructions are located in high addresses.
+///
+/// ## INPUT_ADDR
+/// The input data for the program execution is copied in this memory region, beginning with
+/// INPUT_ADDR. After the data has been written by the setup process, this data can only be read by
+/// the program execution, i.e. it is a read-only (RO) memory region.
+///
+/// ## RAM_ADDR / SYS_ADDR / OUTPUT_ADDR / AVAILABLE_MEM_ADDR
+/// This memory section can be written and read by the program execution, i.e. it is a read-write
+/// (RW) memory region. The first RW region from SYS_ADDR to OUTPUT_ADDR is reserved for the system
+/// operation.  The lower addresses of this region are used to store 32 registers of 8 bytes each,
+/// i.e. 256 bytes; these registers are the equivalent to the RISC-V registers.  Any data of 1 byte
+/// written to UART_ADDR will be sent to the standard output of the system. The second RW region
+/// from OUTPUT_ADDR to AVAILABLE_MEM_ADDR is reserved to copy the output data during the program
+/// execution. The third RW region from AVAILABLE_MEM_ADDR onwards can be used during the program
+/// execution.
 
-  |--------------- ROM_ENTRY first instr   (    0x1000)
-  | calls program at ROM_ADDR, then returns
-  | kind of a BIOS
-  |--------------- ROM_EXIT last instr     (0x10000000)
-        ...
-  |--------------- ROM_ADDR                (0x80000000)
-  | (rom program)
-  |--------------- INPUT_ADDR              (0x90000000)
-  | (input data)
-  |--------------- SYS_ADDR (= RAM_ADDR)   (0xa0000000)
-  | (sys = 32 registers)
-  |--------------- OUTPUT_ADDR             (0xa0010000)
-  | (output data)
-  |--------------- AVAILABLE_MEM_ADDR      (0xa0020000)
-  | (program memory)
-  |---------------                         (0xb0000000)
-
-*/
 pub const ROM_ADDR: u64 = 0x80000000;
 pub const ROM_ADDR_MAX: u64 = INPUT_ADDR - 1;
 pub const INPUT_ADDR: u64 = 0x90000000;
@@ -49,45 +81,12 @@ pub const ROM_EXIT: u64 = 0x10000000;
 pub const ARCH_ID_ZISK: u64 = 0xFFFEEEE;
 pub const UART_ADDR: u64 = SYS_ADDR + 512;
 
-// Powers of 2 definitions
-pub const P2_0: u64 = 0x1;
-pub const P2_1: u64 = 0x2;
-pub const P2_2: u64 = 0x4;
-pub const P2_3: u64 = 0x8;
-pub const P2_4: u64 = 0x10;
-pub const P2_5: u64 = 0x20;
-pub const P2_6: u64 = 0x40;
-pub const P2_7: u64 = 0x80;
-pub const P2_8: u64 = 0x100;
-pub const P2_9: u64 = 0x200;
-pub const P2_10: u64 = 0x400;
-pub const P2_11: u64 = 0x800;
-pub const P2_12: u64 = 0x1000;
-pub const P2_13: u64 = 0x2000;
-pub const P2_14: u64 = 0x4000;
-pub const P2_15: u64 = 0x8000;
-pub const P2_16: u64 = 0x10000;
-pub const P2_17: u64 = 0x20000;
-pub const P2_18: u64 = 0x40000;
-pub const P2_19: u64 = 0x80000;
-pub const P2_20: u64 = 0x100000;
-pub const P2_21: u64 = 0x200000;
-pub const P2_22: u64 = 0x400000;
-pub const P2_23: u64 = 0x800000;
-pub const P2_24: u64 = 0x1000000;
-pub const P2_25: u64 = 0x2000000;
-pub const P2_26: u64 = 0x4000000;
-pub const P2_27: u64 = 0x8000000;
-pub const P2_28: u64 = 0x10000000;
-pub const P2_29: u64 = 0x20000000;
-pub const P2_30: u64 = 0x40000000;
-pub const P2_31: u64 = 0x80000000;
-
-// Registers definitions
+/// Registers memory address definitions
 
 pub const REG_FIRST: u64 = SYS_ADDR;
 
-// The 32 registers are mapped to the first 32x8 bytes of system memory
+/// The 32 registers are mapped to the first 32x8 bytes of system memory.
+/// These are the generic register names, i.e. REG_Xn.
 pub const REG_X0: u64 = REG_FIRST;
 pub const REG_X1: u64 = REG_FIRST + 8;
 pub const REG_X2: u64 = REG_FIRST + 2_u64 * 8;
@@ -123,7 +122,7 @@ pub const REG_X31: u64 = REG_FIRST + 31_u64 * 8;
 
 pub const REG_LAST: u64 = REG_X31;
 
-// ABI register names
+/// ABI register names.
 pub const REG_ZERO: u64 = REG_X0;
 pub const REG_RA: u64 = REG_X1; // Return address
 pub const REG_SP: u64 = REG_X2; // Stack pointer
@@ -156,3 +155,40 @@ pub const REG_T3: u64 = REG_X28; // Temporary register 3
 pub const REG_T4: u64 = REG_X29; // Temporary register 4
 pub const REG_T5: u64 = REG_X30; // Temporary register 5
 pub const REG_T6: u64 = REG_X31; // Temporary register 6
+
+/// Power of 2 constant definitions, named P2_n, equivalent to 2 to the power of n, in u64 format
+pub const P2_0: u64 = 0x1;
+pub const P2_1: u64 = 0x2;
+pub const P2_2: u64 = 0x4;
+pub const P2_3: u64 = 0x8;
+pub const P2_4: u64 = 0x10;
+pub const P2_5: u64 = 0x20;
+pub const P2_6: u64 = 0x40;
+pub const P2_7: u64 = 0x80;
+pub const P2_8: u64 = 0x100;
+pub const P2_9: u64 = 0x200;
+pub const P2_10: u64 = 0x400;
+pub const P2_11: u64 = 0x800;
+pub const P2_12: u64 = 0x1000;
+pub const P2_13: u64 = 0x2000;
+pub const P2_14: u64 = 0x4000;
+pub const P2_15: u64 = 0x8000;
+pub const P2_16: u64 = 0x10000;
+pub const P2_17: u64 = 0x20000;
+pub const P2_18: u64 = 0x40000;
+pub const P2_19: u64 = 0x80000;
+pub const P2_20: u64 = 0x100000;
+pub const P2_21: u64 = 0x200000;
+pub const P2_22: u64 = 0x400000;
+pub const P2_23: u64 = 0x800000;
+pub const P2_24: u64 = 0x1000000;
+pub const P2_25: u64 = 0x2000000;
+pub const P2_26: u64 = 0x4000000;
+pub const P2_27: u64 = 0x8000000;
+pub const P2_28: u64 = 0x10000000;
+pub const P2_29: u64 = 0x20000000;
+pub const P2_30: u64 = 0x40000000;
+pub const P2_31: u64 = 0x80000000;
+
+/// Constant values used in operation functions and state machine executors
+pub const M64: u64 = 0xFFFFFFFFFFFFFFFF;
diff --git a/core/src/zisk_inst.rs b/core/src/zisk_inst.rs
index c384198b..cf0ab55b 100644
--- a/core/src/zisk_inst.rs
+++ b/core/src/zisk_inst.rs
@@ -3,6 +3,10 @@ use crate::{
     STORE_MEM,
 };
 
+/// Describes the type of the Zisk opcode.  This type determines how the operation result will be
+/// proven. Internal operations are proven as part of the main state machine itself, given their
+/// simplicity. External operations (rest of types) are proven in their corresponding secondary
+/// state machine.
 #[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
 #[repr(u32)]
 pub enum ZiskOperationType {
@@ -17,12 +21,11 @@ pub enum ZiskOperationType {
 
 pub const ZISK_OPERATION_TYPE_VARIANTS: usize = 7;
 
-/// ZisK instruction definition
-///
-/// ZisK instruction defined as a binary operation with 2 results: op(a, b) -> (c, flag)
+/// ZisK instructions are defined as a binary operation with 2 results: op(a, b) -> (c, flag)
+/// a, b and c are u64 registers; flag is a boolean
 /// a and b are loaded from the respective sources specified in the instruction
 /// c is stored according to the destination specified in the instruction
-/// flag can only be 0 or 1
+/// flag meaning is operation-dependent
 #[derive(Debug, Clone)]
 pub struct ZiskInst {
     pub paddr: u64,
@@ -90,9 +93,9 @@ impl Default for ZiskInst {
     }
 }
 
-/// ZisK instruction implementation
 impl ZiskInst {
-    /// Creates a human-readable string containing the ZisK instruction fields that are not zero
+    /// Creates a human-readable string containing the ZisK instruction fields that are not zero.
+    /// Used only for debugging.
     pub fn to_text(&self) -> String {
         let mut s = String::new();
         if self.paddr != 0 {
@@ -166,6 +169,8 @@ impl ZiskInst {
         s
     }
 
+    /// Constructs a `flags` bitmap made of combinations of fields of the Zisk instruction.  This
+    /// field is used by the PIL to prove some of the operations.
     pub fn get_flags(&self) -> u64 {
         let flags: u64 = 1 |
             (((self.a_src == SRC_IMM) as u64) << 1) |
diff --git a/core/src/zisk_inst_builder.rs b/core/src/zisk_inst_builder.rs
index 78e19f88..643e7b5e 100644
--- a/core/src/zisk_inst_builder.rs
+++ b/core/src/zisk_inst_builder.rs
@@ -1,59 +1,28 @@
 use crate::{
     zisk_ops::{InvalidNameError, OpType, ZiskOp},
-    ZiskInst, ZiskOperationType, SRC_C, SRC_IMM, SRC_IND, SRC_MEM, SRC_STEP, STORE_IND, STORE_MEM,
-    STORE_NONE, SYS_ADDR,
+    ZiskInst, REG_FIRST, SRC_C, SRC_IMM, SRC_IND, SRC_MEM, SRC_STEP, STORE_IND, STORE_MEM,
+    STORE_NONE,
 };
 
 // #[cfg(feature = "sp")]
 // use crate::SRC_SP;
 
-#[derive(Debug, Clone)]
+/// Helps building a Zisk instruction during the transpilation process
+#[derive(Debug, Clone, Default)]
 pub struct ZiskInstBuilder {
-    ind_width_set: bool,
     pub i: ZiskInst,
-    regs_addr: u64,
 }
 
 impl ZiskInstBuilder {
+    /// Constructor setting the initial pc address
     #[inline]
-    pub const fn new(paddr: u64) -> ZiskInstBuilder {
-        let regs_addr = SYS_ADDR;
-
-        ZiskInstBuilder {
-            ind_width_set: false,
-            i: ZiskInst {
-                paddr,
-                store_ra: false,
-                store_use_sp: false,
-                store: STORE_NONE,
-                store_offset: 0,
-                set_pc: false,
-                // #[cfg(feature = "sp")]
-                // set_sp: false,
-                ind_width: 8,
-                // #[cfg(feature = "sp")]
-                // inc_sp: 0,
-                end: false,
-                a_src: 0,
-                a_use_sp_imm1: 0,
-                a_offset_imm0: 0,
-                b_src: 0,
-                b_use_sp_imm1: 0,
-                b_offset_imm0: 0,
-                jmp_offset1: 0,
-                jmp_offset2: 0,
-                is_external_op: false,
-                op: 0,
-                func: |_| (),
-                op_str: "",
-                op_type: ZiskOperationType::None,
-                verbose: String::new(),
-                m32: false,
-            },
-            regs_addr,
-        }
+    pub fn new(paddr: u64) -> ZiskInstBuilder {
+        let mut zib = ZiskInstBuilder::default();
+        zib.i.paddr = paddr;
+        zib
     }
 
+    /// Converts a string to an a source value
     fn a_src(&self, src: &str) -> u64 {
         match src {
             "mem" => SRC_MEM,
@@ -66,6 +35,7 @@ impl ZiskInstBuilder {
         }
     }
 
+    /// Converts a string to a b source value
     fn b_src(&self, src: &str) -> u64 {
         match src {
             "mem" => SRC_MEM,
@@ -76,6 +46,7 @@ impl ZiskInstBuilder {
         }
     }
 
+    /// Converts a string to a c store value
     fn c_store(&self, store: &str) -> u64 {
         match store {
             "none" => STORE_NONE,
@@ -85,6 +56,7 @@ impl ZiskInstBuilder {
         }
     }
 
+    /// Splits a 128-bit integer into two 32-bit chunks, returned as (low, high)
     pub fn nto32s(n: i128) -> (u32, u32) {
         let mut a = n;
         if a >= (1_i128 << 64) {
@@ -99,6 +71,7 @@ impl ZiskInstBuilder {
         ((a & 0xFFFFFFFF) as u32, (a >> 32) as u32)
     }
 
+    /// Sets the a source instruction attributes
     pub fn src_a(&mut self, src_input: &str, offset_imm_reg_input: u64, use_sp: bool) {
         let mut src = src_input;
         let mut offset_imm_reg = offset_imm_reg_input;
@@ -108,7 +81,7 @@ impl ZiskInstBuilder {
                 offset_imm_reg = 0;
             } else {
                 src = "mem";
-                offset_imm_reg = self.regs_addr + offset_imm_reg * 8;
+                offset_imm_reg = REG_FIRST + offset_imm_reg * 8;
             }
         }
         self.i.a_src = self.a_src(src);
@@ -130,6 +103,7 @@ impl ZiskInstBuilder {
         }
     }
 
+    /// Sets the b source instruction attributes
     pub fn src_b(&mut self, src_input: &str, offset_imm_reg_input: u64, use_sp: bool) {
         let mut src = src_input;
         let mut offset_imm_reg = offset_imm_reg_input;
@@ -139,7 +113,7 @@ impl ZiskInstBuilder {
                 offset_imm_reg = 0;
             } else {
                 src = "mem";
-                offset_imm_reg = self.regs_addr + offset_imm_reg * 8;
+                offset_imm_reg = REG_FIRST + offset_imm_reg * 8;
             }
         }
         self.i.b_src = self.b_src(src);
@@ -161,6 +135,7 @@ impl ZiskInstBuilder {
         }
     }
 
+    /// Sets the c store instruction attributes
     pub fn store(&mut self, dst_input: &str, offset_input: i64, use_sp: bool, store_ra: bool) {
         let mut dst = dst_input;
         let mut offset = offset_input;
@@ -169,7 +144,7 @@ impl ZiskInstBuilder {
                 return;
             } else {
                 dst = "mem";
-                offset = self.regs_addr as i64 + offset * 8;
+                offset = REG_FIRST as i64 + offset * 8;
             }
         }
 
@@ -185,10 +160,12 @@ impl ZiskInstBuilder {
         }
     }
 
+    /// Sets the c store instruction attributes with the store ra flag set to true
     pub fn store_ra(&mut self, dst: &str, offset: i64, use_sp: bool) {
         self.store(dst, offset, use_sp, true);
     }
 
+    /// Sets the set pc flag to true
     pub fn set_pc(&mut self) {
         self.i.set_pc = true;
     }
@@ -198,6 +175,7 @@ impl ZiskInstBuilder {
     //     self.i.set_sp = true;
     // }
 
+    /// Sets the opcode, and other instruction attributes that depend on it
     pub fn op(&mut self, optxt: &str) -> Result<(), InvalidNameError> {
         let op = ZiskOp::try_from_name(optxt)?;
         self.i.is_external_op = op.op_type() != OpType::Internal;
@@ -209,19 +187,24 @@ impl ZiskInstBuilder {
         Ok(())
     }
 
+    /// Sets jump offsets.  The first offset is added to the pc when a set pc or a flag happens,
+    /// and the second offset is the default one.
     pub fn j(&mut self, j1: i32, j2: i32) {
         self.i.jmp_offset1 = j1 as i64;
         self.i.jmp_offset2 = j2 as i64;
     }
 
+    /// Sets the indirection data width.  Accepted values are 1, 2, 4 and 8 (bytes).
     pub fn ind_width(&mut self, w: u64) {
-        if w != 1 && w != 2 && w != 4 && w != 8 {
-            panic!("ZiskInstBuilder::indWidth() invalid v={}", w);
-        }
-        self.i.ind_width = w;
-        self.ind_width_set = true;
+        self.i.ind_width = match w {
+            1 | 2 | 4 | 8 => w,
+            _ => {
+                panic!("ZiskInstBuilder::indWidth() invalid widtch={}", w);
+            }
+        };
     }
 
+    /// Sets the end flag to true, to be called only by the last instruction in any execution path
     pub fn end(&mut self) {
         self.i.end = true;
     }
@@ -231,10 +214,12 @@ impl ZiskInstBuilder {
     //     self.i.inc_sp += inc;
     // }
 
+    /// Sets a verbose description of the instruction
     pub fn verbose(&mut self, s: &str) {
         self.i.verbose = s.to_owned();
     }
 
+    /// Called when the instruction has been built
     pub fn build(&mut self) {
         //print!("ZiskInstBuilder::build() i=[ {} ]\n", self.i.to_string());
     }
diff --git a/core/src/zisk_opcodes.rs b/core/src/zisk_opcodes.rs
deleted file mode 100644
index 48dbc8c4..00000000
--- a/core/src/zisk_opcodes.rs
+++ /dev/null
@@ -1,118 +0,0 @@
-use serde::{de, Deserialize, Deserializer};
-
-#[allow(dead_code)]
-#[derive(Debug)]
-pub enum ZiskOperator {
-    Flag,
-    CopyB,
-    SignExtendB,
-    SignExtendH,
-    SignExtendW,
-    Add,
-    AddW,
-    Sub,
-    SubW,
-    Sll,
-    SllW,
-    Sra,
-    Srl,
-    SraW,
-    SrlW,
-    Eq,
-    EqW,
-    Ltu,
-    Lt,
-    LtuW,
-    LtW,
-    Leu,
-    Le,
-    LeuW,
-    LeW,
-    And,
-    Or,
-    Xor,
-    Mulu,
-    Mul,
-    MulW,
-    MulUh,
-    MulH,
-    MulSuh,
-    Divu,
-    Div,
-    DivuW,
-    DivW,
-    Remu,
-    Rem,
-    RemuW,
-    RemW,
-    Minu,
-    Min,
-    MinuW,
-    MinW,
-    Maxu,
-    Max,
-    MaxuW,
-    MaxW,
-}
-
-impl<'de> Deserialize<'de> for ZiskOperator {
-    fn deserialize<D>(deserializer: D) -> Result<ZiskOperator, D::Error>
-    where
-        D: Deserializer<'de>,
-    {
-        let value: u8 = Deserialize::deserialize(deserializer)?;
-        match value {
-            0x00 => Ok(ZiskOperator::Flag),
-            0x01 => Ok(ZiskOperator::CopyB),
-            0x23 => Ok(ZiskOperator::SignExtendB),
-            0x24 => Ok(ZiskOperator::SignExtendH),
-            0x25 => Ok(ZiskOperator::SignExtendW),
-            0x02 => Ok(ZiskOperator::Add),
-            0x12 => Ok(ZiskOperator::AddW),
-            0x03 => Ok(ZiskOperator::Sub),
-            0x13 => Ok(ZiskOperator::SubW),
-            0x0d => Ok(ZiskOperator::Sll),
-            0x1d => Ok(ZiskOperator::SllW),
-            0x0f => Ok(ZiskOperator::Sra),
-            0x0e => Ok(ZiskOperator::Srl),
-            0x1f => Ok(ZiskOperator::SraW),
-            0x1e => Ok(ZiskOperator::SrlW),
-            0x08 => Ok(ZiskOperator::Eq),
-            0x18 => Ok(ZiskOperator::EqW),
-            0x04 => Ok(ZiskOperator::Ltu),
-            0x05 => Ok(ZiskOperator::Lt),
-            0x14 => Ok(ZiskOperator::LtuW),
-            0x15 => Ok(ZiskOperator::LtW),
-            0x06 => Ok(ZiskOperator::Leu),
-            0x07 => Ok(ZiskOperator::Le),
-            0x16 => Ok(ZiskOperator::LeuW),
-            0x17 => Ok(ZiskOperator::LeW),
-            0x20 => Ok(ZiskOperator::And),
-            0x21 => Ok(ZiskOperator::Or),
-            0x22 => Ok(ZiskOperator::Xor),
-            0xb0 => Ok(ZiskOperator::Mulu),
-            0xb1 => Ok(ZiskOperator::Mul),
-            0xb5 => Ok(ZiskOperator::MulW),
-            0xb8 => Ok(ZiskOperator::MulUh),
-            0xb9 => Ok(ZiskOperator::MulH),
-            0xbb => Ok(ZiskOperator::MulSuh),
-            0xc0 => Ok(ZiskOperator::Divu),
-            0xc1 => Ok(ZiskOperator::Div),
-            0xc4 => Ok(ZiskOperator::DivuW),
-            0xc5 => Ok(ZiskOperator::DivW),
-            0xc8 => Ok(ZiskOperator::Remu),
-            0xc9 => Ok(ZiskOperator::Rem),
-            0xcc => Ok(ZiskOperator::RemuW),
-            0xcd => Ok(ZiskOperator::RemW),
-            0x09 => Ok(ZiskOperator::Minu),
-            0x0a => Ok(ZiskOperator::Min),
-            0x19 => Ok(ZiskOperator::MinuW),
-            0x1a => Ok(ZiskOperator::MinW),
-            0x0b => Ok(ZiskOperator::Maxu),
-            0x0c => Ok(ZiskOperator::Max),
-            0x1b => Ok(ZiskOperator::MaxuW),
-            0x1c => Ok(ZiskOperator::MaxW),
-            _ => Err(de::Error::custom(format!("Unknown ZiskOperator code: {}", value))),
-        }
-    }
-}
diff --git a/core/src/zisk_ops.rs b/core/src/zisk_ops.rs
index 661b8c19..4b1d7317 100644
--- a/core/src/zisk_ops.rs
+++ b/core/src/zisk_ops.rs
@@ -10,7 +10,7 @@ use std::{
 };
 use tiny_keccak::keccakf;
 
-use crate::{InstContext, ZiskOperationType, ZiskRequiredOperation, SYS_ADDR};
+use crate::{InstContext, ZiskOperationType, ZiskRequiredOperation, M64, REG_A0, SYS_ADDR};
 
 /// Determines the type of a [`ZiskOp`]
 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
@@ -217,6 +217,10 @@ macro_rules! define_ops {
     };
 }
 
+/// Table of Zisk opcode definitions: enum, name, type, cost, code and implementation functions.
+/// This table is the backbone of the Zisk processor, it determines what functionality is supported,
+/// and what state machine is responsible for proving the execution of every opcode, based on its
+/// type.
 define_ops! {
     (Flag, "flag", Internal, 0, 0x00, opc_flag, op_flag),
     (CopyB, "copyb", Internal, 0, 0x01, opc_copyb, op_copyb),
@@ -272,159 +276,211 @@ define_ops! {
     (PubOut, "pubout", PubOut, 77, 0x30, opc_pubout, op_pubout), // TODO: New type
 }
 
-// Constant values used in operation functions
-const M64: u64 = 0xFFFFFFFFFFFFFFFF;
+/// # OPCODE operation functions, called `op_<opcode>`.  The `opc_<opcode>` functions are wrappers over the `op_<opcode>` functions that accept an instruction context as input/output parameter containing a, b, c and flag attributes.
 
-// Main binary operations
+/// ## INTERNAL operations
 
 /// Sets flag to true (and c to 0)
 #[inline(always)]
 pub const fn op_flag(_a: u64, _b: u64) -> (u64, bool) {
     (0, true)
 }
+
+/// InstContext-based wrapper over op_flag()
 #[inline(always)]
 pub fn opc_flag(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_flag(ctx.a, ctx.b);
 }
 
-/// Copies register b into c
+/// Copies register b into c (and flag to false)
 #[inline(always)]
 pub const fn op_copyb(_a: u64, b: u64) -> (u64, bool) {
     (b, false)
 }
+
+/// InstContext-based wrapper over op_copyb()
 #[inline(always)]
 pub fn opc_copyb(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_copyb(ctx.a, ctx.b);
 }
 
+/// ## SIGN EXTEND operations for different data widths (i8, i16 and i32) --> i64 --> u64
+
 /// Converts b from a signed 8-bits number in the range [-128, +127] into a signed 64-bit number of
-/// the same value, and stores the result in c
+/// the same value, adding 0xFFFFFFFFFFFFFF00 if negative, and stores the result in c as a u64 (and
+/// sets flag to false)
 #[inline(always)]
 pub const fn op_signextend_b(_a: u64, b: u64) -> (u64, bool) {
     ((b as i8) as u64, false)
 }
+
+/// InstContext-based wrapper over op_signextend_b()
 #[inline(always)]
 pub fn opc_signextend_b(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_signextend_b(ctx.a, ctx.b);
 }
 
 /// Converts b from a signed 16-bits number in the range [-32768, 32767] into a signed 64-bit number
-/// of the same value, and stores the result in c
+/// of the same value, adding 0xFFFFFFFFFFFF0000 if negative, and stores the result in c as a u64
+/// (and sets flag to false)
 #[inline(always)]
 pub const fn op_signextend_h(_a: u64, b: u64) -> (u64, bool) {
     ((b as i16) as u64, false)
 }
+
+/// InstContext-based wrapper over op_signextend_h()
 #[inline(always)]
 pub fn opc_signextend_h(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_signextend_h(ctx.a, ctx.b);
 }
 
 /// Converts b from a signed 32-bits number in the range [-2147483648, 2147483647] into a signed
-/// 64-bit number of the same value, and stores the result in c
+/// 64-bit number of the same value, adding 0xFFFFFFFF00000000 if negative, and stores the result in
+/// c as a u64 (and sets flag to false)
 #[inline(always)]
 pub const fn op_signextend_w(_a: u64, b: u64) -> (u64, bool) {
     ((b as i32) as u64, false)
 }
+
+/// InstContext-based wrapper over op_signextend_w()
 #[inline(always)]
 pub fn opc_signextend_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_signextend_w(ctx.a, ctx.b);
 }
 
-/// Adds a and b, and stores the result in c
+/// ## ADD AND SUB operations for different data widths (i32 and u64)
+
+/// Adds a and b as 64-bit unsigned values, and stores the result in c (and sets flag to false)
 #[inline(always)]
 pub fn op_add(a: u64, b: u64) -> (u64, bool) {
     ((Wrapping(a) + Wrapping(b)).0, false)
 }
+
+/// InstContext-based wrapper over op_add()
 #[inline(always)]
 pub fn opc_add(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_add(ctx.a, ctx.b);
 }
 
-/// Adds a and b as 32-bit unsigned values, and stores the result in c
+/// Adds a and b as 32-bit signed values, and stores the result in c (and sets flag to false)
 #[inline(always)]
 pub fn op_add_w(a: u64, b: u64) -> (u64, bool) {
     ((Wrapping(a as i32) + Wrapping(b as i32)).0 as u64, false)
 }
+
+/// InstContext-based wrapper over op_add_w()
 #[inline(always)]
 pub fn opc_add_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_add_w(ctx.a, ctx.b);
 }
 
-/// Subs a and b, and stores the result in c
+/// Subtracts b from a as 64-bit unsigned values, stores the result in c (and sets flag to false)
 #[inline(always)]
 pub fn op_sub(a: u64, b: u64) -> (u64, bool) {
     ((Wrapping(a) - Wrapping(b)).0, false)
 }
+
+/// InstContext-based wrapper over op_sub()
 #[inline(always)]
 pub fn opc_sub(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_sub(ctx.a, ctx.b);
 }
 
-/// Subs a and b as 32-bit unsigned values, and stores the result in c
+/// Subtracts b from a as 32-bit signed values, and stores the result in c (and sets flag to false)
 #[inline(always)]
 pub fn op_sub_w(a: u64, b: u64) -> (u64, bool) {
     ((Wrapping(a as i32) - Wrapping(b as i32)).0 as u64, false)
 }
+
+/// InstContext-based wrapper over op_sub_w()
 #[inline(always)]
 pub fn opc_sub_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_sub_w(ctx.a, ctx.b);
 }
 
+/// ## SHIFT operations
+
+/// Shifts a as a 64-bits unsigned value to the left b mod 64 bits, and stores the result in c (and
+/// sets flag to false)
 #[inline(always)]
 pub const fn op_sll(a: u64, b: u64) -> (u64, bool) {
     (a << (b & 0x3f), false)
 }
+
+/// InstContext-based wrapper over op_sll()
 #[inline(always)]
 pub fn opc_sll(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_sll(ctx.a, ctx.b);
 }
 
+/// Shifts a as a 32-bits unsigned value to the left b mod 64 bits, and stores the result in c (and
+/// sets flag to false)
 #[inline(always)]
 pub fn op_sll_w(a: u64, b: u64) -> (u64, bool) {
     (((Wrapping(a as u32) << (b & 0x3f) as usize).0 as i32) as u64, false)
 }
+
+/// InstContext-based wrapper over op_sll_w()
 #[inline(always)]
 pub fn opc_sll_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_sll_w(ctx.a, ctx.b);
 }
 
+/// Shifts a as a 64-bits signed value to the right b mod 64 bits, and stores the result in c (and
+/// sets flag to false)
 #[inline(always)]
 pub const fn op_sra(a: u64, b: u64) -> (u64, bool) {
     (((a as i64) >> (b & 0x3f)) as u64, false)
 }
+
+/// InstContext-based wrapper over op_sra()
 #[inline(always)]
 pub fn opc_sra(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_sra(ctx.a, ctx.b);
 }
 
+/// Shifts a as a 64-bits unsigned value to the right b mod 64 bits, and stores the result in c (and
+/// sets flag to false)
 #[inline(always)]
 pub const fn op_srl(a: u64, b: u64) -> (u64, bool) {
     (a >> (b & 0x3f), false)
 }
+
+/// InstContext-based wrapper over op_srl()
 #[inline(always)]
 pub fn opc_srl(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_srl(ctx.a, ctx.b);
 }
 
+/// Shifts a as a 32-bits signed value to the right b mod 64 bits, and stores the result in c (and
+/// sets flag to false)
 #[inline(always)]
 pub fn op_sra_w(a: u64, b: u64) -> (u64, bool) {
     ((Wrapping(a as i32) >> (b & 0x3f) as usize).0 as u64, false)
 }
+
+/// InstContext-based wrapper over op_sra_w()
 #[inline(always)]
 pub fn opc_sra_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_sra_w(ctx.a, ctx.b);
 }
 
+/// Shifts a as a 32-bits unsigned value to the right b mod 64 bits, and stores the result in c (and
+/// sets flag to false)
 #[inline(always)]
 pub fn op_srl_w(a: u64, b: u64) -> (u64, bool) {
     (((Wrapping(a as u32) >> (b & 0x3f) as usize).0 as i32) as u64, false)
 }
+
+/// InstContext-based wrapper over op_srl_w()
 #[inline(always)]
 pub fn opc_srl_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_srl_w(ctx.a, ctx.b);
 }
 
-/// If a equals b, returns c=1, flag=true
+/// ## COMPARISON operations
+
+/// If a and b are equal, it returns c=1, flag=true; otherwise it returns c=0, flag=false
 #[inline(always)]
 pub const fn op_eq(a: u64, b: u64) -> (u64, bool) {
     if a == b {
@@ -433,11 +489,15 @@ pub const fn op_eq(a: u64, b: u64) -> (u64, bool) {
         (0, false)
     }
 }
+
+/// InstContext-based wrapper over op_eq()
 #[inline(always)]
 pub fn opc_eq(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_eq(ctx.a, ctx.b);
 }
 
+/// If a and b are equal as 32-bit signed values, it returns c=1, flag=true; otherwise it returns
+/// c=0, flag=false
 #[inline(always)]
 pub const fn op_eq_w(a: u64, b: u64) -> (u64, bool) {
     if (a as i32) == (b as i32) {
@@ -446,12 +506,15 @@ pub const fn op_eq_w(a: u64, b: u64) -> (u64, bool) {
         (0, false)
     }
 }
+
+/// InstContext-based wrapper over op_eq_w()
 #[inline(always)]
 pub fn opc_eq_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_eq_w(ctx.a, ctx.b);
 }
 
-/// If a is strictly less than b, returns c=1, flag=true
+/// If a is strictly less than b, as 64-bit unsigned values, it returns c=1, flag=true; otherwise it
+/// returns c=0, flag=false
 #[inline(always)]
 pub const fn op_ltu(a: u64, b: u64) -> (u64, bool) {
     if a < b {
@@ -460,11 +523,15 @@ pub const fn op_ltu(a: u64, b: u64) -> (u64, bool) {
         (0, false)
     }
 }
+
+/// InstContext-based wrapper over op_ltu()
 #[inline(always)]
 pub fn opc_ltu(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_ltu(ctx.a, ctx.b);
 }
 
+/// If a is strictly less than b, as 64-bit signed values, it returns c=1, flag=true; otherwise it
+/// returns c=0, flag=false
 #[inline(always)]
 pub const fn op_lt(a: u64, b: u64) -> (u64, bool) {
     if (a as i64) < (b as i64) {
@@ -473,11 +540,15 @@ pub const fn op_lt(a: u64, b: u64) -> (u64, bool) {
         (0, false)
     }
 }
+
+/// InstContext-based wrapper over op_lt()
 #[inline(always)]
 pub fn opc_lt(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_lt(ctx.a, ctx.b);
 }
 
+/// If a is strictly less than b, as 32-bit unsigned values, it returns c=1, flag=true; otherwise it
+/// returns c=0, flag=false
 #[inline(always)]
 pub const fn op_ltu_w(a: u64, b: u64) -> (u64, bool) {
     if (a as u32) < (b as u32) {
@@ -486,11 +557,15 @@ pub const fn op_ltu_w(a: u64, b: u64) -> (u64, bool) {
         (0, false)
     }
 }
+
+/// InstContext-based wrapper over op_ltu_w()
 #[inline(always)]
 pub fn opc_ltu_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_ltu_w(ctx.a, ctx.b);
 }
 
+/// If a is strictly less than b, as 32-bit signed values, it returns c=1, flag=true; otherwise it
+/// returns c=0, flag=false
 #[inline(always)]
 pub const fn op_lt_w(a: u64, b: u64) -> (u64, bool) {
     if (a as i32) < (b as i32) {
@@ -499,11 +574,15 @@ pub const fn op_lt_w(a: u64, b: u64) -> (u64, bool) {
         (0, false)
     }
 }
+
+/// InstContext-based wrapper over op_lt_w()
 #[inline(always)]
 pub fn opc_lt_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_lt_w(ctx.a, ctx.b);
 }
 
+/// If a is less than or equal to b, as 64-bit unsigned values, it returns c=1, flag=true; otherwise
+/// it returns c=0, flag=false
 #[inline(always)]
 pub const fn op_leu(a: u64, b: u64) -> (u64, bool) {
     if a <= b {
@@ -512,11 +591,15 @@ pub const fn op_leu(a: u64, b: u64) -> (u64, bool) {
         (0, false)
     }
 }
+
+/// InstContext-based wrapper over op_leu()
 #[inline(always)]
 pub fn opc_leu(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_leu(ctx.a, ctx.b);
 }
 
+/// If a is less than or equal to b, as 64-bit signed values, it returns c=1, flag=true; otherwise
+/// it returns c=0, flag=false
 #[inline(always)]
 pub const fn op_le(a: u64, b: u64) -> (u64, bool) {
     if (a as i64) <= (b as i64) {
@@ -525,11 +608,15 @@ pub const fn op_le(a: u64, b: u64) -> (u64, bool) {
         (0, false)
     }
 }
+
+/// InstContext-based wrapper over op_le()
 #[inline(always)]
 pub fn opc_le(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_le(ctx.a, ctx.b);
 }
 
+/// If a is less than or equal to b, as 32-bit unsigned values, it returns c=1, flag=true; otherwise
+/// it returns c=0, flag=false
 #[inline(always)]
 pub const fn op_leu_w(a: u64, b: u64) -> (u64, bool) {
     if (a as u32) <= (b as u32) {
@@ -538,11 +625,15 @@ pub const fn op_leu_w(a: u64, b: u64) -> (u64, bool) {
         (0, false)
     }
 }
+
+/// InstContext-based wrapper over op_leu_w()
 #[inline(always)]
 pub fn opc_leu_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_leu_w(ctx.a, ctx.b);
 }
 
+/// If a is less than or equal to b, as 32-bit signed values, it returns c=1, flag=true; otherwise
+/// it returns c=0, flag=false
 #[inline(always)]
 pub const fn op_le_w(a: u64, b: u64) -> (u64, bool) {
     if (a as i32) <= (b as i32) {
@@ -551,92 +642,127 @@ pub const fn op_le_w(a: u64, b: u64) -> (u64, bool) {
         (0, false)
     }
 }
+
+/// InstContext-based wrapper over op_le_w()
 #[inline(always)]
 pub fn opc_le_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_le_w(ctx.a, ctx.b);
 }
 
+/// ## LOGICAL operations
+
+/// Sets c to a AND b, and flag to false
 #[inline(always)]
 pub const fn op_and(a: u64, b: u64) -> (u64, bool) {
     (a & b, false)
 }
+
+/// InstContext-based wrapper over op_and()
 #[inline(always)]
 pub fn opc_and(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_and(ctx.a, ctx.b);
 }
 
+/// Sets c to a OR b, and flag to false
 #[inline(always)]
 pub const fn op_or(a: u64, b: u64) -> (u64, bool) {
     (a | b, false)
 }
+
+/// InstContext-based wrapper over op_or()
 #[inline(always)]
 pub fn opc_or(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_or(ctx.a, ctx.b);
 }
 
+/// Sets c to a XOR b, and flag to false
 #[inline(always)]
 pub const fn op_xor(a: u64, b: u64) -> (u64, bool) {
     (a ^ b, false)
 }
+
+/// InstContext-based wrapper over op_xor()
 #[inline(always)]
 pub fn opc_xor(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_xor(ctx.a, ctx.b);
 }
 
+/// ## ARITHMETIC operations: div / mul / rem
+
+/// Sets c to a x b, as 64-bits unsigned values, and flag to false
 #[inline(always)]
 pub fn op_mulu(a: u64, b: u64) -> (u64, bool) {
     ((Wrapping(a) * Wrapping(b)).0, false)
 }
+
+/// InstContext-based wrapper over op_mulu()
 #[inline(always)]
 pub fn opc_mulu(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_mulu(ctx.a, ctx.b);
 }
 
+/// Sets c to a x b, as 64-bits signed values, and flag to false
 #[inline(always)]
 pub fn op_mul(a: u64, b: u64) -> (u64, bool) {
     ((Wrapping(a as i64) * Wrapping(b as i64)).0 as u64, false)
 }
+
+/// InstContext-based wrapper over op_mul()
 #[inline(always)]
 pub fn opc_mul(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_mul(ctx.a, ctx.b);
 }
 
+/// Sets c to a x b, as 32-bits signed values, and flag to false
 #[inline(always)]
 pub fn op_mul_w(a: u64, b: u64) -> (u64, bool) {
     ((Wrapping(a as i32) * Wrapping(b as i32)).0 as u64, false)
 }
+
+/// InstContext-based wrapper over op_mul_w()
 #[inline(always)]
 pub fn opc_mul_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_mul_w(ctx.a, ctx.b);
 }
 
+/// Sets c to the highest 64-bits of a x b, as 128-bits unsigned values, and flag to false
 #[inline(always)]
 pub const fn op_muluh(a: u64, b: u64) -> (u64, bool) {
     (((a as u128 * b as u128) >> 64) as u64, false)
 }
+
+/// InstContext-based wrapper over op_muluh()
 #[inline(always)]
 pub fn opc_muluh(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_muluh(ctx.a, ctx.b);
 }
 
+/// Sets c to the highest 64-bits of a x b, as 128-bits signed values, and flag to false
 #[inline(always)]
 pub const fn op_mulh(a: u64, b: u64) -> (u64, bool) {
     (((((a as i64) as i128) * ((b as i64) as i128)) >> 64) as u64, false)
 }
+
+/// InstContext-based wrapper over op_mulh()
 #[inline(always)]
 pub fn opc_mulh(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_mulh(ctx.a, ctx.b);
 }
 
+/// Sets c to the highest 64-bits of a x b (a as signed, b as unsigned), and flag to false
 #[inline(always)]
 pub const fn op_mulsuh(a: u64, b: u64) -> (u64, bool) {
     (((((a as i64) as i128) * (b as i128)) >> 64) as u64, false)
 }
+
+/// InstContext-based wrapper over op_mulsuh()
 #[inline(always)]
 pub fn opc_mulsuh(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_mulsuh(ctx.a, ctx.b);
 }
 
+/// Sets c to a / b, as 64-bits unsigned values, and flag to false.
+/// If b=0 (divide by zero) it sets c to 2^64 - 1, and sets flag to true.
 #[inline(always)]
 pub const fn op_divu(a: u64, b: u64) -> (u64, bool) {
     if b == 0 {
@@ -645,11 +771,17 @@ pub const fn op_divu(a: u64, b: u64) -> (u64, bool) {
 
     (a / b, false)
 }
+
+/// InstContext-based wrapper over op_divu()
 #[inline(always)]
 pub fn opc_divu(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_divu(ctx.a, ctx.b);
 }
 
+/// Sets c to a / b, as 64-bits signed values, and flag to false.
+/// If b=0 (divide by zero) it sets c to 2^64 - 1, and sets flag to true.
+/// If a=0x8000000000000000 (MIN_I64) and b=0xFFFFFFFFFFFFFFFF (-1) the result should be -MIN_I64,
+/// which cannot be represented with 64 bits (overflow) and it returns c=a
 #[inline(always)]
 pub const fn op_div(a: u64, b: u64) -> (u64, bool) {
     if b == 0 {
@@ -657,11 +789,15 @@ pub const fn op_div(a: u64, b: u64) -> (u64, bool) {
     }
     ((((a as i64) as i128) / ((b as i64) as i128)) as u64, false)
 }
+
+/// InstContext-based wrapper over op_div()
 #[inline(always)]
 pub fn opc_div(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_div(ctx.a, ctx.b);
 }
 
+/// Sets c to a / b, as 32-bits unsigned values, and flag to false.
+/// If b=0 (divide by zero) it sets c to 2^64 - 1, and sets flag to true.
 #[inline(always)]
 pub const fn op_divu_w(a: u64, b: u64) -> (u64, bool) {
     if b as u32 == 0 {
@@ -670,11 +806,15 @@ pub const fn op_divu_w(a: u64, b: u64) -> (u64, bool) {
 
     (((a as u32 / b as u32) as i32) as u64, false)
 }
+
+/// InstContext-based wrapper over op_divu_w()
 #[inline(always)]
 pub fn opc_divu_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_divu_w(ctx.a, ctx.b);
 }
 
+/// Sets c to a / b, as 32-bits signed values, and flag to false.
+/// If b=0 (divide by zero) it sets c to 2^64 - 1, and sets flag to true.
 #[inline(always)]
 pub const fn op_div_w(a: u64, b: u64) -> (u64, bool) {
     if b as i32 == 0 {
@@ -683,11 +823,15 @@ pub const fn op_div_w(a: u64, b: u64) -> (u64, bool) {
 
     ((((a as i32) as i64) / ((b as i32) as i64)) as u64, false)
 }
+
+/// InstContext-based wrapper over op_div_w()
 #[inline(always)]
 pub fn opc_div_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_div_w(ctx.a, ctx.b);
 }
 
+/// Sets c to a mod b, as 64-bits unsigned values, and flag to false.
+/// If b=0 (divide by zero) it sets c to a, and sets flag to true.
 #[inline(always)]
 pub const fn op_remu(a: u64, b: u64) -> (u64, bool) {
     if b == 0 {
@@ -696,11 +840,15 @@ pub const fn op_remu(a: u64, b: u64) -> (u64, bool) {
 
     (a % b, false)
 }
+
+/// InstContext-based wrapper over op_remu()
 #[inline(always)]
 pub fn opc_remu(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_remu(ctx.a, ctx.b);
 }
 
+/// Sets c to a mod b, as 64-bits signed values, and flag to false.
+/// If b=0 (divide by zero) it sets c to a, and sets flag to true.
 #[inline(always)]
 pub const fn op_rem(a: u64, b: u64) -> (u64, bool) {
     if b == 0 {
@@ -709,11 +857,15 @@ pub const fn op_rem(a: u64, b: u64) -> (u64, bool) {
 
     ((((a as i64) as i128) % ((b as i64) as i128)) as u64, false)
 }
+
+/// InstContext-based wrapper over op_rem()
 #[inline(always)]
 pub fn opc_rem(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_rem(ctx.a, ctx.b);
 }
 
+/// Sets c to a mod b, as 32-bits unsigned values, and flag to false.
+/// If b=0 (divide by zero) it sets c to a, and sets flag to true.
 #[inline(always)]
 pub const fn op_remu_w(a: u64, b: u64) -> (u64, bool) {
     if (b as u32) == 0 {
@@ -722,11 +874,15 @@ pub const fn op_remu_w(a: u64, b: u64) -> (u64, bool) {
 
     ((((a as u32) % (b as u32)) as i32) as u64, false)
 }
+
+/// InstContext-based wrapper over op_remu_w()
 #[inline(always)]
 pub fn opc_remu_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_remu_w(ctx.a, ctx.b);
 }
 
+/// Sets c to a mod b, as 32-bits signed values, and flag to false.
+/// If b=0 (divide by zero) it sets c to a, and sets flag to true.
 #[inline(always)]
 pub const fn op_rem_w(a: u64, b: u64) -> (u64, bool) {
     if (b as i32) == 0 {
@@ -735,25 +891,32 @@ pub const fn op_rem_w(a: u64, b: u64) -> (u64, bool) {
 
     ((((a as i32) as i64) % ((b as i32) as i64)) as u64, false)
 }
+
+/// InstContext-based wrapper over op_rem_w()
 #[inline(always)]
 pub fn opc_rem_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_rem_w(ctx.a, ctx.b);
 }
 
+/// ## MIN / MAX operations
+
+/// Sets c to the minimum of a and b as 64-bits unsigned values (and flag to false)
 #[inline(always)]
 pub const fn op_minu(a: u64, b: u64) -> (u64, bool) {
-    //if op_s64(a) < op_s64(b)
     if a < b {
         (a, false)
     } else {
         (b, false)
     }
 }
+
+/// InstContext-based wrapper over op_minu()
 #[inline(always)]
 pub fn opc_minu(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_minu(ctx.a, ctx.b);
 }
 
+/// Sets c to the minimum of a and b as 64-bits signed values (and flag to false)
 #[inline(always)]
 pub const fn op_min(a: u64, b: u64) -> (u64, bool) {
     if (a as i64) < (b as i64) {
@@ -762,11 +925,14 @@ pub const fn op_min(a: u64, b: u64) -> (u64, bool) {
         (b, false)
     }
 }
+
+/// InstContext-based wrapper over op_min()
 #[inline(always)]
 pub fn opc_min(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_min(ctx.a, ctx.b);
 }
 
+/// Sets c to the minimum of a and b as 32-bits unsigned values (and flag to false)
 #[inline(always)]
 pub const fn op_minu_w(a: u64, b: u64) -> (u64, bool) {
     if (a as u32) < (b as u32) {
@@ -775,11 +941,14 @@ pub const fn op_minu_w(a: u64, b: u64) -> (u64, bool) {
         (b as i32 as i64 as u64, false)
     }
 }
+
+/// InstContext-based wrapper over op_minu_w()
 #[inline(always)]
 pub fn opc_minu_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_minu_w(ctx.a, ctx.b);
 }
 
+/// Sets c to the minimum of a and b as 32-bits signed values (and flag to false)
 #[inline(always)]
 pub const fn op_min_w(a: u64, b: u64) -> (u64, bool) {
     if (a as i32) < (b as i32) {
@@ -788,25 +957,30 @@ pub const fn op_min_w(a: u64, b: u64) -> (u64, bool) {
         (b as i32 as i64 as u64, false)
     }
 }
+
+/// InstContext-based wrapper over op_min_w()
 #[inline(always)]
 pub fn opc_min_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_min_w(ctx.a, ctx.b);
 }
 
+/// Sets c to the maximum of a and b as 64-bits unsigned values (and flag to false)
 #[inline(always)]
 pub const fn op_maxu(a: u64, b: u64) -> (u64, bool) {
-    //if op_s64(a) > op_s64(b)
     if a > b {
         (a, false)
     } else {
         (b, false)
     }
 }
+
+/// InstContext-based wrapper over op_maxu()
 #[inline(always)]
 pub fn opc_maxu(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_maxu(ctx.a, ctx.b);
 }
 
+/// Sets c to the maximum of a and b as 64-bits signed values (and flag to false)
 #[inline(always)]
 pub const fn op_max(a: u64, b: u64) -> (u64, bool) {
     if (a as i64) > (b as i64) {
@@ -815,11 +989,14 @@ pub const fn op_max(a: u64, b: u64) -> (u64, bool) {
         (b, false)
     }
 }
+
+/// InstContext-based wrapper over op_max()
 #[inline(always)]
 pub fn opc_max(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_max(ctx.a, ctx.b);
 }
 
+/// Sets c to the maximum of a and b as 32-bits unsigned values (and flag to false)
 #[inline(always)]
 pub const fn op_maxu_w(a: u64, b: u64) -> (u64, bool) {
     if (a as u32) > (b as u32) {
@@ -828,11 +1005,14 @@ pub const fn op_maxu_w(a: u64, b: u64) -> (u64, bool) {
         (b as i32 as i64 as u64, false)
     }
 }
+
+/// InstContext-based wrapper over op_maxu_w()
 #[inline(always)]
 pub fn opc_maxu_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_maxu_w(ctx.a, ctx.b);
 }
 
+/// Sets c to the maximum of a and b as 32-bits signed values (and flag to false)
 #[inline(always)]
 pub const fn op_max_w(a: u64, b: u64) -> (u64, bool) {
     if (a as i32) > (b as i32) {
@@ -841,15 +1021,21 @@ pub const fn op_max_w(a: u64, b: u64) -> (u64, bool) {
         (b as i32 as i64 as u64, false)
     }
 }
+
+/// InstContext-based wrapper over op_max_w()
 #[inline(always)]
 pub fn opc_max_w(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_max_w(ctx.a, ctx.b);
 }
 
+/// ## PRECOMPILED operations
+
+/// Performs a Keccak-f hash over a 1600-bits input state stored in memory at the address
+/// specified by register A0, and stores the output state in the same memory address
 #[inline(always)]
 pub fn opc_keccak(ctx: &mut InstContext) {
-    // Get address from register a1 = x11
-    let address = ctx.mem.read(SYS_ADDR + 10_u64 * 8, 8);
+    // Get address from register a0 = x10
+    let address = ctx.mem.read(REG_A0, 8);
 
     // Allocate room for 25 u64 = 128 bytes = 1600 bits
     const WORDS: usize = 25;
@@ -871,6 +1057,9 @@ pub fn opc_keccak(ctx: &mut InstContext) {
     ctx.c = 0;
     ctx.flag = false;
 }
+
+/// Unimplemented.  Keccak can only be called from the system call context via InstContext.
+/// This is provided just for completeness.
 #[inline(always)]
 pub fn op_keccak(_a: u64, _b: u64) -> (u64, bool) {
     unimplemented!("op_keccak() is not implemented");
@@ -882,11 +1071,14 @@ impl From<ZiskRequiredOperation> for ZiskOp {
     }
 }
 
-/// Copies register b into c as a public output data record, where a contains the data index
+/// Copies register b into c as a public output data record, where a contains the data index (and
+/// sets flag to false)
 #[inline(always)]
 pub const fn op_pubout(a: u64, b: u64) -> (u64, bool) {
     (b, false)
 }
+
+/// InstContext-based wrapper over op_pubout()
 #[inline(always)]
 pub fn opc_pubout(ctx: &mut InstContext) {
     (ctx.c, ctx.flag) = op_pubout(ctx.a, ctx.b);
diff --git a/core/src/zisk_required_operation.rs b/core/src/zisk_required_operation.rs
index 59a7aee6..44bf24c4 100644
--- a/core/src/zisk_required_operation.rs
+++ b/core/src/zisk_required_operation.rs
@@ -1,5 +1,10 @@
 use std::collections::HashMap;
 
+/// Stores the minimum information to reproduce an operation execution:
+/// the opcode and the a and b registers values (regardless of their sources);
+/// the step is also stored to keep track of the program execution point.
+/// This data is generated during the first emulation execution.
+/// This data is required by the main state machine executor to generate the witness computation.
 #[derive(Clone)]
 pub struct ZiskRequiredOperation {
     pub step: u64,
@@ -8,6 +13,7 @@ pub struct ZiskRequiredOperation {
     pub b: u64,
 }
 
+/// Stores the minimum information to generate the memory state machine witness computation.
 #[derive(Clone)]
 pub struct ZiskRequiredMemory {
     pub step: u64,
@@ -17,6 +23,7 @@ pub struct ZiskRequiredMemory {
     pub value: u64,
 }
 
+/// Operations required to be proven
 #[derive(Clone, Default)]
 pub struct ZiskRequired {
     pub arith: Vec<ZiskRequiredOperation>,
@@ -25,6 +32,9 @@ pub struct ZiskRequired {
     pub memory: Vec<ZiskRequiredMemory>,
 }
 
+/// Histogram of the program counter values used during the program execution.
+/// Each pc value has a u64 counter, associated to it via a hash map.
+/// The counter is increased every time the corresponding instruction is executed.
 #[derive(Clone, Default)]
 pub struct ZiskPcHistogram {
     pub map: HashMap<u64, u64>,
diff --git a/core/src/zisk_rom.rs b/core/src/zisk_rom.rs
index 4761feb8..1ba2e2a8 100644
--- a/core/src/zisk_rom.rs
+++ b/core/src/zisk_rom.rs
@@ -5,11 +5,18 @@ use crate::{ZiskInst, ZiskInstBuilder, ROM_ADDR, ROM_ENTRY, SRC_IND, SRC_STEP};
 // #[cfg(feature = "sp")]
 // use crate::SRC_SP;
 
-/// RO data structure
+/// RO (read-only) data structure
+/// This structure contains the data generated by the ELF program compiler and that are part of
+/// the program itself, unlike the input data, which can be different for every program execution.
 #[derive(Debug, Default, Clone)]
 pub struct RoData {
+    // Address of the RO data in the program address space
     pub from: u64,
+
+    // Size of the RO data
     pub length: usize,
+
+    // Vector of bytes containing the data
     pub data: Vec<u8>,
 }
 
@@ -21,51 +28,57 @@ impl RoData {
     }
 }
 
-/// ZisK ROM data, including a map address to ZisK instruction
+/// ZisK ROM structure, including a map address to ZisK instruction
 #[derive(Default, Debug, Clone)]
 pub struct ZiskRom {
+    // Address to be used to build the next instruction (and to be increased afterwards)
     pub next_init_inst_addr: u64,
+
+    // Map of instructions that are part of the ROM; the key is the ROM address (pc)
+    // This map contains the instructions that are part of the program, i.e. address >= ROM_ADDR
     pub insts: HashMap<u64, ZiskInstBuilder>,
+
+    // List of RO sections as found in the ELF file
     pub ro_data: Vec<RoData>,
-    pub from: u64,
-    pub length: u64,
-    pub data: Vec<u8>,
+
+    // The following vectors are to store subsets of the ROM instructions in order to improve the
+    // program execution performance while fetching the instruction for the current step pc address
+
+    // Vector of ROM instructions with address < ROM_ADDR
     pub rom_entry_instructions: Vec<ZiskInst>,
+
+    // ROM instructions with an address that is aligned to 4 bytes
     pub rom_instructions: Vec<ZiskInst>,
-    // Rom Non 4 bytes aligned instructions
+
+    // Offset of the non-aligned instructions, to be subtracted from the address when accessing the
+    // corresponding vector
     pub offset_rom_na_unstructions: u64,
+
+    // ROM instructions with an address that is not aligned to 4 bytes
     pub rom_na_instructions: Vec<ZiskInst>,
 }
 
 /// ZisK ROM implementation
 impl ZiskRom {
-    pub fn new() -> ZiskRom {
-        ZiskRom {
-            next_init_inst_addr: 0,
-            insts: HashMap::new(),
-            ro_data: Vec::new(),
-            from: 0,
-            length: 0,
-            data: Vec::new(),
-            rom_entry_instructions: Vec::new(),
-            rom_instructions: Vec::new(),
-            offset_rom_na_unstructions: 0,
-            rom_na_instructions: Vec::new(),
-        }
-    }
-
+    /// Gets the ROM instruction corresponding to the provided pc address.
+    /// Depending on the range and alignment of the address, the function searches for it in the
+    /// corresponding vector.
     #[inline(always)]
     pub fn get_instruction(&self, pc: u64) -> &ZiskInst {
+        // If the address is a program address...
         if pc >= ROM_ADDR {
+            // If the address is aligned, take it from the proper vector
             if pc & 0b11 == 0 {
                 // pc is aligned to a 4-byte boundary
                 &self.rom_instructions[((pc - ROM_ADDR) >> 2) as usize]
+            // Otherwise, take it from the non-aligned vector, using the difference of the pc
+            // vs. the offset as the index
             } else {
                 // pc is not aligned to a 4-byte boundary
                 &self.rom_na_instructions[(pc - self.offset_rom_na_unstructions) as usize]
             }
         } else if pc >= ROM_ENTRY {
-            // pc is in the ROM_ENTRY range
+            // pc is in the ROM_ENTRY range (always aligned)
             &self.rom_entry_instructions[((pc - ROM_ENTRY) >> 2) as usize]
         } else {
             panic!("ZiskRom::get_instruction() pc={} is out of range", pc);
diff --git a/core/src/zv2zisk.rs b/core/src/zv2zisk.rs
index 4e0d4db2..eafa6554 100644
--- a/core/src/zv2zisk.rs
+++ b/core/src/zv2zisk.rs
@@ -1,8 +1,8 @@
 use riscv::{riscv_interpreter, RiscvInstruction};
 
 use crate::{
-    convert_vector, read_u16_le, read_u32_le, read_u64_le, ZiskInstBuilder, ZiskRom, ARCH_ID_ZISK,
-    INPUT_ADDR, OUTPUT_ADDR, ROM_EXIT, SYS_ADDR,
+    convert_vector, ZiskInstBuilder, ZiskRom, ARCH_ID_ZISK, INPUT_ADDR, OUTPUT_ADDR, ROM_EXIT,
+    SYS_ADDR,
 };
 
 use std::collections::HashMap;
@@ -25,7 +25,6 @@ impl Riscv2ZiskContext<'_> {
     /// Converts an input RISCV instruction into a ZisK instruction and stores it into the internal
     /// map
     pub fn convert(&mut self, riscv_instruction: &RiscvInstruction) {
-        //let mut addr = self.s;
         match riscv_instruction.inst.as_str() {
             "lb" => self.load_op(riscv_instruction, "signextend_b", 1),
             "lbu" => self.load_op(riscv_instruction, "copyb", 1),
@@ -140,30 +139,6 @@ impl Riscv2ZiskContext<'_> {
                 riscv_instruction.inst
             ),
         }
-
-        /*if self.insts.contains_key(&addr)
-        {
-            let zib = self.insts.get(&addr).unwrap();
-            println!("Riscv2ZiskContext::convert() addr={} inst={}", addr, zib.i.to_string());
-        }
-        addr += 1;
-        if self.insts.contains_key(&addr)
-        {
-            let zib = self.insts.get(&addr).unwrap();
-            println!("Riscv2ZiskContext::convert() addr={} inst={}", addr, zib.i.to_string());
-        }
-        addr += 1;
-        if self.insts.contains_key(&addr)
-        {
-            let zib = self.insts.get(&addr).unwrap();
-            println!("Riscv2ZiskContext::convert() addr={} inst={}", addr, zib.i.to_string());
-        }
-        addr += 1;
-        if self.insts.contains_key(&addr)
-        {
-            let zib = self.insts.get(&addr).unwrap();
-            println!("Riscv2ZiskContext::convert() addr={} inst={}", addr, zib.i.to_string());
-        }*/
     }
 
     /*amoadd.w rs1, rs2, rd
@@ -177,6 +152,8 @@ impl Riscv2ZiskContext<'_> {
         copyb_w( [%rs1] , last_c) -> [a], j(pc+1, pc+1)
         copyb_d(0, [%tmp1]) -> [%rd], j(pc+1, pc+1), j(pc+1, pc+1)*/
 
+    /// Creates a set of Zisk operations that implement a RISC-V atomic operation,
+    /// i.e. a load-modify-store operation
     pub fn create_atomic_op(
         &mut self,
         i: &RiscvInstruction,
@@ -202,7 +179,6 @@ impl Riscv2ZiskContext<'_> {
             {
                 let mut zib = ZiskInstBuilder::new(self.s);
                 zib.src_a("lastc", 0, false);
-                //zib.ind_width(w);
                 zib.src_b("reg", i.rs2 as u64, false);
                 zib.op(op).unwrap();
                 zib.j(1, 1);
@@ -281,6 +257,7 @@ impl Riscv2ZiskContext<'_> {
     //    copyb_w( same_a , [rs2]) -> [a], j(pc+1, pc+1)
     //    copyb_d(0, [%tmp1]) -> [%rd], j(pc+2, pc+2)
 
+    /// Creates a set of Zisk operations that implement a RISC-V atomic swap operation
     pub fn create_atomic_swap(&mut self, i: &RiscvInstruction, loadf: &str, storef: &str, w: u64) {
         if (i.rd != i.rs1) && (i.rd != i.rs2) {
             {
@@ -348,6 +325,9 @@ impl Riscv2ZiskContext<'_> {
         }
     }
 
+    /// Creates a Zisk operation that implements a RISC-V register operation, i.e. an operation that
+    /// loads both input parameters a and b from their respective registers,
+    /// and stores the result c into a register
     pub fn create_register_op(&mut self, i: &RiscvInstruction, op: &str) {
         let mut zib = ZiskInstBuilder::new(self.s);
         zib.src_a("reg", i.rs1 as u64, false);
@@ -363,6 +343,10 @@ impl Riscv2ZiskContext<'_> {
 
     // beq rs1, rs2, label
     //    eq([%rs1], [rs2]), j(label)
+
+    /// Creates a Zisk operation that implements a RISC-V branch operation, i.e. an operation that
+    /// jumps to another operation, or continues the normal execution, based on a condition
+    /// specified by the operation
     pub fn create_branch_op(&mut self, i: &RiscvInstruction, op: &str, neg: bool) {
         let mut zib = ZiskInstBuilder::new(self.s);
         zib.src_a("reg", i.rs1 as u64, false);
@@ -379,6 +363,8 @@ impl Riscv2ZiskContext<'_> {
         self.s += 4;
     }
 
+    /// Creates a Zisk flag operation that simply sets the flag to true and continues the execution
+    /// to the next operation
     pub fn nop(&mut self, i: &RiscvInstruction) {
         let mut zib = ZiskInstBuilder::new(self.s);
         zib.src_a("imm", 0, false);
@@ -393,6 +379,9 @@ impl Riscv2ZiskContext<'_> {
 
     // lb rd, imm(rs1)
     //    signextend_b([%rs1], [a + imm]) -> [%rd]
+
+    /// Creates a Zisk operation that loads a value from memory using the specified operation
+    /// and stores the result in a register
     pub fn load_op(&mut self, i: &RiscvInstruction, op: &str, w: u64) {
         let mut zib = ZiskInstBuilder::new(self.s);
         zib.src_a("reg", i.rs1 as u64, false);
@@ -409,6 +398,9 @@ impl Riscv2ZiskContext<'_> {
 
     // sb rs2, imm(rs1)
     //    copyb_d([%rs1], [%rs2]) -> [a + imm]
+
+    /// Creates a Zisk operation that loads a value from register using the specified operation
+    /// and stores the result in memory
     pub fn store_op(&mut self, i: &RiscvInstruction, op: &str, w: u64) {
         let mut zib = ZiskInstBuilder::new(self.s);
         zib.src_a("reg", i.rs1 as u64, false);
@@ -425,6 +417,9 @@ impl Riscv2ZiskContext<'_> {
 
     // addi rd, rs1, imm
     //      add([%rs1], imm) -> [%rd]
+
+    /// Creates a Zisk operation that loads a constant value using the specified operation and
+    /// stores the result in a register
     pub fn immediate_op(&mut self, i: &RiscvInstruction, op: &str) {
         let mut zib = ZiskInstBuilder::new(self.s);
         zib.src_a("reg", i.rs1 as u64, false);
@@ -457,6 +452,7 @@ impl Riscv2ZiskContext<'_> {
     // sc.w rd, rs2, (rs1)
     //    copyb_d([%rs1], [%rs2]) -> [a]
     //    copyb_d(0,0) -> [%rd]
+    /// Implements the RISC-V store-conditional instruction of a 32-bits value
     pub fn sc_w(&mut self, i: &RiscvInstruction) {
         if i.rd > 0 {
             {
@@ -501,6 +497,7 @@ impl Riscv2ZiskContext<'_> {
     // sc.d rd, rs2, (rs1)
     //    copyb([%rs1], [%rs2]) -> [a]
     //    copyb(0,0) -> [%rd]
+    /// Implements the RISC-V store-conditional instruction of a 64-bits value
     pub fn sc_d(&mut self, i: &RiscvInstruction) {
         if i.rd > 0 {
             {
@@ -543,6 +540,7 @@ impl Riscv2ZiskContext<'_> {
 
     // lui rd, imm
     //      copyb_b(0, imm) -> [rd]
+    /// Implements the RISC-V load-upper-immediate instruction to load a 32-bits constant
     pub fn lui(&mut self, i: &RiscvInstruction) {
         let mut zib = ZiskInstBuilder::new(self.s);
         zib.src_a("imm", 0, false);
@@ -558,6 +556,7 @@ impl Riscv2ZiskContext<'_> {
 
     //     jalr rd, rs1, imm
     //          copyb_d(0, [%rs1]), j(c + imm) -> [rd]
+    /// Implements the RISC-V jump-and-link-register unconditional jump instruction
     pub fn jalr(&mut self, i: &RiscvInstruction) {
         if (i.imm % 4) == 0 {
             let mut zib = ZiskInstBuilder::new(self.s);
@@ -601,6 +600,7 @@ impl Riscv2ZiskContext<'_> {
 
     //    jal rd, label
     //          flag(0,0), j(pc + imm) -> [rd]
+    /// Implements the RISC-V jump-and-link unconditional jump instruction
     pub fn jal(&mut self, i: &RiscvInstruction) {
         let mut zib = ZiskInstBuilder::new(self.s);
         zib.src_a("imm", 0, false);
@@ -614,6 +614,7 @@ impl Riscv2ZiskContext<'_> {
         self.s += 4;
     }
 
+    /// Makes a system call
     pub fn ecall(&mut self, _i: &RiscvInstruction) {
         let mut zib = ZiskInstBuilder::new(self.s);
         zib.src_a("imm", 0, false);
@@ -628,6 +629,9 @@ impl Riscv2ZiskContext<'_> {
         self.s += 4;
     }
 
+    /// RISC-V defines a separate address space of 4096 Control and Status registers associated with
+    /// each hart. All CSR instructions atomically read-modify-write a single CSR.
+
     /*
     csrrw rd, csr, rs1
         if (rd == rs1) {
@@ -648,6 +652,11 @@ impl Riscv2ZiskContext<'_> {
         }
     */
 
+    /// The CSRRW (Atomic Read/Write CSR) instruction atomically swaps values in the CSRs and
+    /// integer registers. CSRRW reads the old value of the CSR, zero-extends the value to XLEN
+    /// bits, then writes it to integer register rd. The initial value in rs1 is written to the CSR.
+    /// If rd=x0, then the instruction shall not read the CSR and shall not cause any of the side
+    /// effects that might occur on a CSR read.
     pub fn csrrw(&mut self, i: &RiscvInstruction) {
         if i.rd == i.rs1 {
             if i.rd == 0 {
@@ -763,6 +772,11 @@ impl Riscv2ZiskContext<'_> {
         }
     */
 
+    /// The CSRRS (Atomic Read and Set Bits in CSR) instruction reads the value of the CSR,
+    /// zero-extends the value to XLEN bits, and writes it to integer register rd. The initial value
+    /// in integer register rs1 is treated as a bit mask that specifies bit positions to be set in
+    /// the CSR. Any bit that is high in rs1 will cause the corresponding bit to be set in the CSR,
+    /// if that CSR bit is writable.
     pub fn csrrs(&mut self, i: &RiscvInstruction) {
         if i.rd == i.rs1 {
             if i.rd == 0 {
@@ -888,6 +902,11 @@ impl Riscv2ZiskContext<'_> {
         }
     */
 
+    /// The CSRRC (Atomic Read and Clear Bits in CSR) instruction reads the value of the CSR,
+    /// zero-extends the value to XLEN bits, and writes it to integer register rd. The initial value
+    /// in integer register rs1 is treated as a bit mask that specifies bit positions to be cleared
+    /// in the CSR. Any bit that is high in rs1 will cause the corresponding bit to be cleared in
+    /// the CSR, if that CSR bit is writable.
     pub fn csrrc(&mut self, i: &RiscvInstruction) {
         if i.rd == i.rs1 {
             if i.rd == 0 {
@@ -1021,6 +1040,11 @@ impl Riscv2ZiskContext<'_> {
         }
     }
 
+    /// The CSRRWI, CSRRSI, and CSRRCI variants are similar to CSRRW, CSRRS, and CSRRC respectively,
+    /// except they update the CSR using an XLEN-bit value obtained by zero-extending a 5-bit
+    /// unsigned immediate (uimm[4:0]) field encoded in the rs1 field instead of a value from an
+    /// integer register.
+
     /*
     csrrci rd, csr
         {
@@ -1267,14 +1291,17 @@ pub fn add_zisk_code(rom: &mut ZiskRom, addr: u64, data: &[u8]) {
     // For all RISCV instructions
     for riscv_instruction in riscv_instructions {
         //print!("add_zisk_code() converting RISCV instruction={}\n",
-        // riscv_instruction.to_string()); Convert RICV instruction to ZisK instruction and
-        // store it in rom.insts
+        // riscv_instruction.to_string());
+
+        // Convert RISC-V instruction to ZisK instruction and store it in rom.insts
         ctx.convert(&riscv_instruction);
         //print!("   to: {}", ctx.insts.iter().last().)
     }
 }
 
-/// Add initial data to ZisK rom
+/// Add initial data to ZisK rom.
+/// The initial data is copied in chunks of 8 bytes for efficiency, until less than 8 bytes are left
+/// to copy.  The remaining bytes are copied in additional chunks of 4, 2 and 1 byte, if required.
 pub fn add_zisk_init_data(rom: &mut ZiskRom, addr: u64, data: &[u8]) {
     //print!("add_zisk_init_data() addr={}\n", addr);
     let mut o = addr;
@@ -1282,7 +1309,7 @@ pub fn add_zisk_init_data(rom: &mut ZiskRom, addr: u64, data: &[u8]) {
     // Read 64-bit input data chunks and store them in rom
     let nd = data.len() / 8;
     for i in 0..nd {
-        let v = read_u64_le(data, i * 8);
+        let v = u64::from_le_bytes(data[i * 8..i * 8 + 8].try_into().unwrap());
         let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
         zib.src_a("imm", o, false);
         zib.src_b("imm", v, false);
@@ -1299,7 +1326,7 @@ pub fn add_zisk_init_data(rom: &mut ZiskRom, addr: u64, data: &[u8]) {
 
     // Read remaining 32-bit input data chunk, if any, and store them in rom
     if addr + data.len() as u64 - o >= 4 {
-        let v = read_u32_le(data, o as usize);
+        let v = u32::from_le_bytes(data[o as usize..o as usize + 4].try_into().unwrap());
         let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
         zib.src_a("imm", o, false);
         zib.src_b("imm", v as u64, false);
@@ -1316,7 +1343,7 @@ pub fn add_zisk_init_data(rom: &mut ZiskRom, addr: u64, data: &[u8]) {
 
     // Read remaining 16-bit input data chunk, if any, and store them in rom
     if addr + data.len() as u64 - o >= 2 {
-        let v = read_u16_le(data, o as usize);
+        let v = u16::from_le_bytes(data[o as usize..o as usize + 2].try_into().unwrap());
         let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
         zib.src_a("imm", o, false);
         zib.src_b("imm", v as u64, false);
@@ -1354,12 +1381,16 @@ pub fn add_zisk_init_data(rom: &mut ZiskRom, addr: u64, data: &[u8]) {
     }
 }
 
-/// Add the entry/exit jump program section
+/// Add the entry/exit jump program section to the rom instruction set.
 pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     //print!("add_entry_exit_jmp() rom.next_init_inst_addr={}\n", rom.next_init_inst_addr);
+
+    // Calculate the trap handler rom pc address as an offset from the current instruction address
+    // to the beginning of the ecall section
     let trap_handler: u64 = rom.next_init_inst_addr + 0x38;
 
-    // :0000
+    // :0000 we note the rom pc address offset from the first address for each instruction
+    // Store the Zisk architecture ID into memory
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("imm", 0, false);
     zib.src_b("imm", ARCH_ID_ZISK, false);
@@ -1372,6 +1403,7 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // :0004
+    // Store the trap handler address into memory
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("imm", 0, false);
     zib.src_b("imm", trap_handler, false);
@@ -1384,6 +1416,7 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // :0008
+    // Store the input data address into register #10
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("imm", 0, false);
     zib.src_b("imm", INPUT_ADDR, false);
@@ -1396,6 +1429,7 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // :000c
+    // Store the output data address into register #11
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("imm", 0, false);
     zib.src_b("imm", OUTPUT_ADDR, false);
@@ -1408,6 +1442,7 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // :0010
+    // Call to the program rom pc address, i.e. call the program
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("imm", 0, false);
     zib.src_b("imm", addr, false);
@@ -1435,7 +1470,25 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
             end
     */
 
-    // :0014 -> copyb: reg1 = c = b = mem(OUTPUT_ADDR,4), a=0
+    // :0014
+    // Returns from the program execution.
+    // Reads output data using the specific pubout operation
+    // Read the output data length located at first 64 bits of output data,
+    // and then read output data in chunks of 64 bits:
+    //
+    // loadw: c(reg1) = b(mem=OUTPUT_ADDR), a=0   // TODO: check that Nx4 < OUTPUT_SIZE
+    // copyb: c(reg2)=b=0, a=0
+    // copyb: c(reg3)=b=OUTPUT_ADDR+4, a=0
+    //
+    // eq: if reg2==reg1 jump to end
+    // pubout: c=b.mem(reg3), a = reg2
+    // add: reg3 = reg3 + 4 // Increment memory address
+    // add: reg2 = reg2 + 1, jump -12 // Increment index, goto eq
+    //
+    // end
+    //
+    // Load the output data length, stored in memory at OUTPUT_ADDR, into register #1
+    // copyb: reg1 = c = b = mem(OUTPUT_ADDR,4), a=0
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("imm", OUTPUT_ADDR, false);
     zib.src_b("ind", 0, false);
@@ -1449,6 +1502,7 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // :0018 -> copyb: copyb: c(reg2)=b=0, a=0
+    // Set register #2 to zero
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("imm", 0, false);
     zib.src_b("imm", 0, false);
@@ -1461,6 +1515,8 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // :001c -> copyb: c(reg3)=b=OUTPUT_ADDR, a=0
+    // Set register #3 to OUTPUT_ADDR + 4, i.e. to the beginning of the actual data after skipping
+    // the data length value
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("imm", 0, false);
     zib.src_b("imm", OUTPUT_ADDR + 4, false);
@@ -1473,6 +1529,7 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // :0020 -> eq: if reg2==reg1 jump to end
+    // Jump to end if registers #1 and #2 are equal, to break the data copy loop
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("reg", 1, false);
     zib.src_b("reg", 2, false);
@@ -1485,6 +1542,7 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // :0024 -> copyb: c = b = mem(reg3, 4)
+    // Copy the contents of memory at address set by register #3 into c, i.e. copy output data chunk
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("reg", 3, false);
     zib.src_b("ind", 0, false);
@@ -1498,6 +1556,7 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // :0028 -> pubout: c = last_c = mem(reg3, 4), a = reg2 = index
+    // Call the special operation pubout with this data, with a being the data chunk index
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("reg", 2, false);
     zib.src_b("lastc", 0, false);
@@ -1510,6 +1569,7 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // :002c -> add: reg3 = reg3 + 4
+    // Increase register #3, i.e. the data address, by 4 units
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("reg", 3, false);
     zib.src_b("imm", 4, false);
@@ -1522,6 +1582,8 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // :0030 -> add: reg2 = reg2 + 1, jump -16
+    // Increase register #2, i.e. the data chunk index, by 1 unit.
+    // Jump to the beginning of the output data read loop
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("reg", 2, false);
     zib.src_b("imm", 1, false);
@@ -1534,6 +1596,7 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // :0034 jump to end (success)
+    // Jump to the last instruction (ROM_EXIT) to properly finish the program execution
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("imm", 0, false);
     zib.src_b("imm", ROM_EXIT, false);
@@ -1545,8 +1608,12 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.insts.insert(rom.next_init_inst_addr, zib);
     rom.next_init_inst_addr += 4;
 
-    // :0038 trap_handle
-    // If register a7==CAUSE_EXIT, end the program
+    // :0038 trap_handle -> This is the address offset we use at the beginning of the function
+    // This code is executed when the program makes an ecall (system call).
+    // The pc is set to this address, and after the system call, it returns to the pc next to the
+    // one that made the ecall
+    // If register a7==CAUSE_EXIT, then execute the next instruction to end the program;
+    // otherwise jump to the one after the next one
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("reg", 17, false);
     zib.src_b("imm", CAUSE_EXIT, false);
@@ -1569,8 +1636,8 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.insts.insert(rom.next_init_inst_addr, zib);
     rom.next_init_inst_addr += 4;
 
-    // :0040 trap_handle
-    // If register a7==CAUSE_KECCAK, call the keccak opcode and return
+    // :0040
+    // If register a7==CAUSE_KECCAK, then call the keccak opcode and return
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("reg", 17, false);
     zib.src_b("imm", CAUSE_KECCAK, false);
@@ -1582,6 +1649,7 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // :0044
+    // Call the keccak precompiled opcode
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("reg", 11, false);
     zib.src_b("imm", 0, false);
@@ -1593,6 +1661,7 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // :0048
+    // Return to the instruction next to the one that made this ecall
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("imm", 0, false);
     zib.src_b("reg", 1, false);
@@ -1605,6 +1674,8 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) {
     rom.next_init_inst_addr += 4;
 
     // END: all programs should exit here, regardless of the execution result
+    // This is the last instruction to be executed.  The emulator must stop after the instruction
+    // end flag is found to be true
     rom.next_init_inst_addr = ROM_EXIT;
     let mut zib = ZiskInstBuilder::new(rom.next_init_inst_addr);
     zib.src_a("imm", 0, false);
diff --git a/emulator/src/emu_context.rs b/emulator/src/emu_context.rs
index df15954e..811baa6c 100644
--- a/emulator/src/emu_context.rs
+++ b/emulator/src/emu_context.rs
@@ -1,7 +1,5 @@
 use crate::{EmuTrace, Stats};
-use zisk_core::{
-    write_u64_le, InstContext, Mem, INPUT_ADDR, MAX_INPUT_SIZE, RAM_ADDR, RAM_SIZE, ROM_ENTRY,
-};
+use zisk_core::{InstContext, Mem, INPUT_ADDR, MAX_INPUT_SIZE, RAM_ADDR, RAM_SIZE, ROM_ENTRY};
 
 /// ZisK emulator context data container, storing the state of the emulation
 pub struct EmuContext {
@@ -25,7 +23,7 @@ impl EmuContext {
     pub fn new(input: Vec<u8>) -> EmuContext {
         let mut ctx = EmuContext {
             inst_ctx: InstContext {
-                mem: Mem::new(),
+                mem: Mem::default(),
                 a: 0,
                 b: 0,
                 c: 0,
@@ -52,12 +50,9 @@ impl EmuContext {
             panic!("EmuContext::new() input size too big size={}", input.len());
         }
 
-        // Create a new empty vector
-        let mut buffer: Vec<u8> = vec![0; 8];
-        write_u64_le(&mut buffer, 0, input.len() as u64);
-
         // Add the length and input data read sections
-        ctx.inst_ctx.mem.add_read_section(INPUT_ADDR, &buffer);
+        let input_len = input.len() as u64;
+        ctx.inst_ctx.mem.add_read_section(INPUT_ADDR, &input_len.to_le_bytes());
         ctx.inst_ctx.mem.add_read_section(INPUT_ADDR + 8, &input);
 
         // Add the write section
diff --git a/emulator/src/emu_trace.rs b/emulator/src/emu_trace.rs
index 0048a1c4..6713fbde 100644
--- a/emulator/src/emu_trace.rs
+++ b/emulator/src/emu_trace.rs
@@ -1,4 +1,4 @@
-//use crate::MemTrace;
+/// Trace data at the beginning of the program execution: pc, sp, c and step
 #[derive(Default, Debug, Clone)]
 pub struct EmuTraceStart {
     pub pc: u64,
@@ -7,16 +7,35 @@ pub struct EmuTraceStart {
     pub step: u64,
 }
 
+/// Trace data at every step.  Only the values of registers a and b are required.
+/// The current value of pc evolves starting at the start pc value, as we execute the ROM.
+/// The value of c and flag can be obtained by executing the ROM instruction corresponding to the
+/// current value of pc and taking a and b as the input.
 #[derive(Default, Debug, Clone)]
 pub struct EmuTraceStep {
     pub a: u64,
     pub b: u64,
 }
+
+/// Trace data at the end of the program execution, including only the end flag.
+/// If the end flag is true, the program executed completely.  This does not mean that the
+/// program ended successfully; it could have found an error condition due to, for example, invalid
+/// input data, and then jumped directly to the end of the ROM.  In this error situation, the output
+/// data should reveal the success or failure of the completed execution.  This list shows the
+/// possible combinations:
+///
+/// - end = false  --> program did not complete, e.g. the emulator ran out of steps (you can
+///   configure more steps)
+/// - end = true --> program completed
+///     - output data correct --> program completed successfully
+///     - output data incorrect --> program completed with an error
 #[derive(Default, Debug, Clone)]
 pub struct EmuTraceEnd {
     pub end: bool,
 }
 
+/// Trace data of a complete program execution (start, steps, and end) or of a segment of a program
+/// execution (also includes last_state)
 #[derive(Default, Debug, Clone)]
 pub struct EmuTrace {
     pub start_state: EmuTraceStart,
diff --git a/emulator/src/emulator.rs b/emulator/src/emulator.rs
index d7996937..aa952c1f 100644
--- a/emulator/src/emulator.rs
+++ b/emulator/src/emulator.rs
@@ -92,7 +92,7 @@ impl ZiskEmulator {
         }
 
         // TODO: load from file
-        let rom: ZiskRom = ZiskRom::new();
+        let rom: ZiskRom = ZiskRom::default();
         Self::process_rom(&rom, inputs, options, callback)
     }
 
diff --git a/riscv/src/riscv_inst.rs b/riscv/src/riscv_inst.rs
index ef48eefb..daa949ec 100644
--- a/riscv/src/riscv_inst.rs
+++ b/riscv/src/riscv_inst.rs
@@ -10,7 +10,7 @@
 ///     J-type: unconditional jumps, a variation of U-type
 ///
 /// RV32I instruction formats showing immediate variants:
-///
+/// ```ignore
 ///  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
 /// |      funct7        |      rs2     |      rs1     | funct3 |      rd      |       opcode       | R-type
 /// |               imm[11:0]           |      rs1     | funct3 |      rd      |       opcode       | I-type
@@ -18,7 +18,7 @@
 /// |12|    imm[10:5]    |      rs2     |      rs1     | funct3 |imm[4:1]   |11|       opcode       | B-type
 /// |                         imm[31:12]                        |      rd      |       opcode       | U-type
 /// |20|           imm[10:1]         |11|      imm[19:12]       |      rd      |       opcode       | J-type
-///
+/// ```
 /// RV32I has x0 register hardwired to constant 0, plus x1-x31 general purpose registers.
 /// All registers are 32 bits wide but in RV64I they become 64 bits wide.
 /// RV32I is a load-store architecture. This means that only load and store instructions access
@@ -29,7 +29,7 @@
 /// need for RV128I is debatable and its specification is evolving. We also have RV32E for embedded
 /// systems. RV32E has only 16 32-bit registers and makes the counters of RV32I optional.
 ///
-/// See https://devopedia.org/risc-v-instruction-sets
+/// See <https://devopedia.org/risc-v-instruction-sets>
 pub struct RiscvInstruction {
     pub rvinst: u32,
     pub t: String,