CS107e libextra files

#pragma once
/*
 * File: dma.h
 *
 * Description: Functions to use DMA transfer to copy data between memory and peripherals.
 *
 * Author: Daniel James <drjames@stanford.edu>
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 */

#include <stdint.h>
#include <stdbool.h>

/*
 * Type: `dma_width_t`
 *
 * Enumerated type to specify width of the data unit moving across the bus.
 */
// Width of one data unit. The numeric value is stored as-is in the 2-bit
// data-width fields of the transfer descriptor, so do not renumber.
typedef enum {
    DMA_BITWIDTH_8 = 0,     // 8-bit data unit
    DMA_BITWIDTH_16,        // 16-bit data unit
    DMA_BITWIDTH_32,        // 32-bit data unit
    DMA_BITWIDTH_64,        // 64-bit data unit
} dma_width_t;

/*
 * Type: `dma_addr_mode_t`
 *
 * Enumerated type to specify whether address increases after each data unit
 * or is fixed (e.g. hardware fifo address).
 */
// The numeric value is stored as-is in the 1-bit addr-mode fields of the
// transfer descriptor, so do not renumber.
typedef enum {
    DMA_ADDR_MODE_LINEAR = 0,   // address increases linearly (memory)
    DMA_ADDR_MODE_IO = 1,       // address fixed (fifo)
} dma_addr_mode_t;

/*
 * Type: `dma_drq_type_t`
 *
 * Enumerated type to specify port type of the endpoint.
 */
// The numeric value is the hardware DRQ port number and is written directly
// into the 6-bit drq-type fields of the transfer descriptor.
typedef enum {
    DRQ_TYPE_DRAM = 1,  // values from table pg 217-8; memory endpoint (linear addressing)
    DRQ_TYPE_I2S2 = 5,  // peripheral endpoint (fixed-address I/O)
    DRQ_TYPE_SPI1 = 23, // peripheral endpoint (fixed-address I/O)
    DRQ_TYPE_TWI0 = 43, // peripheral endpoint (fixed-address I/O)
} dma_drq_type_t;

/*
 * Type: `dma_endpoint_t`
 *
 * Struct containing endpoint addr, type, and mode.
 */
// One side (source or destination) of a transfer. Normally built with
// `dma_create_endpoint`, which derives `mode` from `type`.
typedef struct {
    uintptr_t addr;         // address of memory buffer or peripheral fifo register
    dma_drq_type_t type;    // DRQ port of this endpoint
    dma_addr_mode_t mode;   // linear (memory) or fixed (I/O) addressing
} dma_endpoint_t;

/*
 * Type: `dma_transfer_id_t`
 *
 * Struct containing info about an in-progress transfer.
 */
// Handle returned by `dma_transfer` and passed to `dma_transfer_completed`.
typedef struct {
    int channel;        // index of the DMA channel carrying the transfer
    int generation;     // channel's generation number at the time the transfer was started
} dma_transfer_id_t;


/*
 * `dma_init` : Required initialization for DMA module
 *
 * Initializes the DMA module, start all clocks.
 * Call `dma_init` once
 */
void dma_init(void);

/*
 * `dma_create_endpoint` : fill in endpoint struct
 *
 * Fill in endpoint struct with the desired type and addr. The endpoint mode
 * is set automatically depending on the type (linear for memory, I/O for peripherals)
 */
dma_endpoint_t dma_create_endpoint(dma_drq_type_t type, const volatile void *addr);

/*
 * `dma_transfer` : Start a new dma transfer
 *
 * Initiates a transfer of `nbytes` from `src` endpoint to `dst` endpoint.
 * The `datawidth` parameter specifies the number of bytes that can be moved across
 * the bus in one data unit. The function starts the transfer and returns immediately,
 * the transfer will progress asynchronously. The return value is a `dma_transfer_id_t`
 * that can be used to later check on the status of the transfer.
 */
dma_transfer_id_t dma_transfer(dma_endpoint_t src, dma_endpoint_t dst, dma_width_t datawidth, int nbytes);

/*
 * `dma_transfer_completed` : Check if transfer has finished
 *
 * Call this function to check on the status of a transaction previously initiated by
 * `dma_transfer`. The function immediately returns either `true` (this transfer has finished,
 * all data has been transferred) or `false` (transfer is still in process of transferring data).
 */
bool dma_transfer_completed(dma_transfer_id_t id);

/*
 * File: dma.c
 *
 * DMA controller
 *
 * Author: Daniel James <drjames@stanford.edu>
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 *
 * Last updated: Curis summer 2025
 */

#include "dma.h"
#include "assert.h"
#include "ccu.h"

/*
 * In-memory transfer descriptor. `dma_transfer` fills one in and hands its
 * address to the controller via the channel's `desc_addr` register.
 * Layout is six 32-bit words: config, src, dst, byte_cnt, param, link
 * (size is checked by a _Static_assert in this file).
 * Unnamed bitfields are reserved/padding bits.
 */
typedef struct {
    struct {
        uint32_t src_drq_type   : 6;    // dma_drq_type_t of source
        uint32_t src_burst_cnt  : 2; // note: erroneously called block size in the manual
        uint32_t src_addr_mode  : 1;    // dma_addr_mode_t of source
        uint32_t src_data_width : 2;    // dma_width_t of source
        uint32_t                : 5;
        uint32_t dst_drq_type   : 6;    // dma_drq_type_t of destination
        uint32_t dst_burst_cnt  : 2; // note: erroneously called block size in the manual
        uint32_t dst_addr_mode  : 1;    // dma_addr_mode_t of destination
        uint32_t dst_data_width : 2;    // dma_width_t of destination
        uint32_t                : 3;
        uint32_t bmode_sel      : 1;    // dma_bmode_t
        uint32_t                : 1;
    } config;
    uint32_t src;       // low 32 bits of source address
    uint32_t dst;       // low 32 bits of destination address
    uint32_t byte_cnt;  // number of bytes to transfer
    struct {
        uint32_t wait_clk_cycles : 8;
        uint32_t                 : 8;
        uint32_t src_high_bits   : 2;   // address bits above the low 32 of source
        uint32_t dst_high_bits   : 2;   // address bits above the low 32 of destination
        uint32_t                 :12;
    } param;
    uint32_t link;      // next descriptor in chain, or LINK_END_SENTINEL if none
} dma_desc_t;

// Value stored in a descriptor's `link` field to mark that no linked packet follows.
#define LINK_END_SENTINEL 0xFFFFF800

// Channel index type. Valid channels are 0 .. DMA_N_CHANNELS-1.
typedef enum {
    DMA_CHANNEL_NONE = -1,  // returned when no idle channel is available
    DMA_N_CHANNELS = 16,    // number of channels; sizes the per-channel arrays
} dma_channel_id_t;

// Encodings for the 2-bit burst count fields of the descriptor config word.
typedef enum {
    // The manual claims that value 0 corresponds to a burst count of 1 but some of
    // our observations seem to suggest it may actually be 2 (but honestly, we are
    // not really sure).  When in normal mode (i.e. not bmode), the burst
    // setting seems to be ignored, so it may not matter what it is set to.
    DMA_BURST_CNT_2 = 0,
    DMA_BURST_CNT_4,
    DMA_BURST_CNT_8,
    DMA_BURST_CNT_16,
} dma_burst_cnt_t;

// Encodings for the 1-bit `bmode_sel` field of the descriptor config word.
typedef enum {
    DMA_NORMAL_MODE = 0,    // the only mode dma_transfer currently selects
    DMA_BMODE,
} dma_bmode_t;

/*
 * Register layout of the DMA controller, overlaid on DMAC_BASE.
 * Fields named `_res*` are reserved gaps that keep later registers at the
 * right offsets: `status` lands at offset 0x30 and each entry of `channels`
 * occupies 0x40 bytes starting at offset 0x100 (checked by the
 * _Static_asserts in this file).
 */
typedef struct {
    uint32_t irq_enable[2];
    uint32_t _resa[2];
    uint32_t irq_pending_status[2];
    uint32_t _resb[4];
    struct {
        uint32_t chan_circuit   : 1;
        uint32_t common_circuit : 1;
        uint32_t mclk_circuit   : 1; // Important to set this on init
        uint32_t                :29;
    } auto_gating;
    uint32_t _resc;
    const struct {
        uint32_t dma_status     :16;    // one bit per channel; bit is 0 when that channel is idle
        uint32_t                :15;
        uint32_t mbus_status    : 1;
    } status;
    uint8_t _resd[204];
    struct {
        uint32_t enable;        // write 1 to start the transfer described by desc_addr
        uint32_t pause;
        uint32_t desc_addr;     // packed descriptor address (see swizzle_desc_ptr)
        const uint32_t config;    // these read-only values are copied from descriptor
        const uint32_t src_addr;
        const uint32_t dst_addr;
        const uint32_t byte_cnt_left;
        const uint32_t params;
        const uint32_t _rese[2];
        uint32_t mode;          // we are using default waiting mode (do we need to support handshake?)
        const uint32_t former_desc_addr;
        const uint32_t pkg_cnt;
        const uint32_t _resf[3];
    } channels[DMA_N_CHANNELS];
} dmac_t;

// Base address of the memory-mapped DMA controller registers.
#define DMAC_BASE ((dmac_t *)0x03002000)
// Compile-time layout checks: fail the build if the descriptor size or the
// register offsets implied by the struct definitions drift from the expected values.
_Static_assert(sizeof(dma_desc_t)  == 6*sizeof(uint32_t), "dma descriptor must be size 6 32-bit words");
_Static_assert(&(DMAC_BASE->status)      == (void *)0x03002030, "dma status reg must be at 0x03002030");
_Static_assert(&(DMAC_BASE->channels[1]) == (void *)0x03002140, "dma channel 1 must start at 0x03002140");

// Private state of the DMA module.
static struct {
    volatile dmac_t * dmac; // non-NULL after init()
    dma_desc_t descriptors[DMA_N_CHANNELS]; // one descriptor per channel, reused for each transfer on that channel

    // each time a new DMA transfer is started on a channel we increment that
    // channel's generation number. a transfer id holding an older generation
    // therefore refers to a transfer that must have completed, even though
    // the channel has since been reused for a new transfer.
    int generation[DMA_N_CHANNELS];
} module;

// Map a DRQ port type to its addressing mode: memory is walked linearly,
// every supported peripheral port is a fixed-address fifo. Any other value
// is reported through error().
static dma_addr_mode_t mode_for_drq(dma_drq_type_t drq) {
    if (drq == DRQ_TYPE_DRAM) {
        return DMA_ADDR_MODE_LINEAR;
    }
    if (drq == DRQ_TYPE_TWI0 || drq == DRQ_TYPE_I2S2 || drq == DRQ_TYPE_SPI1) {
        return DMA_ADDR_MODE_IO;
    }
    error("Invalid DRQ type");
}

// future TODO: expose link structure for endpoint and/or allow circular link?
// Build an endpoint for `addr` on port `type`; the addressing mode is
// derived from the port type.
dma_endpoint_t dma_create_endpoint(dma_drq_type_t type, const volatile void *addr) {
    dma_endpoint_t endpoint;
    endpoint.addr = (uintptr_t)addr;
    endpoint.type = type;
    endpoint.mode = mode_for_drq(type);
    return endpoint;
}

// Bring up the DMA controller: verify the descriptor table is addressable,
// ungate the clocks the controller needs, and record the register base so
// the other functions know init has run.
void dma_init(void) {
    // A descriptor address is handed to the hardware as 34 bits (see
    // swizzle_desc_ptr), so the whole descriptor table must lie strictly
    // below 2^34. Comparing the one-past-the-end address with <= guarantees
    // every byte of every descriptor has an address < 2^34.
    assert((uintptr_t)(&module.descriptors[DMA_N_CHANNELS]) <= ((uintptr_t)1 << 34));

    // Enable DMA gate on MBUS clock. It is already ungated if program started
    // from FEL, but not if program started from SD card, so we do it here to be sure.
    // It is safe to ungate again if already ungated.
    ccu_ungate_mbus_bits(0b1);

    // Enable the DMA clock. See manual page 225.
    ccu_ungate_bus_clock(CCU_DMA_BGR_REG);

    module.dmac = DMAC_BASE;
    // Disable auto-gating, but perhaps we should not? See manual page 243.
    module.dmac->auto_gating.mclk_circuit = 1;
}

// True when the controller's status register shows channel `ch` as not busy.
static bool channel_is_idle(dma_channel_id_t ch) {
    uint32_t busy_bits = module.dmac->status.dma_status;
    return !(busy_bits & (1 << ch));
}

// Return the lowest-numbered idle channel, or DMA_CHANNEL_NONE if all are busy.
static dma_channel_id_t find_open_channel(void) {
    int candidate = 0;
    while (candidate < DMA_N_CHANNELS) {
        if (channel_is_idle(candidate)) {
            return candidate;
        }
        candidate++;
    }
    return DMA_CHANNEL_NONE;
}

// Pack a descriptor address into the 32-bit form written to `desc_addr`:
// the low two bits of the address are dropped and replaced with address
// bits 33:32.
static uint32_t swizzle_desc_ptr(dma_desc_t *desc) {
    const uintptr_t address = (uintptr_t)desc;
    const uint32_t high_bits = (uint32_t)((address >> 32) & 0x3);
    const uint32_t low_bits = (uint32_t)(address & ~(uintptr_t)0x3);
    return low_bits | high_bits;
}

// Start an asynchronous transfer of `n` bytes from `src` to `dst` using data
// unit width `w`. Picks the first idle channel, fills in that channel's
// descriptor, points the channel at the descriptor and enables it.
// Asserts if `w` is invalid, `n` is not positive, no channel is idle, or
// either endpoint address does not fit in 32 bits.
// Returns the channel and its new generation number as the transfer id.
dma_transfer_id_t dma_transfer(dma_endpoint_t src, dma_endpoint_t dst, dma_width_t w, int n) {
    if (!module.dmac) error("dma_init() has not been called!\n");
    assert(w == DMA_BITWIDTH_8 || w == DMA_BITWIDTH_16 || w == DMA_BITWIDTH_32 || w == DMA_BITWIDTH_64);

    assert(n > 0);

    dma_channel_id_t ch = find_open_channel();
    assert(ch != DMA_CHANNEL_NONE);

    // increment generation number now so if someone who used this channel
    // for a previous transfer checks back they will know that it is complete.
    module.generation[ch]++;

    dma_desc_t *desc = &module.descriptors[ch];

    // about burst count: the manual refers to burst count as block size and
    // claims that it refers to a number of bits. Observed behavior and
    // reading the linux driver code suggest instead that burst val is the number of
    // transfers done in a single burst when interacting with an IO device.
    // (burst val seems to be ignored/no change when not in bmode?)
    // The unit for burst is not in bits/bytes, appears to be count of data
    // transfers moved across the bus in one burst. (i.e. number of transfers
    // per memory bus arbitration)
    // The performance benefit to increasing burst count seems to only
    // apply if in bmode and IO device. We are currently not using bmode
    // due to its buggy alignment behavior and use default value for burst cnt.
    desc->config.src_burst_cnt = DMA_BURST_CNT_2;

    desc->config.src_data_width = w;
    desc->config.src_drq_type = src.type;
    desc->config.src_addr_mode = src.mode;

    desc->config.dst_burst_cnt = DMA_BURST_CNT_2;

    desc->config.dst_data_width = w;
    desc->config.dst_drq_type = dst.type;
    desc->config.dst_addr_mode = dst.mode;

    // A note about BMODE. We currently do not fully understand what "bmode"
    // (theorized to be burst mode) does. It seems to have some relationship
    // to data-width and block-size (probably more accurately called burst
    // count). One thing we do know is that it is only allowed to be enabled
    // when one endpoint is an IO device and other is memory.  Bmode
    // behaves incorrectly when attempting to auto-align the memory address.
    // Although forcing memory pointers to align on 32-byte boundary seems to
    // avoid the alignment bugs of bmode, it isn't clear that we need to use
    // bmode at all, and can just use normal mode in all cases (which seems to
    // work without issues). The linux driver also does not use bmode.
    desc->config.bmode_sel = DMA_NORMAL_MODE;

    // Endpoint addresses are required to fit in 32 bits, so the descriptor's
    // high address bits are always written as zero.
    assert(src.addr < ((uintptr_t)1 << 32));
    desc->src = src.addr; // assignment truncates to lower 32-bits
    desc->param.src_high_bits = 0;

    assert(dst.addr < ((uintptr_t)1 << 32));
    desc->dst = dst.addr; // assignment truncates to lower 32-bits
    desc->param.dst_high_bits = 0;

    desc->byte_cnt = n;
    desc->link = LINK_END_SENTINEL; // no linked packets

    // Each wait clock cycle here adds 31.38 ps of delay. This triggers in
    // between every DMA transmission so to find the number of times it triggers
    // you have to divide your data size by block size * data width. This also
    // only triggers when there is an IO device involved. So memory to memory
    // transmissions are (as usual) unaffected. Also important to remember that
    // if something else is limiting your speed (e.g. the SPI clock) you may
    // not observe any difference from changing this.
    desc->param.wait_clk_cycles = 0;

    // Descriptor is fully populated; tell the channel where to find it.
    module.dmac->channels[ch].desc_addr = swizzle_desc_ptr(desc);

    // Start the transaction
    module.dmac->channels[ch].enable = 1;

    return (dma_transfer_id_t) {
        .generation = module.generation[ch],
        .channel = ch,
    };
}

// Report whether the transfer identified by `id` has finished.
// A transfer is complete if its channel has since been reused (the channel's
// generation number has moved past the one recorded in `id`) or if the
// channel is currently idle.
bool dma_transfer_completed(dma_transfer_id_t id) {
    if (!module.dmac) error("dma_init() has not been called!\n");
    // Valid channels are 0 .. DMA_N_CHANNELS-1; anything else would index
    // outside the generation array.
    assert(id.channel >= 0 && id.channel < DMA_N_CHANNELS);

    if (id.generation < module.generation[id.channel]) {
        return true;
    }
    return channel_is_idle(id.channel);
}

#pragma once
/*
 * File: i2c.h
 *
 * Description: Module to support communication with devices
 * over I2C bus.
 *
 * Author: Elias Chikwanda <eliaschi@stanford.edu>
 * Author: Joe Robertazzi  <tazzi@stanford.edu>
 */

#include <stdbool.h>
#include <stdint.h>

/*
 * Type: `i2c_clk_freq_t`
 *
 * `i2c_clk_freq_t` is an enumerated type used to refer to the clock frequency
 * of the I2C module. Values can be `I2C_20KHZ`, `I2C_100KHZ`, or `I2C_400KHZ`.
 * Speed is the same for all devices on shared bus.
 */
// The enumerators are selector codes, not frequencies in Hz, and are not
// numbered in order of speed.
typedef enum {
    I2C_100KHZ = 0,
    I2C_400KHZ = 1,
    I2C_20KHZ =  2,
} i2c_clk_freq_t;

/*
 * Type: `i2c_reg_size_t`
 *
 * `i2c_reg_size_t` is an enumerated type used to refer to the size of
 * the register for an I2C device. Values can be `I2C_REG_8BIT` or `I2C_REG_16BIT`.
 * This setting is per-device.
 */
// The numeric value is the number of bytes in a register address (1 or 2).
// The implementation uses it directly as a byte count, so do not renumber.
typedef enum {
    I2C_REG_8BIT =  1,
    I2C_REG_16BIT = 2,
} i2c_reg_size_t;

/*
 * Type: `i2c_transition_t`
 *
 * `i2c_transition_t` is an enumerated type that refers to how the device should
 * transition from write to read in a transaction that reads from a register.
 * Values can be `I2C_REPEATED_START` or `I2C_STOP_START`.
 * This setting is per-device.
 */
typedef enum {
    I2C_REPEATED_START =  0,    // register write and data read are issued as one transaction
    I2C_STOP_START = 1,         // register write and data read are two separate transactions, optionally with a delay between
} i2c_transition_t;

// Value for `trans_delay_us` meaning no delay between stop and start.
#define I2C_NO_DELAY 0

/*
 * `i2c_device_t`
 *
 * This typedef gives a nickname to the struct that will be used to represent a
 * single I2C device. The internal details of the struct will be given in the file
 * i2c.c; those details are private to the implementation and are not shared in the
 * public interface. Clients of the I2C module are not privy to the details of
 * `i2c_device_t`, nor should they be. A client simply holds on to the pointer returned
 * by `i2c_new` and sends that pointer to any of the functions below reading from and
 * writing to the device over the I2C protocol.
 */
typedef struct i2c_device i2c_device_t;

/*
 * `i2c_init` : Required initialization for I2C module
 *
 * Initializes the I2C module. Must run before communicating with any I2C
 * devices. Valid clock rates are `I2C_20KHZ`, `I2C_100KHZ`, and `I2C_400KHZ`.
 * The pins `GPIO_PG12` and `GPIO_PG13` are re-configured for use as I2C SCL and SDA.
 *
 * @param rate      desired clock rate of the I2C module (see options above)
 *
 * Only need to call `i2c_init` once -- subsequent calls reinitialize the module.
 */
void i2c_init(i2c_clk_freq_t rate);

/*
 * `i2c_new` : Create a new I2C device
 *
 * Creates a new I2C device for the given I2C address/id. Must run before attempting to read/write
 * from the device. `i2c_new` will establish communication with device at address `addr` on the bus,
 * and return a pointer to a struct containing that device's data if successful or return `NULL`
 * if the communication fails. Can also be used to scan the bus: try every possible
 * address; each address for which a non-NULL pointer is returned has a responding device.
 *
 * @param addr      address of the target I2C device
 */
i2c_device_t* i2c_new(uint8_t addr);

/*
 * `i2c_config_device_settings` : Optional configuration of I2C device settings
 *
 * Default settings for a new device are `I2C_REG_8BIT`, `I2C_REPEATED_START`, and `I2C_NO_DELAY`.
 * Call this function if needed to change settings to match requirements of your device.
 *
 * Note: `trans_delay_us` MUST be `0` if `transition` is `I2C_REPEATED_START` as there is no delay between
 * repeated starts. An assert is raised if requested configuration is invalid.
 *
 * @param dev               pointer to target I2C device
 * @param reg_size          size of registers on this device (either `I2C_REG_8BIT` or `I2C_REG_16BIT`)
 * @param transition        how to handle transition (either `I2C_REPEATED_START` or `I2C_STOP_START`)
 * @param trans_delay_us    if transition stop-start, number of microseconds to insert stop-delay-start (`0` if none)
 */
void i2c_config_device_settings(i2c_device_t* dev, i2c_reg_size_t reg_size, i2c_transition_t transition, int trans_delay_us);

/*
 * `i2c_write_reg_n` : Multi-byte register write for I2C device
 *
 * Writes `n` bytes to register `reg` for target I2C device. Returns `true` if the bytes
 * were successfully written or `false` if the write failed.
 *
 * @param dev       pointer to target I2C device
 * @param reg       register where data bytes will be written
 * @param bytes     array of data bytes to write
 * @param n         number of bytes to write
 */
bool i2c_write_reg_n(i2c_device_t* dev, uint16_t reg, const uint8_t* bytes, int n);

/*
 * `i2c_write_reg` : Single byte register write for I2C device
 *
 * Writes one byte to register `reg` for target I2C device. Returns `true` if the byte
 * was successfully written or `false` if the write failed.
 *
 * @param dev       pointer to target I2C device
 * @param reg       register where data byte will be written
 * @param val       data byte to write
 */
bool i2c_write_reg(i2c_device_t* dev, uint16_t reg, uint8_t val);

/*
 * `i2c_read_reg_n` : Multi-byte register read for I2C device
 *
 * Reads `n` bytes from register `reg` for target I2C device. Returns `true` if the bytes
 * were successfully read or `false` if the read failed.
 *
 * @param dev       pointer to target I2C device
 * @param reg       register where data bytes will be read
 * @param bytes     array to store the data bytes that are read
 * @param n         number of bytes to read
 */
bool i2c_read_reg_n(i2c_device_t* dev, uint16_t reg, uint8_t* bytes, int n);

/*
 * `i2c_read_reg` : Single byte register read for I2C device
 *
 * Reads one byte from register `reg` for target I2C device. Returns the byte read
 * from the register if read succeeded, and `-1` if the read failed.
 *
 * @param dev       pointer to target I2C device
 * @param reg       register where data byte will be read
 */
int i2c_read_reg(i2c_device_t* dev, uint16_t reg);

/*
 * `i2c_block_write` : Multi-byte write for I2C device
 *
 * Writes `n` bytes to the target I2C device. Returns `true` if the bytes
 * were successfully written or `false` if the write failed.
 *
 * @param dev       pointer to target I2C device
 * @param bytes     array of data bytes to write
 * @param n         number of bytes to write
 */
bool i2c_block_write(i2c_device_t* dev, const uint8_t* bytes, int n);

/*
 * `i2c_block_read` : Multi-byte read for I2C device
 *
 * Reads `n` bytes from the target I2C device. Returns `true` if the bytes
 * were successfully read or `false` if the read failed.
 *
 * @param dev       pointer to target I2C device
 * @param bytes     array to store the data bytes that are read
 * @param n         number of bytes to read
 */
bool i2c_block_read(i2c_device_t* dev, uint8_t* bytes, int n);

/*
 * File: i2c.c
 *
 * Description: Contains API support for I2C devices on the MangoPi. Allows users
 * to initialize an I2C module, initialize multiple I2C devices, and read/write to
 * said devices by registers or direct access. Check i2c.h for more specification 
 * on the `i2c_reg_size_t` and `i2c_clk_freq_t` types.
 *
 * Author: Elias Chikwanda <eliaschi@stanford.edu>
 * Author: Joe Robertazzi  <tazzi@stanford.edu>
 *
 * Last updated: Curis summer 2025
 */

#include "i2c.h"
#include "twi_driver.h"
#include "assert.h"
#include "malloc.h"
#include "strings.h"
#include "timer.h"

// For debugging: read buffers are pre-filled with this byte before a read,
// so bytes the transaction did not overwrite are recognizable.
#define SENTINEL 0x7e

// Per-device settings. Created by `i2c_new` with defaults and changed via
// `i2c_config_device_settings`.
struct i2c_device {
    uint8_t addr;             // I2C bus address of the device
    i2c_reg_size_t reg_size;  // Valid values are `I2C_REG_8BIT` & `I2C_REG_16BIT`; also the count of register-address bytes sent
    i2c_transition_t transition; // Valid values are `I2C_REPEATED_START` & `I2C_STOP_START`
    int trans_delay_us; // If transition is I2C_STOP_START and requires delay
};

// Set up the I2C module at clock speed `rate` by delegating to the TWI driver.
void i2c_init(i2c_clk_freq_t rate) {
    twi_init(rate);
}

// Create I2C device with address `addr` and default device settings.
// Returns NULL if no device responds at `addr` or if memory for the device
// record cannot be allocated. Caller owns the returned pointer.
i2c_device_t* i2c_new(uint8_t addr) {
    // Probe the bus with an empty transaction before allocating anything,
    // so the failure path has nothing to clean up.
    if (!twi_do_transaction(addr, NULL, 0, NULL, 0)) {
        return NULL;
    }

    i2c_device_t *dev = malloc(sizeof(*dev));
    if (!dev) {
        return NULL;    // out of memory: report failure instead of dereferencing NULL
    }
    dev->addr = addr;
    dev->reg_size = I2C_REG_8BIT;  // default config
    dev->transition = I2C_REPEATED_START;
    dev->trans_delay_us = I2C_NO_DELAY;
    return dev;
}

// Change a device's register size, transition style, and transition delay (in microseconds).
// All arguments are validated before any setting is changed, so a rejected
// request leaves the device configuration untouched.
// Note: Invalid to have a non-zero delay if I2C_REPEATED_START transition
void i2c_config_device_settings(i2c_device_t* dev, i2c_reg_size_t reg_size, i2c_transition_t transition, int trans_delay_us) {
    assert(dev);
    // reg_size is later used as a buffer length and index, so it must be exactly 1 or 2
    assert(reg_size == I2C_REG_8BIT || reg_size == I2C_REG_16BIT);
    assert(transition == I2C_REPEATED_START || transition == I2C_STOP_START);
    // If repeated start, there CANNOT be any transition delay (acts as a single continuous operation)
    assert(!(transition == I2C_REPEATED_START && trans_delay_us != 0));

    dev->reg_size = reg_size;
    dev->transition = transition;
    dev->trans_delay_us = trans_delay_us;
}

// Write the single byte `val` to register `reg`; a one-byte case of
// `i2c_write_reg_n`. Returns true on success.
bool i2c_write_reg(i2c_device_t* dev, uint16_t reg, uint8_t val) {
    assert(dev);
    return i2c_write_reg_n(dev, reg, &val, 1);
}

// Write `n` bytes from `input_buffer` to register `reg` in one transaction
// consisting of the register address bytes followed by the data bytes.
// Requires n >= 0 and a non-NULL `input_buffer` when n > 0.
// Returns true if the transaction succeeded.
bool i2c_write_reg_n(i2c_device_t* dev, uint16_t reg, const uint8_t* input_buffer, int n) {
    assert(dev);
    assert(n >= 0);                     // negative n would give an invalid array size / copy length
    assert(input_buffer || n == 0);

    const int n_reg_bytes = dev->reg_size;  // enum value is the byte count (1 or 2)
    uint8_t buf[n_reg_bytes + n];

    /*
    * Big Endian assumed -- if register is 16-bits wide, the most significant byte is
    * sent first (i.e. the first byte in the register sequence) and the least
    * significant byte is sent second.
    *
    * Ex: If the 16 bit register address is 0x1234, buffer sent is broken into [0x12] [0x34] [data]
    * --> Little Endian would be swapped ([0x34] [0x12] [data])
    */
    if (dev->reg_size == I2C_REG_16BIT) buf[0] = (reg >> 8) & 0xff;
    buf[n_reg_bytes - 1] = reg & 0xff;

    if (n > 0) memcpy(buf + n_reg_bytes, input_buffer, n);
    return twi_do_transaction(dev->addr, buf, sizeof(buf), NULL, 0);
}

// Read one byte from register `reg`; a one-byte case of `i2c_read_reg_n`.
// Returns the byte (0-255) if the read succeeds, otherwise -1.
int i2c_read_reg(i2c_device_t* dev, uint16_t reg) {
    assert(dev);
    uint8_t value;
    bool ok = i2c_read_reg_n(dev, reg, &value, 1);
    return ok ? value : -1;
}

// Read `n` bytes from register `reg` into `output_buffer`.
// Sends the register address, then reads the data, either in a single
// transaction (I2C_REPEATED_START) or as two transactions with an optional
// delay between them (I2C_STOP_START).
// Requires n >= 0 and a non-NULL `output_buffer` when n > 0.
// Returns true if every transaction succeeded.
bool i2c_read_reg_n(i2c_device_t* dev, uint16_t reg, uint8_t* output_buffer, int n) {
    assert(dev);
    assert(n >= 0);                     // negative n would convert to a huge fill length
    assert(output_buffer || n == 0);

    // Pre-fill so bytes the device never supplied are recognizable when debugging
    if (n > 0) memset(output_buffer, SENTINEL, n);
    uint8_t buf[dev->reg_size];

    // Register address is sent most significant byte first (big endian)
    if (dev->reg_size == I2C_REG_16BIT) buf[0] = (reg >> 8) & 0xff;
    buf[dev->reg_size - 1] = reg & 0xff;

    // Write to I2C device to specify which register to read from
    if (dev->transition == I2C_STOP_START) {
        if (!twi_do_transaction(dev->addr, buf, sizeof(buf), NULL, 0)) return false;
        if (dev->trans_delay_us > 0) timer_delay_us(dev->trans_delay_us);
        return twi_do_transaction(dev->addr, NULL, 0, output_buffer, n);
    } else {
        return twi_do_transaction(dev->addr, buf, sizeof(buf), output_buffer, n);
    }
}

// Wrapper for HAL block write: sends `n` bytes to the device with no
// register address prefix. Returns true if the transaction succeeded.
bool i2c_block_write(i2c_device_t* dev, const uint8_t* bytes, int n) {
    assert(dev);    // same precondition as the register read/write functions
    return twi_do_transaction(dev->addr, bytes, n, NULL, 0);
}

// Wrapper for HAL block read: reads `n` bytes from the device with no
// register address prefix. Returns true if the transaction succeeded.
bool i2c_block_read(i2c_device_t* dev, uint8_t* bytes, int n) {
    assert(dev);    // same precondition as the register read/write functions
    return twi_do_transaction(dev->addr, NULL, 0, bytes, n);
}

#pragma once
/*
 * File: twi_driver.h
 *
 * Description: HAL module for the TWI (I2C) driver.
 * Clients not expected to directly use twi_driver, instead use the i2c module
 * that layers on top.
 *
 * Author: Elias Chikwanda <eliaschi@stanford.edu>
 * Author: Joe Robertazzi <tazzi@stanford.edu>
 */

#include <stdbool.h>
#include <stdint.h>
#include "i2c.h"

/*
 * `twi_init` : Required initialization for the TWI driver
 *
 * Initializes the TWI module and configures it to run at the clock rate `rate`. `twi_init`
 * should be called once to init module, a subsequent call will re-initialize. Supported
 * clock rates are I2C_20KHZ, I2C_100KHZ, and I2C_400KHZ.
 *
 * @param rate   the rate/clock frequency of the TWI module
 */
void twi_init(int rate);

/*
 * `twi_do_transaction` : process single TWI transaction
 *
 * Does one transaction of the form:
 *    <start>
 *       <addr+W><bytes_w><repeated start>
 *       <addr+R><bytes_r>
 *   <stop>
 * If no bytes to write, write portion is elided
 * If no bytes to read, read portion is elided
 *
 * Returns `true` if transaction successfully completed or `false`
 * on failure.
 *
 * @param dev_id        address of the TWI device
 * @param bytes_w       array of bytes to write
 * @param num_w         number of bytes to write
 * @param bytes_r       array to store bytes that are read
 * @param num_r         number of bytes to read
 */
bool twi_do_transaction(uint8_t dev_id, const uint8_t* bytes_w, int num_w, uint8_t* bytes_r, int num_r);

/*
 * File: twi_driver.c
 *
 * Description: Contains backend HAL for TWI driver. Allows for the initialization
 * of the TWI driver and essential block read/write features to interact with 
 * TWI/I2C based devices. This is the layer under the generalized API in i2c.c
 *
 * Author: Julie Zelenski  <zelenski@cs.stanford.edu>
 * Author: Elias Chikwanda <eliaschi@stanford.edu>
 * Author: Joe Robertazzi  <tazzi@stanford.edu>
 *
 * Last updated: Curis summer 2025
 */

#include "twi_driver.h"
#include "assert.h"
#include "ccu.h"
#include "gpio.h"
#include "malloc.h"
#include "strings.h"
#include "timer.h"
#include "i2c.h"
#include <stddef.h>


/*
 * IMPORTANT: bitfields & hardware registers
 * -----------------------------------------
 * TL;DR  Be sure to compile with gcc flag -fstrict-volatile-bitfields
 *
 * This flag tells gcc to generate 32-bit load/store instructions (i.e. lw/sw)
 * to access volatile bitfields. Without flag, gcc can generate 8 or 16-bit
 * instructions (i.e. sh or lb) that access subword. Subword access appears to
 * interact badly with twi hardware registers. This did not appear to be documented
 * anywhere; I only found out the hard way when observing garbled bits and lost
 * updates. I think it best to assume it is needed for all volatile bitfields
 * (i.e. any bitfield within peripheral registers)
*/

/*
 * Register layout of one TWI controller, overlaid on TWI_BASE.
 * Only the "driver" register group, which begins at offset 0x200, is
 * described; the first 0x200 bytes are skipped with padding. The outer union
 * pads the type to 0x400 bytes so that TWI_BASE[i] steps 0x400 bytes per index.
 * Unnamed bitfields are reserved bits. Offsets noted below follow from the
 * field sizes and are spot-checked by the _Static_asserts in this file.
 */
typedef union {
    struct {
        uint32_t padding[0x200 / sizeof(uint32_t)];
        struct { // TWI driver fields
            struct {    // offset 0x200
                uint32_t en                : 1; // driver enable
                uint32_t soft_reset        : 1;
                uint32_t                   : 6;
                uint32_t timeout_n         : 8;
                const uint32_t sta         : 8; // codes from FSM p.867, list p.875
                const uint32_t read_tran_error : 4;
                uint32_t read_tran_mode    : 1;
                uint32_t restart_mode      : 1;
                uint32_t                   : 1;
                uint32_t start_tran        : 1; // auto cleared
            } ctrl;
            struct {    // offset 0x204
                uint32_t
                    packet_interval        :16; // time between packets (in cycles?)
                uint32_t packet_cnt        :16;
            } cfg;
            struct {    // offset 0x208
                uint32_t id_x              : 8;
                uint32_t cmd               : 1; // cmd is read or write
                uint32_t id                : 7;
                uint32_t                   :16;
            } slv;
            struct {    // offset 0x20c
                uint32_t n_data_bytes      :16;
                uint32_t n_addr_bytes      : 8;
                uint32_t                   : 8;
            } fmt;
            struct {    // offset 0x210
                uint32_t                   : 6;
                const uint32_t sta_sda     : 1;
                const uint32_t sta_scl     : 1;
                uint32_t clk_M             : 4; // output = (F0/(clkM+1)) /10
                uint32_t clk_N             : 3; // sampling 24Mhz/2^clkN
                uint32_t clk_duty          : 1;
                uint32_t clk_count_mode    : 1;
                uint32_t                   :15;
            } bus_ctrl;
            struct {    // offset 0x214: status flags in low half, matching enables in high half
                uint32_t transmit_complete : 1;
                uint32_t transmit_error    : 1;
                uint32_t txfifo_req        : 1;
                uint32_t rxfifo_req        : 1;
                uint32_t                   :12;
                uint32_t transmit_complete_int_en : 1;
                uint32_t transmit_error_int_en    : 1;
                uint32_t txfifo_req_int_en  : 1;
                uint32_t rxfifo_req_int_en  : 1;
                uint32_t                    :12;
            } int_ctrl;
            uint32_t dma_cfg; // not used (offset 0x218)
            struct {    // offset 0x21c
                const uint32_t txfifo_cnt   : 6;
                uint32_t txfifo_clear       : 1;
                uint32_t                    : 9;
                const uint32_t rxfifo_cnt   : 6;
                uint32_t rxfifo_clear       : 1;
                uint32_t                    : 9;
            } fcr; // fifo control
            uint32_t padding[56];   // gap from 0x220 up to 0x300
            struct {    // offset 0x300
                uint32_t d8                 : 8;
                uint32_t                    : 24;
            } txfifo;
            struct {    // offset 0x304
                uint32_t d8                 : 8;
                uint32_t                    : 24;
            } rxfifo;
        } driver;
    } regs;
    uint32_t padding[0x100];    // forces sizeof(twi_t) to 0x400 bytes
} twi_t;

// Base address of the memory-mapped TWI controllers; TWI_BASE[0] is TWI0.
// NEVER remove: Static asserts to ensure bit field layout is correct.
// They fail the build if the struct layout places the driver registers at
// the wrong addresses.
#define TWI_BASE ((twi_t *)0x02502000)
_Static_assert(&(TWI_BASE[0].regs.driver.ctrl)   == (void *)0x02502200, "TWI0 driver ctrl reg must be at address 0x02502200");
_Static_assert(&(TWI_BASE[0].regs.driver.fcr)    == (void *)0x0250221c, "TWI0 driver fifo control reg must be at address 0x0250221c");
_Static_assert(&(TWI_BASE[0].regs.driver.rxfifo) == (void *)0x02502304, "TWI0 driver rx fifo reg must be at address 0x02502304");

/*
 * Type: `twi_timing_t`
 *
 * `twi_timing_t` is a struct type holding the N and M divider values that set
 * the clock speed of the TWI (I2C) module. `twi_init` writes them to the
 * `clk_N` and `clk_M` fields of the bus control register.
 */
typedef struct {
    int n;  // written to bus_ctrl.clk_N
    int m;  // written to bus_ctrl.clk_M
} twi_timing_t;

/*
 * Type: `twi_timing_entry_t`
 *
 * `twi_timing_entry_t` is a struct type pairing a clock-rate selector with the
 * timing configuration (in `twi_timing_t`) that produces that rate.
 */
typedef struct {
    int frequency;          // an `i2c_clk_freq_t` selector code (e.g. I2C_100KHZ), not a value in Hz
    twi_timing_t timing;    // divider values for that rate
} twi_timing_entry_t;

// Private state of the TWI driver. `twi_base`, `sda` and `scl` are fixed at
// build time; `twi` stays NULL until `twi_init` points it at the controller.
static struct {
    volatile twi_t *const twi_base, *twi;
    const gpio_id_t sda, scl;
} module = {
    .twi_base = &TWI_BASE[0], // TWI0
    .sda = GPIO_PG13,
    .scl = GPIO_PG12,
    .twi = NULL,
};

// Named 0/1 constants for the driver. Grouped in pairs by purpose.
// NOTE(review): their uses are not visible in this portion of the file;
// presumably they are written to single-bit register fields such as
// slv.cmd, ctrl.restart_mode and ctrl.read_tran_mode -- confirm.
enum {
    CMD_WRITE = 0,
    CMD_READ = 1,
    TRANSITION_REPEATED_START = 0,
    TRANSITION_STOP_START = 1,
    YES_W_BEFORE_READ = 0,
    NO_W_BEFORE_READ = 1,
};

/*
 * The TWI serial clock line output frequency
 * Fscl = Fin/(2^N * (M + 1) * 10), Fin = 24 MHz (APB clock input)
 */
const twi_timing_entry_t i2c_timing_table[] = {
                // N , M
    {I2C_100KHZ, { 1 , 11 }},
    {I2C_20KHZ,  { 3 , 14 }},
    {I2C_400KHZ, { 1 ,  2 }}
};

// Look up the divider values for rate selector `frequency`.
// On a match, stores them in `*timing` and returns true; otherwise leaves
// `*timing` untouched and returns false.
static bool select_timing_settings(int frequency, twi_timing_t *timing) {
    const twi_timing_entry_t *const table_end =
        i2c_timing_table + sizeof(i2c_timing_table) / sizeof(i2c_timing_table[0]);

    for (const twi_timing_entry_t *entry = i2c_timing_table; entry != table_end; entry++) {
        if (entry->frequency != frequency) {
            continue;
        }
        *timing = entry->timing;
        return true;
    }
    return false;
}

// Debugging aid, compiled out. Change `#if 0` to `#if 1` to enable.
// Prints the driver state code followed by a flag word for each condition
// that is currently set, then `label`.
// NOTE(review): the tx line tests `!txfifo_req` while the rx line tests
// `rxfifo_req` without negation -- confirm whether the asymmetry is intended.
#if 0
#include "printf.h"
static void debug_status(const char *label) {
    printf("\t[%02x] %s%s%s%s%s (%s)\n", module.twi->regs.driver.ctrl.sta,
        !module.twi->regs.driver.int_ctrl.txfifo_req && module.twi->regs.driver.fcr.txfifo_cnt? " txfifo_nonempty" : "",
        module.twi->regs.driver.int_ctrl.rxfifo_req && module.twi->regs.driver.fcr.rxfifo_cnt? " rxfifo_nonempty" : "",
        module.twi->regs.driver.int_ctrl.transmit_error ? " TRANSMIT_ERROR" : "",
        module.twi->regs.driver.int_ctrl.transmit_complete ? " transmit_complete" : "",
        module.twi->regs.driver.ctrl.read_tran_error ? " READ_TRAN_ERROR" : "",
        label);
}
#endif

// Initializes the TWI module and runs it at `frequency` clock rate.
// `frequency` must match an entry of i2c_timing_table; if it does not,
// error() is called with "Invalid clock speed.".
void twi_init(int frequency) {
    twi_timing_t timing = {i2c_timing_table[0].timing.n, i2c_timing_table[0].timing.m}; // Default 100 KHZ
    if (!select_timing_settings(frequency, &timing)) error("Invalid clock speed.");

    module.twi = &module.twi_base[0];                           // TWI0
    ccu_ungate_bus_clock(CCU_TWI_BGR_REG);
    // hand both pins over to the TWI peripheral
    gpio_set_function(module.sda, GPIO_FN_ALT3);
    gpio_set_function(module.scl, GPIO_FN_ALT3);
    module.twi->regs.driver.ctrl.en = 1;                        // enable TWI driver
    module.twi->regs.driver.bus_ctrl.clk_duty = 1;
    // clock dividers selected from i2c_timing_table
    module.twi->regs.driver.bus_ctrl.clk_M = timing.m;
    module.twi->regs.driver.bus_ctrl.clk_N = timing.n;
    // finish_transaction polls these two status flags
    module.twi->regs.driver.int_ctrl.transmit_complete_int_en = 1;
    module.twi->regs.driver.int_ctrl.transmit_error_int_en = 1; // JMR - enable error flagging
}

// Pulses the driver's soft_reset bit (set, then clear). Called by
// finish_transaction when a transaction ends with transmit_error set.
static void reset_on_error(void) {
    // TODO: does anything else need reset, fifos? force release sda/scl?
    module.twi->regs.driver.ctrl.soft_reset = 1; // reset on error
    module.twi->regs.driver.ctrl.soft_reset = 0; // clear
   // debug_status("after reset on error");
}

// Feeds `n` bytes from `bytes` into the transmit fifo, one at a time.
// Busy-waits whenever the fifo count has reached FIFO_FULL_CNT. Stops early
// if the driver raises transmit_error; there is no timeout otherwise.
static void send_bytes(const uint8_t *bytes, int n) {
    #define FIFO_FULL_CNT 32
    for (int i = 0; i < n && !module.twi->regs.driver.int_ctrl.transmit_error; /* adv in loop */) {
        if (module.twi->regs.driver.fcr.txfifo_cnt < FIFO_FULL_CNT) {   // room in fifo?
            module.twi->regs.driver.txfifo.d8 = bytes[i++];
        }
    }
}

// Drains `n` bytes from the receive fifo into `bytes`, one at a time.
// Busy-waits while the fifo is empty. Stops early if the driver raises
// transmit_error (remaining entries of `bytes` are then left unwritten);
// there is no timeout otherwise.
static void receive_bytes(uint8_t *bytes, int n) {
    for (int i = 0; i < n && !module.twi->regs.driver.int_ctrl.transmit_error; /* adv in loop */) {
        if (module.twi->regs.driver.fcr.rxfifo_cnt > 0) {   // byte available?
            bytes[i++] = module.twi->regs.driver.rxfifo.d8;
        }
    }
}

// Busy-waits for the current transaction to end.
// Returns true once transmit_complete is set. If transmit_error is seen
// instead, resets the driver via reset_on_error and returns false.
// There is no timeout: the loop spins until one of the two flags is set.
static bool finish_transaction(void) {
    while (1) {
        if (module.twi->regs.driver.int_ctrl.transmit_complete) {
            return true;
        }
        if (module.twi->regs.driver.int_ctrl.transmit_error) {
            reset_on_error();
            return false;
        }
    }
}

// Programs the driver registers for one transaction with device `dev_id`
// and starts it.
//   n_write > 0 and n_read > 0 : write then read; the written bytes are
//                                configured as address bytes, the read bytes as data
//   n_read > 0 only            : read-only
//   otherwise                  : write-only (n_write data bytes)
// Any complete/error flags left from a previous transaction are cleared
// before start_tran is set.
static void config_transaction(uint8_t dev_id, int n_write, int n_read) {
    module.twi->regs.driver.slv.id = dev_id;
    module.twi->regs.driver.cfg.packet_cnt = 1;
    if (n_write > 0 && n_read > 0) {    // (write THEN read)
        module.twi->regs.driver.ctrl.restart_mode = TRANSITION_REPEATED_START;
        module.twi->regs.driver.ctrl.read_tran_mode = YES_W_BEFORE_READ;
        module.twi->regs.driver.slv.cmd = CMD_READ;
        module.twi->regs.driver.fmt.n_addr_bytes = n_write;
        module.twi->regs.driver.fmt.n_data_bytes = n_read;
    } else {                            // else
        module.twi->regs.driver.ctrl.restart_mode = TRANSITION_STOP_START;
        module.twi->regs.driver.ctrl.read_tran_mode = NO_W_BEFORE_READ;
        module.twi->regs.driver.fmt.n_addr_bytes = 0;
        if (n_read > 0) {               // (read ONLY)
            module.twi->regs.driver.slv.cmd = CMD_READ;
            module.twi->regs.driver.fmt.n_data_bytes = n_read;
         } else {                       // (write ONLY)
            module.twi->regs.driver.slv.cmd = CMD_WRITE;
            module.twi->regs.driver.fmt.n_data_bytes = n_write;
        }
    }

    // clear previous state before start new transaction
    module.twi->regs.driver.int_ctrl.transmit_complete = 1;    // write 1 to clear
    module.twi->regs.driver.int_ctrl.transmit_error = 1;       // write 1 to clear
    module.twi->regs.driver.ctrl.start_tran = 1;
}

// Performs one complete transaction with device `dev_id`: sends `num_w` bytes
// from `bytes_w` (skipped when num_w is 0), then receives `num_r` bytes into
// `bytes_r` (skipped when num_r is 0). Blocks until the transaction ends.
// Returns true if it completed, false if the driver reported an error.
bool twi_do_transaction(uint8_t dev_id, const uint8_t* bytes_w, int num_w, uint8_t* bytes_r, int num_r) {
    config_transaction(dev_id, num_w, num_r);

    if (num_w != 0) {
        send_bytes(bytes_w, num_w);
    }
    if (num_r != 0) {
        receive_bytes(bytes_r, num_r);
    }

    bool completed = finish_transaction();
    return completed;
}

#pragma once
/*
 * File: i2s.h
 *
 * Description: Module for digital audio input and output
 * using I2S/PCM peripheral.
 *
 * Author: Chris Gregg     <cgregg@stanford.edu>
 * Author: Julie Zelenski  <zelenski@cs.stanford.edu>
 * Author: Joe Robertazzi  <tazzi@stanford.edu>
 */

#include <stdint.h>
#include "dma.h"

/*
 * Type: `i2s_frame_type_t`
 *
 * `i2s_frame_type_t` is an enumerated type used to refer to the type of frame
 * of the audio stream. Values can be `I2S_MONO` or `I2S_STEREO`.
 */
typedef enum {
    I2S_MONO = 0,       // one sample per frame, used for both left and right
    I2S_STEREO = 1,     // two samples per frame: left, then right
} i2s_frame_type_t;

/*
 * Type: `i2s_freq_t`
 *
 * `i2s_freq_t` is an enumerated type used to refer to the sample frequency
 * that the I2S device will use. Values can be `I2S_48KHZ` or `I2S_44p1KHZ`.
 * Each enumerator's value is the frequency in samples per second.
 * The frequency is the same for all uses of the I2S device. Call i2s_init again to change it.
 */
typedef enum {
    I2S_48KHZ = 48000,
    I2S_44p1KHZ = 44100,
} i2s_freq_t;

/*
 * `i2s_init` : Required initialization for I2S module
 *
 * Initializes the I2S module. Must be called before using playback or capture.
 * Typical values are `I2S_48KHZ` and `I2S_44p1KHZ` for the `sample_frequency`
 * parameter. Also accepts additional rates, but these are not guaranteed to work.
 *
 * @param sample_frequency   the desired sample frequency, in samples per second
 *
 * Only need to call `i2s_init` once -- subsequent calls reinitialize the module.
 */
void i2s_init(i2s_freq_t sample_frequency);

/*
 * `i2s_stream_playback_nb` : Non-blocking DMA implementation for I2S playback
 *
 * Takes in an array of 16-bit samples `samples` of size `nsamples` and data type `ftype`
 * (which can either be `I2S_MONO` or `I2S_STEREO`) and plays the samples through the I2S
 * device via DMA. Returns as soon as the transfer has been started.
 *
 * CAUTION: The memory for samples array must remain valid until playback completes.
 * For example, a stack-allocated buffer is almost certainly not valid. Do not modify
 * contents of samples array while playback is in progress. Use i2s_await() to wait.
 *
 * Note: Keep in mind the following common issues when using I2S stream playback:
 *   - ONLY input `I2S_MONO` data into `I2S_MONO` outputs (audio will break if mismatched)
 *   - ONLY input `I2S_STEREO` data into `I2S_STEREO` outputs (audio will break if mismatched)
 *   - Audio sped up or slowed down usually means mismatched input/output data
 *
 * @param samples    the array of samples to be played on the I2S device - each sample is 16 bits
 *                   and in `I2S_MONO` mode, samples are copied to the left and right channels while
 *                   in `I2S_STEREO` mode, samples alternate (i.e. [L16][R16]...[L16][R16])
 * @param nsamples   the number of samples in the `samples` array
 * @param ftype      the data type of the samples, either `I2S_MONO` or `I2S_STEREO`
 */
void i2s_stream_playback_nb(const int16_t samples[], int nsamples, i2s_frame_type_t ftype);

/*
 * `i2s_stream_capture_nb` : Non-blocking DMA implementation for I2S capture
 *
 * Captures `nsamples` 16-bit samples from the I2S device via DMA and stores them
 * into the array `samples`. The data type `ftype` can either be `I2S_MONO` or
 * `I2S_STEREO`. Returns as soon as the transfer has been started.
 *
 * CAUTION: The memory for samples array must remain valid until capture completes.
 * For example, a stack-allocated buffer is almost certainly not valid. Do not modify
 * contents of samples array while capture is in progress. Use i2s_await() to wait.
 *
 * Note: Keep in mind the following common issues when using I2S capture:
 *   - Staticky audio: check for wiring/grounding issues, as they typically are at fault
 *   - When doing STEREO mic/input, provide high signal to L/R or SELECT pin for one mic
 *
 * @param samples    the array that receives the captured samples - each sample is 16 bits;
 *                   in `I2S_MONO` mode, the right channel duplicates the left channel while
 *                   in `I2S_STEREO` mode, samples alternate (i.e. [L16][R16]...[L16][R16])
 * @param nsamples   the number of samples to store into the `samples` array
 * @param ftype      the data type of the samples, either `I2S_MONO` or `I2S_STEREO`
 */
void i2s_stream_capture_nb(int16_t samples[], int nsamples, i2s_frame_type_t ftype);

/*
 * `i2s_transfer_completed` : Returns whether current I2S transfer has completed
 *
 * Does not block. Returns true if the DMA transfer for the most recently started
 * I2S transaction has completed, false if it is still in progress.
 * Works for both `i2s_stream_playback_nb()` and `i2s_stream_capture_nb()`, and can
 * be called after each to determine whether the transaction is still ongoing.
 * A playback or capture must have been started before calling this function.
 */
bool i2s_transfer_completed(void);

/*
 * `i2s_await` : Waits until current I2S transfer completes
 *
 * Blocks until the DMA transfer for the current I2S transaction has completed.
 * Works for both `i2s_stream_playback_nb()` and `i2s_stream_capture_nb()`, and should
 * be called after each to simulate a blocking implementation as both are non-blocking
 * by default. A playback or capture must have been started before calling this function.
 */
void i2s_await(void);
/*
 * File: i2s.c
 *
 * Description: Contains API support for I2S devices on the MangoPi. Allows users
 * to initialize the I2S module and to stream audio playback and capture via DMA.
 * Check i2s.h for more specification on the `i2s_frame_type_t` and `i2s_freq_t` types.
 *
 * Author: Chris Gregg     <cgregg@stanford.edu>
 * Author: Julie Zelenski  <zelenski@cs.stanford.edu>
 * Author: Joe Robertazzi  <tazzi@stanford.edu>
 *
 * Last updated: Curis summer 2025
 */

#include "i2s.h"
#include "assert.h"
#include "ccu.h"
#include "dma.h"
#include "gpio.h"
#include "printf.h"

// Bit codes for selecting sample resolution (formula = (res >> 2) - 1 in binary),
// e.g. 16 bits -> (16 >> 2) - 1 = 3 = 0b011. config_for_capture writes these
// codes to both the fmt.sample_res and fmt.slot_width fields.
typedef enum {
    SAMPLE_RES_16 = 0b011,
    SAMPLE_RES_24 = 0b101,
} i2s_sample_resolution;

// Allows the driver to track the progress of non-blocking transfers.
typedef struct {
    dma_transfer_id_t id;   // id returned by dma_transfer; polled with dma_transfer_completed
} i2s_transfer_t;

// Structs defined to match layout of hardware registers.
// One i2s_t covers the register block of one I2S peripheral; the union with
// `padding` fixes its size at 0x1000 bytes so that an array of i2s_t steps from
// one peripheral's block to the next. Unnamed bit fields are placeholders for
// bits this driver does not use. Byte offsets in the comments below are relative
// to the start of the block; the _Static_asserts after this type check a sample.
typedef union {
    struct {
        struct {                        // 0x00
            uint32_t global_ena : 1;
            uint32_t rx_ena     : 1;
            uint32_t tx_ena     : 1;
            uint32_t            : 1;
            uint32_t mode       : 2;
            uint32_t out_mute   : 1;
            uint32_t            : 1;
            uint32_t dout_ena   : 4; // dout0-3
            uint32_t            : 5;
            uint32_t dir_bclk   : 1;
            uint32_t dir_lrclk  : 1;
            uint32_t            :13;
        } ctl;
        struct {                        // 0x04 (two words: fmt0, fmt1)
            uint32_t slot_width : 3;
            uint32_t            : 1;
            uint32_t sample_res : 3;
            uint32_t            : 1;
            uint32_t lr_period  :10;
            uint32_t            :13;
            uint32_t            :32; // fmt1 fields
        } fmt;
        uint32_t ista;                  // 0x0c
        const uint32_t rx_fifo;         // 0x10, read-only
        struct {                        // 0x14
            uint32_t rx_mode    : 2;
            uint32_t tx_mode    : 1;
            uint32_t            : 1;
            uint32_t rx_trigger : 6;
            uint32_t            : 2;
            uint32_t tx_trigger : 6;
            uint32_t            : 5;
            uint32_t rx_flush   : 1;
            uint32_t tx_flush   : 1;
            uint32_t            : 7;
        } fifoctl;
        const struct {                  // 0x18, read-only
            uint32_t rx_acnt    : 7;
            uint32_t            : 1;
            uint32_t rx_avail   : 1;
            uint32_t            : 7;
            uint32_t tx_ecnt    : 8;
            uint32_t            : 4;
            uint32_t tx_empty   : 1;
            uint32_t            : 3;
        } fsta;
        struct {                        // 0x1c
            uint32_t rxai_en    : 1;
            uint32_t rxoi_en    : 1;
            uint32_t rxui_en    : 1;
            uint32_t rx_drq     : 1;
            uint32_t txei_en    : 1;
            uint32_t txoi_en    : 1;
            uint32_t txui_en    : 1;
            uint32_t tx_drq     : 1;
            uint32_t            :24;
        } irq;
        uint32_t tx_fifo;               // 0x20
        struct {                        // 0x24
            uint32_t mclk_div   : 4;
            uint32_t bclk_div   : 3;
            uint32_t mclk_ena   : 1;
            uint32_t            :24;
        } clkd;
        uint32_t tx_cntr;               // 0x28
        uint32_t rx_cntr;               // 0x2c
        struct {                        // 0x30
            uint32_t tx_slot_num : 4;
            uint32_t rx_slot_num : 4;
            uint32_t             :24;
        } chcfg;
        struct {                        // 0x34, one word per dout
            uint32_t chen       :16;
            uint32_t chsel      : 4;
            uint32_t offset     : 2;
            uint32_t            :10;
        } txchsel[4];
        struct {                        // 0x44, two words per dout
            uint32_t            :32; // padding for channels 8 - 15
            uint32_t ch0        : 4; // since we only use mono/stereo, we only need channels 1/0
            uint32_t ch1        : 4; // can manually change the other fields if need more channels
            uint32_t            :24; // padding for channels 2 - 7
        } txchmap[4];
        struct {                        // 0x64
            uint32_t            :16;
            uint32_t chsel      : 4;
            uint32_t offset     : 2;
            uint32_t            :10;
        } rxchsel;
        struct {                        // 0x68, four words
           uint32_t             :32; // padding for channels 12 - 15
           uint32_t             :32; // padding for channels 8 - 11
           uint32_t             :32; // padding for channels 4 - 7
           uint32_t ch0map      : 4;
           uint32_t ch0sel      : 2;
           uint32_t             : 2;
           uint32_t ch1map      : 4;
           uint32_t ch1sel      : 2;
           uint32_t             : 2;
           uint32_t             :16; // padding for channels 2 - 3
        } rxchmap;
    } regs;
    unsigned char padding[0x1000]; // sizeof entire I2S block
} i2s_t;

// DO NOT REMOVE - confirms that our bit fields are laid out correctly.
// I2S_BASE is the address of the first I2S register block; I2S_BASE[k] is the
// block for I2S peripheral k (each block is sizeof(i2s_t) = 0x1000 bytes).
#define I2S_BASE ((i2s_t *)0x02032000)
_Static_assert(&(I2S_BASE[0].regs.tx_fifo)  == (void *)0x02032020, "i2s0 tx_fifo reg must be at address 0x02032020");
_Static_assert(&(I2S_BASE[1].regs.rxchsel)  == (void *)0x02033064, "i2s1 rxchsel reg must be at address 0x02033064");
_Static_assert(&(I2S_BASE[2].regs.ctl)      == (void *)0x02034000, "i2s2 ctl reg must be at address 0x02034000");
_Static_assert(sizeof(I2S_BASE[2].regs.rxchmap) == 16, "i2s2 rxchmap should have one byte per channel = 16 bytes total");

// Struct containing i2s config data
static struct {
    volatile i2s_t * const i2s_base, *i2s;  // i2s_base: first I2S register block; i2s: block in use, NULL until i2s_init
    long mclk_rate;                         // module clock rate, set by choose_clock
    int frequency;                          // sample frequency, set by choose_clock
    bool is_active;                         // true once a playback/capture has been requested (current_transfer then valid)
    i2s_transfer_t current_transfer;        // most recently started DMA transfer
} module = {
    .i2s_base = &I2S_BASE[0],
    .i2s = NULL, // points to i2s after init()
    .is_active = false,
};

// Struct containing audio clock configuration.
// choose_clock walks this table in order and uses the first entry whose
// `multiple` evenly divides the requested sample frequency. The all-zero
// entry at the end terminates the walk.
static struct audio_clock {
    uint32_t multiple; // clock setting appropriate for output frequency of this multiple
    struct {
        ccu_pll_id_t id;    // pll passed to ccu_config_pll_rate
        long rate;          // rate requested for that pll
    } pll;
    struct {
        ccu_parent_id_t parent;     // parent passed to ccu_config_module_clock_rate
        long rate, actual_rate;     // rate: requested module clock rate
                                    // actual_rate: rate reported back; 0 until first configured
    } mod_clk;
} clock_options [] = {
    {.multiple= 11025, .pll= {.id=CCU_PLL_AUDIO0_CTRL_REG, .rate= 22545454},   .mod_clk = {.parent= PARENT_AUDIO0, .rate= 22545454}},
    {.multiple=  8000, .pll= {.id=CCU_PLL_AUDIO1_CTRL_REG, .rate= 3072000000}, .mod_clk = {.parent= PARENT_AUDIO1_DIV5, .rate= 24576000}},
    {0},
};

// Helper function prototypes
static void validate_request(void);
static void choose_clock(int sample_frequency);
static void set_clock_divider(int bitrate);
static void config_gpio(void);
static void config_for_playback(i2s_frame_type_t ftype);
static void config_for_capture(i2s_frame_type_t ftype);
static void playback_start(void);
static void capture_start(void);

// Initializes the I2S module -- must be called before calling any other I2S operations.
// Selects I2S2, configures its clocks for `sample_frequency`, routes the gpio
// pins, and enables the peripheral. May be called again to reinitialize
// (e.g. to change the sample frequency).
void i2s_init(i2s_freq_t sample_frequency) {
    // disable before config (reset in case multiple inits in one program).
    // On the first call module.i2s is still NULL: there is nothing to disable
    // yet and the pointer must not be dereferenced.
    if (module.i2s != NULL) {
        module.i2s->regs.ctl.global_ena = 0;
    }

    module.i2s = &I2S_BASE[2]; // use I2S2, gpio pins on header
    ccu_ungate_bus_clock_bits(CCU_I2S_BGR_REG, 1 << 18, 1 << 2); // I2S2
    choose_clock(sample_frequency); // configure clocks
    int bits_per_sample             = 32;
    int bits_per_frame = bits_per_sample * 2;
    int bitrate = module.frequency * bits_per_frame;
    set_clock_divider(bitrate);
    module.i2s->regs.fmt.lr_period  = 31;
    module.i2s->regs.ctl.global_ena = 0;
    module.i2s->regs.ctl.rx_ena     = 0;
    module.i2s->regs.ctl.tx_ena     = 0;
    config_gpio();

    module.i2s->regs.ctl.dir_bclk   = 1;      // BCLK direction output
    module.i2s->regs.ctl.dir_lrclk  = 1;      // LRCLK direction output
    module.i2s->regs.clkd.mclk_ena  = 0;      // disable MCLK (not used)
    module.i2s->regs.ctl.mode       = 1;      // mode is I2S standard, left-justified
    module.i2s->regs.ctl.dout_ena   = 0b0001; // enable D0, disable D1-3

    // enable after config
    module.i2s->regs.ctl.global_ena = 1;
}

// Returns whether the current i2s transaction has completed.
// Does not block: reports the state of the most recently started DMA transfer.
// Calls error() if no playback/capture has been requested yet.
bool i2s_transfer_completed(void) {
    if (!module.is_active) error("must call a playback/capture before calling i2s_transfer_completed()\n");
    return dma_transfer_completed(module.current_transfer.id);
}

// Spins until the most recently started DMA transfer reports completion.
// Calls error() if no playback/capture has been requested yet.
void i2s_await(void) {
    if (!module.is_active) error("must call a playback/capture before calling i2s_await()\n");
    for (;;) {
        if (dma_transfer_completed(module.current_transfer.id)) {
            break;
        }
    }
}

// Non-blocking DMA implementation for streaming playback using the I2S device.
// Configures the transmitter for `ftype`, starts it, and launches a DMA transfer
// that copies `nsamples` 16-bit samples from `samples` to the tx fifo. Returns
// without waiting; the transfer id is kept in module.current_transfer.
// Calls error() (via validate_request) if i2s_init has not been called or a
// previous transfer is still underway.
void i2s_stream_playback_nb(const int16_t samples[], int nsamples, i2s_frame_type_t ftype) {
    validate_request();

    config_for_playback(ftype);
    playback_start();
    module.i2s->regs.irq.tx_drq = 1;    // set the tx DMA request bit

    dma_endpoint_t from_mem         = dma_create_endpoint(DRQ_TYPE_DRAM, samples);
    dma_endpoint_t to_i2s_tx        = dma_create_endpoint(DRQ_TYPE_I2S2, &module.i2s->regs.tx_fifo);
    // transfer size is given in bytes
    module.current_transfer.id      = dma_transfer(from_mem, to_i2s_tx, DMA_BITWIDTH_16, nsamples * sizeof(*samples));
}

// Non-blocking DMA implementation for capturing samples from the I2S device.
// Configures the receiver for `ftype`, starts it, and launches a DMA transfer
// that copies `nsamples` 16-bit samples from the rx fifo into `samples`. Returns
// without waiting; the transfer id is kept in module.current_transfer.
// Calls error() (via validate_request) if i2s_init has not been called or a
// previous transfer is still underway.
void i2s_stream_capture_nb(int16_t samples[], int nsamples, i2s_frame_type_t ftype) {
    validate_request();

    config_for_capture(ftype);
    capture_start();
    module.i2s->regs.irq.rx_drq = 1;    // set the rx DMA request bit

    // The source address is 2 bytes past the start of the 32-bit rx_fifo register,
    // so each 16-bit DMA read takes the second halfword of the register
    // (presumably the upper 16 bits on this little-endian target -- confirm).
    dma_endpoint_t from_i2s_rx      = dma_create_endpoint(DRQ_TYPE_I2S2, (uint16_t*)((char*)(&(module.i2s->regs.rx_fifo)) + 2));
    dma_endpoint_t to_mem           = dma_create_endpoint(DRQ_TYPE_DRAM, samples);
    // transfer size is given in bytes
    module.current_transfer.id      = dma_transfer(from_i2s_rx, to_mem, DMA_BITWIDTH_16, nsamples * sizeof(*samples));
}

// Gatekeeper run at the start of every playback/capture request.
// Calls error() if i2s_init has not been called, or if an earlier transfer was
// started and its DMA has not finished. Marks the module active afterwards so
// that later calls know module.current_transfer holds a real transfer id.
static void validate_request(void) {
    if (module.i2s == NULL) error("i2s_init() has not been called!\n");

    // current_transfer is only meaningful once is_active is set, so test that first
    bool previous_still_running = module.is_active && !dma_transfer_completed(module.current_transfer.id);
    if (previous_still_running) {
        error("attempted new I2S transfer while previous transfer still underway.\n"
              "hint: use i2s_await() to wait for your previous non-blocking transfer to complete."
            );
    }

    module.is_active = true;
}

// Chooses the appropriate clock based on the input sample frequency.
// Uses the first entry of clock_options whose `multiple` evenly divides
// `sample_frequency`, configures the pll (first use only) and the I2S2 module
// clock, and records the frequency and resulting module clock rate in `module`.
// Asserts if the frequency is not positive or no entry matches.
static void choose_clock(int sample_frequency) {
    // A frequency of 0 would satisfy the divisibility test below (0 % multiple == 0)
    // and be accepted, leading to a bitrate of 0 that set_clock_divider divides by.
    // Non-positive frequencies therefore skip the search and hit the assert.
    if (sample_frequency > 0) {
        for (struct audio_clock *opt = clock_options; opt->multiple; opt++) {
            if (sample_frequency % opt->multiple == 0) {
                if (opt->mod_clk.actual_rate == 0) ccu_config_pll_rate(opt->pll.id, opt->pll.rate); // only config pll once
                opt->mod_clk.actual_rate = ccu_config_module_clock_rate(CCU_I2S2_CLK_REG, opt->mod_clk.parent, opt->mod_clk.rate);
                assert(opt->mod_clk.actual_rate == opt->mod_clk.rate); // confirm we have rate we wanted
                module.frequency = sample_frequency;
                module.mclk_rate = opt->mod_clk.actual_rate;
                return;
            }
        }
    }
    assert(!"No available clock option for requested frequency");
}

// Sets the clock divider based on the input bitrate.
// Computes the ratio module clock / bitrate and stores in clkd.bclk_div the
// index of the first available divider that is >= that ratio.
// Asserts if `bitrate` is not positive or if the ratio exceeds the largest divider.
static void set_clock_divider(int bitrate) {
    assert(bitrate > 0); // guards the division below
    int fraction = module.mclk_rate/bitrate;
    // The value stored in the register field is the index into this table.
    // Index 0 holds -1 as a placeholder that a non-negative ratio never selects.
    const int available_dividers[] = { -1, 1, 2, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 176, 192 };
    int n_avail = sizeof(available_dividers)/sizeof(*available_dividers);
    int chosen_index = -1;
    for (int i = 0; i < n_avail; i++) {
        if (available_dividers[i] >= fraction) {
            chosen_index = i; break;
        }
    }
    // no divider is large enough: fail loudly rather than store -1 in the bit field
    assert(chosen_index != -1);
    // NOTE(review): bclk_div is declared as a 3-bit field in i2s_t while this table
    // has 16 entries; an index above 7 would be truncated when stored -- confirm
    // the field width against the hardware manual.
    module.i2s->regs.clkd.bclk_div = chosen_index;
}

// Routes the four gpio pins used by I2S to their alternate functions.
static void config_gpio(void) {
    // Connections from Mango Pi to pins on DAC, applied in the order listed
    static const struct {
        gpio_id_t pin;
        uint8_t fn;
    } wiring[] = {
        {GPIO_PB5, GPIO_FN_ALT3},   // GPIO_PB5 to BCLK
        {GPIO_PB6, GPIO_FN_ALT3},   // GPIO_PB6 to LRCLK
        {GPIO_PB4, GPIO_FN_ALT3},   // GPIO_PB4 to Data Out
        {GPIO_PB3, GPIO_FN_ALT5},   // GPIO_PB3 to Data In
    };
    const int n_pins = sizeof(wiring) / sizeof(wiring[0]);

    for (int i = 0; i < n_pins; i++) {
        gpio_set_function(wiring[i].pin, wiring[i].fn);
    }
}

// Configures I2S registers for playback of 16-bit samples.
// Output is always two channels: for I2S_MONO each frame takes one sample from
// the fifo and uses it for both left and right; for I2S_STEREO each frame takes
// two samples (left, then right). Also re-derives the clock divider for the
// 16-bit frame size.
static void config_for_playback(i2s_frame_type_t ftype) {
    int bits_per_sample = 16;                                // hardcoded at 16 bits since we're using 16-bit samples
    int bits_per_frame = bits_per_sample * 2;
    int bitrate = module.frequency * bits_per_frame;
    set_clock_divider(bitrate);
    module.i2s->regs.fmt.lr_period = bits_per_sample - 1;    // bclks per sample
    module.i2s->regs.fmt.sample_res = bits_per_sample/4 - 1; // sample resolution = 16
    module.i2s->regs.fmt.slot_width = bits_per_sample/4 - 1; // slot width = 16
    module.i2s->regs.txchsel[0].offset = 1;                  // mode is i2s standard: data offset by 1 BCLK relative to LRCK
    module.i2s->regs.fifoctl.tx_mode = 1;                    // if sample width < 32-bit, harvest from LSB of fifo

    int n_output_channels = 2;                               // output 2-channel (either stereo or duplicate mono)
    uint32_t enable_bits = (1 << n_output_channels) - 1;     // one enable bit per output channel
    module.i2s->regs.txchsel[0].chen = enable_bits;
    module.i2s->regs.txchsel[0].chsel = n_output_channels - 1; // set size of map

    // mono frame consumes 1 sample from fifo
    // stereo frame consumes 2 samples from fifo
    module.i2s->regs.chcfg.tx_slot_num = ftype == I2S_MONO ? 0 : 1;
    module.i2s->regs.txchmap[0].ch0 = 0;                             // fill L => first sample
    module.i2s->regs.txchmap[0].ch1 = ftype == I2S_MONO ? 0 : 1;     // fill R => first sample (replicate if ftype = I2S_MONO)
                                                                     // fill R => second sample (if ftype = I2S_STEREO)
}

// Configures I2S registers for capture of 16-bit samples.
// Two input channels are always configured; for I2S_MONO the second channel is
// mapped to the same source as the first. The clock divider is re-derived from
// a frame size that depends on `ftype`.
static void config_for_capture(i2s_frame_type_t ftype) {
    int bits_per_sample = ftype == I2S_MONO ? 16 : 32;          // 16 bits for mono, 32 bits for stereo
    int bits_per_frame = bits_per_sample * 2;
    int bitrate = module.frequency * bits_per_frame;
    set_clock_divider(bitrate);
    module.i2s->regs.fmt.lr_period = 31;             // bclks per sample
    module.i2s->regs.fmt.sample_res = SAMPLE_RES_16; // 16-bit sample resolution
    module.i2s->regs.fmt.slot_width = SAMPLE_RES_16; // 16-bit slot width
    module.i2s->regs.fifoctl.rx_mode = 0;            // if sample width < 32-bit, zero fill
    module.i2s->regs.rxchsel.chsel = 1;              // two input channels (right will duplicate left)
    module.i2s->regs.rxchsel.offset = 1;             // mode is i2s standard: data offset by 1 BCLK relative to LRCK
    int n_samples_fifo = 2;                          // register field holds the count minus one
    module.i2s->regs.chcfg.rx_slot_num = n_samples_fifo - 1;
    module.i2s->regs.rxchmap.ch0map = 0;                           // left sample
    module.i2s->regs.rxchmap.ch1map = ftype == I2S_MONO ? 0 : 1;   // right --> duplicate if ftype = I2S_MONO
                                                                   // use second channel if ftype = I2S_STEREO
}

// Starts audio playback on the I2S device: sets the tx fifo flush bit,
// zeroes the tx sample counter, then enables the transmitter.
static void playback_start(void) {
    module.i2s->regs.fifoctl.tx_flush = 1;
    module.i2s->regs.tx_cntr = 0;
    module.i2s->regs.ctl.tx_ena = 1;
}

// Starts capturing samples from the I2S device: sets the rx fifo flush bit,
// zeroes the rx sample counter, then enables the receiver.
static void capture_start(void) {
    module.i2s->regs.fifoctl.rx_flush = 1;
    module.i2s->regs.rx_cntr = 0;
    module.i2s->regs.ctl.rx_ena = 1;
}


#if 0
/* Example code for blocking implementation for I2S playback (not utilizing the DMA). Use this
 * code as a template if you need access to samples as they leave the samples[] array and enter the fifo.
 * This code manually feeds the fifo one sample at a time, so it prevents you from being able to
 * run tasks in the background, but gives you the ability to directly interact with the fifo stream
 * in real time. The transmitter is disabled again once all samples have been written.
 */
void i2s_play_stream_blocking(const int16_t samples[], int nsamples, i2s_frame_type_t ftype) {
    if (module.i2s == NULL) error("i2s_init() has not been called!\n");
    config_for_playback(ftype);
    playback_start();
    for (int i = 0; i < nsamples; i++) {
        while (module.i2s->regs.fsta.tx_ecnt == 0) ; // wait for empty space in fifo
        module.i2s->regs.tx_fifo = samples[i];
    }
    module.i2s->regs.ctl.tx_ena = 0; // done audio playback

}

/* Example code for blocking implementation for I2S capture (not utilizing the DMA). Use this code
 * as a template if you need access to samples as they exit the fifo and enter the samples[] array.
 * This code manually drains the fifo one sample at a time, so it prevents you from being able to
 * run tasks in the background, but gives you the ability to directly interact with the fifo stream
 * in real time. Capture is always configured as I2S_MONO; the receiver is disabled again once
 * `nsamples` samples have been stored.
 */
void i2s_capture_blocking(int16_t samples[], int nsamples) {
    if (module.i2s == NULL) error("i2s_init() has not been called!\n");
    config_for_capture(I2S_MONO);
    capture_start();
    for (int i = 0; i < nsamples; i++) {
        while (module.i2s->regs.fsta.rx_acnt == 0) ; // wait for sample avail
        uint32_t val = module.i2s->regs.rx_fifo;
        samples[i] = val >> 16; // Most you can shift by before distortion is 12 bits
    }
    module.i2s->regs.ctl.rx_ena = 0; // done audio capture
}

#endif
#pragma once
/*
 * File: spi.h
 *
 * Description: Module of functions that support communication
 * with devices over SPI bus.
 *
 * Author: Daniel James <drjames@stanford.edu>
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 */

#include "gpio.h"
#include <stdint.h>
/*
 * Type: `spi_mode_t`
 *
 * `spi_mode_t` is an enumerated type for the device sampling mode.
 * Each mode is a combination of clock polarity (CPOL) and clock phase (CPHA).
 * Pass the mode required by your device to `spi_new`.
 */
typedef enum {
    SPI_MODE_0 = 0,     // CPOL = 0, CPHA = 0 (Clock Low, Data Captured on Rising Edge)
    SPI_MODE_1,         // CPOL = 0, CPHA = 1 (Clock Low, Data Captured on Falling Edge)
    SPI_MODE_2,         // CPOL = 1, CPHA = 0 (Clock High, Data Captured on Falling Edge)
    SPI_MODE_3,         // CPOL = 1, CPHA = 1 (Clock High, Data Captured on Rising Edge)
} spi_mode_t;


/*
 * Type: `spi_device_t`
 *
 * This typedef gives a nickname to the struct that will be used to represent a
 * single SPI device. The struct is only declared here, not defined; its internal
 * details are private to the implementation and are not shared in the
 * public interface. Clients of the SPI module are not privy to the details of
 * `spi_device_t`, nor should they be. A client simply holds on to the pointer returned
 * by `spi_new` and sends that pointer to the functions below that communicate with
 * the device over the SPI protocol.
 */
typedef struct spi_device spi_device_t;


/*
 * `spi_init` : Required initialization for SPI module
 *
 * Initializes the SPI module. Must be called before communicating with any SPI
 * devices.
 *
 * Only need to call `spi_init` once -- subsequent calls reinitialize the module.
 */
void spi_init(void);

/*
 * `spi_new` : Create a new SPI device
 *
 * Creates a new SPI device. The given gpio pin is activated as chip select before
 * any transmissions. Sampling for this device uses the specified mode and
 * operates at speed specified by rate (expressed in bits per second). Choose
 * the mode and speed according to device requirements. Returns pointer to newly created SPI device.
 *
 * @param chip_select      gpio pin connected to the device's chip select
 * @param mode             SPI mode for sampling
 * @param rate             speed in bits per second
 * @return                 pointer to the newly created SPI device
 */
spi_device_t * spi_new(gpio_id_t chip_select, spi_mode_t mode, long rate);

/*
 * `spi_write_and_read` : Transmission will write and read at same time
 *
 * Transmit tx_len bytes from tx while reading rx_len bytes into rx.
 * If either length is zero, the corresponding buffer may be NULL.
 * Blocks until the transfer has completed.
 *
 * @param dev       pointer to target SPI device
 * @param tx        buffer of bytes to transmit
 * @param tx_len    number of bytes in transmit buffer
 * @param rx        buffer of received bytes
 * @param rx_len    number of bytes in receive buffer
 */
void spi_write_and_read(spi_device_t *dev, const uint8_t *tx, int tx_len, uint8_t *rx, int rx_len);

/*
 * `spi_write_then_read` : Transmission will write, followed by read
 *
 * First transmit tx_len bytes from tx, then receive rx_len bytes into rx.
 * If either length is zero, the corresponding buffer may be NULL.
 * Blocks until the transfer has completed.
 *
 * @param dev       pointer to target SPI device
 * @param tx        buffer of bytes to transmit
 * @param tx_len    number of bytes in transmit buffer
 * @param rx        buffer of received bytes
 * @param rx_len    number of bytes in receive buffer
 */
void spi_write_then_read(spi_device_t *dev, const uint8_t *tx, int tx_len, uint8_t *rx, int rx_len);

/*
 * `spi_write` : Transmission will write, no read
 *
 * Transmit `tx_len` bytes from buffer `tx` to device. Does not receive any data.
 * Blocks until the transfer has completed.
 *
 * @param dev       pointer to target SPI device
 * @param tx        buffer of bytes to transmit
 * @param tx_len    number of bytes in transmit buffer
 */
void spi_write(spi_device_t *dev, const uint8_t *tx, int tx_len);

/*
 * File: spi.c
 *
 *  SPI implementation
 *
 * Most of these functions are simple wrappers that pass through to spi_driver
 * to access the hardware. The standard interface only supports blocking transactions
 * (simple and safe), but internally all are implemented as a nb transfer
 * followed by a synchronous wait.
 *
 * Author: Daniel James <drjames@stanford.edu>
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 *
 * Last updated: Curis summer 2025
 */

#include "spi.h"
#include "spi_driver.h"
#include "spi_extra.h"
#include <stddef.h>

// Initializes the SPI module by passing through to the driver's spid_init.
void spi_init(void) {
    spid_init();
}

// Creates a new SPI device by passing through to the driver's spid_new_device.
// Returns whatever pointer the driver returns.
spi_device_t *spi_new(gpio_id_t chip_select, spi_mode_t mode, long rate) {
    return spid_new_device(chip_select, mode, rate);
}

// Blocking write-and-read: kicks off the non-blocking variant, then waits
// for that transfer on `dev` to complete.
void spi_write_and_read(spi_device_t *dev, const uint8_t *tx, int tx_len, uint8_t *rx, int rx_len) {
    spi_write_and_read_nb(dev, tx, tx_len, rx, rx_len);
    while (!spid_transfer_completed(dev)) {
        // spin until driver reports completion
    }
}

// Blocking write-then-read: kicks off the non-blocking variant, then waits
// for that transfer on `dev` to complete.
void spi_write_then_read(spi_device_t *dev, const uint8_t *tx, int tx_len, uint8_t *rx, int rx_len) {
    spi_write_then_read_nb(dev, tx, tx_len, rx, rx_len);
    while (!spid_transfer_completed(dev)) {
        // spin until driver reports completion
    }
}

// Blocking write with nothing received: a write-then-read transfer with an
// empty receive side, followed by a wait for completion.
void spi_write(spi_device_t *dev, const uint8_t *tx, int len) {
    spid_transfer_nb(dev, tx, len, NULL, 0, true);
    spi_await_transfer_completed(dev);
}

// Non-blocking write-and-read: starts the transfer and returns without waiting.
// The final `false` argument is what distinguishes this from the write-then-read variant.
void spi_write_and_read_nb(spi_device_t *dev, const uint8_t *tx, int tx_len, uint8_t *rx, int rx_len) {
    spid_transfer_nb(dev, tx, tx_len, rx, rx_len, false);
}
// Non-blocking write-then-read: starts the transfer and returns without waiting.
// The final `true` argument is what distinguishes this from the write-and-read variant.
void spi_write_then_read_nb(spi_device_t *dev, const uint8_t *tx, int tx_len, uint8_t *rx, int rx_len) {
    spid_transfer_nb(dev, tx, tx_len, rx, rx_len, true);
}
// Non-blocking counterpart of spi_write: a serialized transfer that sends
// `len` bytes from `tx` and has no receive phase (NULL buffer, zero length).
void spi_write_nb(spi_device_t *dev, const uint8_t *tx, int len) {
    spi_write_then_read_nb(dev, tx, len, NULL, 0);
}

// Returns true once the driver reports that the most recent transfer on `dev`
// has completed. Passes straight through to spid_transfer_completed.
bool spi_transfer_completed(spi_device_t *dev) {
    return spid_transfer_completed(dev);
}
// Busy-wait (polling the driver) until the transfer on `dev` has completed.
void spi_await_transfer_completed(spi_device_t *dev) {
    for (;;) {
        if (spid_transfer_completed(dev)) {
            return;
        }
    }
}

// Returns true if the driver reports the bus as free, i.e. a new transfer
// may be started. Passes straight through to spid_is_bus_free.
bool spi_is_bus_free(void) {
    return spid_is_bus_free();
}
// Busy-wait (polling the driver) until the bus is reported free.
void spi_await_bus_free(void) {
    for (;;) {
        if (spid_is_bus_free()) {
            return;
        }
    }
}

#pragma once
/*
 * File: spi_extra.h
 *
 * This header documents the advanced/optional features of the spi module.
 *
 * The *_nb versions of spi_write perform SPI communication in a non-blocking
 * (asynchronous) manner using DMA transfer. This allows the CPU to perform other
 * work while the spi transfer is ongoing. This can be tricky to use correctly;
 * read the caution notes below.
 *
 * The convenience macros can be handy to transmit fixed values without having
 * to create an array.
 *
 * Author: Daniel James <drjames@stanford.edu>
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 */

#include "spi.h"

// Non blocking versions of the equivalent functions from spi.h. Use
// spi_is_bus_free to determine when the transfer is finished.
//
// *DO NOT* initiate a new transfer while a non-blocking transfer is ongoing.
//
// CAUTION: Be very careful that the buffer(s) passed as argument live long
// enough for the transfer to finish. For example, a stack-allocated buffer
// is almost certainly not valid. Also do not modify contents of buffers
// while transfer is in progress.
//
void spi_write_and_read_nb(spi_device_t *dev, const uint8_t *tx, int tx_len, uint8_t *rx, int rx_len);
void spi_write_then_read_nb(spi_device_t *dev, const uint8_t *tx, int tx_len, uint8_t *rx, int rx_len);
void spi_write_nb(spi_device_t *dev, const uint8_t *tx, int len);

// Returns true once most recently initiated non-blocking transfer on this device
// is complete.
bool spi_transfer_completed(spi_device_t *dev);
// Blocking wait for spi_transfer_completed to return true.
void spi_await_transfer_completed(spi_device_t *dev);

// Similar to spi_transfer_completed, but instead of checking a specific device
// spi_is_bus_free reports whether the transfer on whichever device was used
// most recently has completed. spi_await_bus_free blocks until it has.
//
// CAUTION: If you call any spi transfer function while spi_is_bus_free is
// returning false, you will get an error. Always use spi_await_bus_free or
// check spi_is_bus_free before initiating a new transaction.
bool spi_is_bus_free(void);
void spi_await_bus_free(void);


// Convenience macro for calling spi_write. Instead of declaring a named array
// yourself, list the bytes to transmit directly as the trailing arguments.
// The macro builds a temporary uint8_t array (on the stack, scoped to the
// macro body) from those bytes and passes it and its size to the blocking
// spi_write. At least one byte must be given.
#define spi_write_v(dev, ...) do { \
    uint8_t spi_v_tx_[] = { __VA_ARGS__ }; \
    spi_write((dev), spi_v_tx_, sizeof(spi_v_tx_)); \
} while (0)

// Convenience macro for calling spi_write_and_read. Instead of declaring a
// named array yourself, list the bytes to transmit directly as the trailing
// arguments; the macro builds a temporary uint8_t array (on the stack, scoped
// to the macro body) from them. At least one byte must be given.
// Note that unlike spi_write_and_read the rx buffer and len go first due to
// the limitations of C macros (the variadic part must come last).
#define spi_write_and_read_v(dev, rx, rx_len, ...) do { \
    uint8_t spi_v_tx_[] = { __VA_ARGS__ }; \
    spi_write_and_read((dev), spi_v_tx_, sizeof(spi_v_tx_), (rx), (rx_len)); \
} while (0)

// Convenience macro for calling spi_write_then_read. Same conventions as
// spi_write_and_read_v: the rx buffer and its length come first, followed by
// the bytes to transmit, which are collected into a temporary uint8_t array
// (on the stack, scoped to the macro body). At least one byte must be given.
#define spi_write_then_read_v(dev, rx, rx_len, ...) do { \
    uint8_t spi_v_tx_[] = { __VA_ARGS__ }; \
    spi_write_then_read((dev), spi_v_tx_, sizeof(spi_v_tx_), (rx), (rx_len)); \
} while (0)

#pragma once
/*
 * File: spi_driver.h
 *
 * Description: HAL module for the SPI driver.
 * Clients not expected to directly use spi_driver, instead use the spi.h module
 * that layers on top.
 *
 * Author: Daniel James <drjames@stanford.edu>
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 */

#include <stdint.h>
#include "gpio.h"
#include "spi.h"


// Call once to init SPI peripheral
void spid_init(void);

// Create a new spi device. Transmissions on the device will be done in the
// provided mode. The given gpio pin is activated as the chip select before
// any transmissions. The device can then be used with the spid_transfer_nb
// function.
spi_device_t *spid_new_device(gpio_id_t chip_select, spi_mode_t mode, long rate);

// If serialize is false then tx_len bytes are sent from tx and rx_len bytes
// are received into rx at the same time. If serialize is true then first the
// tx bytes are sent and the rx bytes are received afterwards. If either of the
// lengths is zero then the corresponding buffer may be NULL.
void spid_transfer_nb(
    spi_device_t *dev, 
    const uint8_t *tx, int tx_len, 
    uint8_t *rx, int rx_len, 
    bool serialize
);

// Returns true once a transfer initiated by spid_transfer_nb is finished.
bool spid_transfer_completed(spi_device_t *dev);

// Returns true if the bus is currently available to start a transaction. If you
// call spid_transfer_nb while this is false you will get an error.
bool spid_is_bus_free(void);

/*
 * File: spi_driver.c
 *
 *  SPI hardware driver, including support for DMA for non-blocking transfer
 *
 * Author: Daniel James <drjames@stanford.edu>
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 *
 * Last updated: Curis summer 2025
 */

#include "spi_driver.h"
#include "assert.h"
#include "ccu.h"
#include "dma.h"
#include "gpio.h"
#include "malloc.h"

/*
 * Type: `spi_t`
 *
 * Register layout of one SPI controller. The `regs` struct overlays the
 * memory-mapped registers field by field (each uint32_t / bitfield struct is
 * one 32-bit register, `_res*` members are padding for unused offsets). The
 * union with `padding` makes each controller occupy exactly 0x1000 bytes so
 * that consecutive controllers can be indexed as an array from SPI_BASE.
 * Offsets noted below follow from the member order and sizes; the ier and
 * rxd offsets are verified by the _Static_asserts after the type.
 */
typedef union {
    struct {
        uint32_t _resa;
        struct {
            uint32_t en             : 1;
            uint32_t master_mode_sel: 1; // 0 slave 1 master
            uint32_t timing_mode_sel: 1; // 0 old mode 1 new mode
            uint32_t                : 4;
            uint32_t tx_pause_en    : 1;
            uint32_t                :23;
            uint32_t soft_reset     : 1;
        } gcr; // global control, offset 0x04
        struct {
            uint32_t cpha           : 1;
            uint32_t cpol           : 1;
            uint32_t spol           : 1;
            uint32_t ssctl          : 1;
            uint32_t chip_sel       : 2; // select one of four lines
            uint32_t ssowner        : 1;
            uint32_t sslevel        : 1;
            uint32_t dhb            : 1;
            uint32_t dummy_type     : 1;
            uint32_t rapid_mode_sel : 1;
            uint32_t sdc            : 1;
            uint32_t fbs            : 1;
            uint32_t sdm            : 1;
            uint32_t sddm           : 1;
            uint32_t sdc1           : 1;
            uint32_t                :15;
            uint32_t start_burst    : 1; // xch exchange burst, autoclear when mbc = 0
        } tcr; // transfer control, offset 0x08
        uint32_t _resb;
        uint32_t ier; // offset 0x10
        struct {
            uint32_t rx_ready       : 1;
            uint32_t rx_empty       : 1;
            uint32_t rx_full        : 1;
            uint32_t                : 1;
            uint32_t tx_ready       : 1;
            uint32_t tx_empty       : 1;
            uint32_t tx_full        : 1;
            uint32_t                : 1;
            uint32_t rx_overflow    : 1;
            uint32_t rx_underrun    : 1;
            uint32_t tx_overflow    : 1;
            uint32_t tx_underrun    : 1;
            uint32_t tx_complete    : 1;
            uint32_t ss_invalid     : 1;
            uint32_t                :18;
        } isr; // status flags, offset 0x14
        struct {
            uint32_t rx_trig_level  : 8;
            uint32_t rx_drq_en      : 1;
            uint32_t                : 5;
            uint32_t rx_test_en     : 1;
            uint32_t rx_fifo_rst    : 1;
            uint32_t tx_trig_level  : 8;
            uint32_t tx_drq_en      : 1;
            uint32_t                : 5;
            uint32_t tx_test_en     : 1;
            uint32_t tx_fifo_rst    : 1;
        } fcr; // fifo control
        const struct {
            uint32_t rx_fifo_cnt    : 8;
            uint32_t                : 4;
            uint32_t rx_wb_cnt      : 3;
            uint32_t rx_wb_en       : 1;
            uint32_t tx_fifo_cnt    : 8;
            uint32_t                : 4;
            uint32_t tx_wb_cnt      : 3;
            uint32_t tx_wb_en       : 1;
        } fsr; // fifo status
        uint32_t wcr; // wait clock
        uint32_t _resc;
        uint32_t sample_delay;
        uint32_t _resd;
        uint32_t mbc; // total burst count for a transfer (as used by the driver), offset 0x30
        uint32_t mtc; // transmit count (as used by the driver), offset 0x34
        struct {
            uint32_t stc            :24; // single mode transmit counter
            uint32_t                : 8; // dual/quad mode,burst control...
        } bcc;
        uint32_t _rese;
        uint32_t batcr;
        uint32_t ba_ccr;
        uint32_t tbr;
        uint32_t rbr;
        uint32_t _resf[14];
        struct {
            uint32_t dma_wait : 4;
            uint32_t ack_mode : 1;
            uint32_t act_mode : 2;
            uint32_t          : 25;
        } ndma_mode_ctl;
        uint32_t dbi[93];
        // tx data register (offset 0x200); may be written 8, 16 or 32 bits at a time
        union {
            uint8_t b8;
            uint16_t b16;
            uint32_t b32;
        } txd;
        uint32_t _resg[63];
        // rx data register (offset 0x300); may be read 8, 16 or 32 bits at a time
        const union {
            uint8_t b8;
            uint16_t b16;
            uint32_t b32;
        } rxd;
    } regs;
    uint8_t padding[0x1000]; // forces sizeof(spi_t) == 0x1000 so SPI_BASE[n] indexes controller n
} spi_t;

// Address of the first SPI controller's register block; controller n is
// SPI_BASE[n] since each spi_t spans 0x1000 bytes.
#define SPI_BASE ((spi_t *)0x04025000)
// Compile-time checks that the struct layout puts registers at the expected
// absolute addresses (one near the start of SPI0, one near the end of SPI1).
_Static_assert(&(SPI_BASE[0].regs.ier)     == (void *)0x04025010, "SPI0 ier reg must be at address 0x04025010");
_Static_assert(&(SPI_BASE[1].regs.rxd.b8)  == (void *)0x04026300, "SPI1 rxd reg must be at address 0x04026300");

// Driver-wide state: fixed hardware description (register base, pins, pin
// function) plus bookkeeping about the transfer currently occupying the bus.
static struct {
    volatile spi_t * const spi_base;    // first controller in the register array
    volatile spi_t *spi;                // controller in use; NULL until spid_init()
    const gpio_id_t clock, mosi, miso;  // pins switched to the SPI function in spid_init()
    const unsigned int fn_spi;          // gpio alternate function selecting SPI

    // Device whose DMA transfer was most recently started and not yet cleaned
    // up (NULL when there is none), and a counter bumped on each DMA transfer.
    spi_device_t *transferring_device;
    int generation;
} module = {
    .spi_base            = &SPI_BASE[0],
    .spi                 = NULL,
    .clock               = GPIO_PD11,
    .mosi                = GPIO_PD12,
    .miso                = GPIO_PD13,
    .fn_spi              = GPIO_FN_ALT4,
    .transferring_device = NULL,
    .generation          = 0,
};

// Per-device state: the configuration given to spid_new_device plus
// bookkeeping for the device's most recent DMA transfer.
struct spi_device {
    gpio_id_t chip_select;  // gpio used as chip select (driven low while active)
    spi_mode_t mode;        // bit 1 -> cpol, bit 0 -> cpha (see set_spi_mode)
    long rate;              // requested clock rate; 0 selects the driver default

    // keep track of the DMA transfer(s) this device is currently engaged in.
    int generation;         // module.generation at the time of this device's last DMA transfer, -1 if none
    bool has_rx, has_tx;    // which directions the completion check must consider
    dma_transfer_id_t trans_id_rx;  // id of the rx DMA transfer; only meaningful when has_rx is set
};

/*
 * Configure the SPI1 module clock for the requested `rate` (in Hz) and set
 * the matching sample-timing bits in the controller.
 *
 * A rate of 0 selects the 10 MHz default. The rate must evenly divide either
 * the 24 MHz HOSC parent (preferred) or the 600 MHz PERI parent; otherwise an
 * error is raised. The sample delay mode is then chosen from the rate that
 * the ccu reports as actually configured.
 */
static void config_clock(long rate) {
    static const long HOSC_24MHZ =  24 * 1000 * 1000;
    static const long PERI_600MHZ = 600 * 1000 * 1000;

    if (rate == 0) rate = 10*1000*1000; // 10 Mhz default

    // initialized so the value is well-defined even if error() were to return
    long set_to = 0;
    if (rate <= HOSC_24MHZ && (HOSC_24MHZ % rate == 0)) {
        set_to = ccu_config_module_clock_rate(CCU_SPI1_CLK_REG, PARENT_HOSC, rate);
    } else if (rate <= PERI_600MHZ && (PERI_600MHZ % rate == 0)) {
        set_to = ccu_config_module_clock_rate(CCU_SPI1_CLK_REG, PARENT_PERI, rate);
    } else {
        error("Clock rate does not divide parent clock rates.");
    }
    long mhz = set_to/(1000*1000);
    // Timing settings below gleaned from D1 user manual p. 935
    // Empirically verified using read from SPI flash + logic analyzer
    // Flash reliably read up to 120 Mhz
    // (can reach 150Mhz if connect short & high-quality)
    module.spi->regs.gcr.timing_mode_sel = 1; // enable new mode
    module.spi->regs.tcr.sdc1 = 0; // per manual, do not use this part of spectrum
    if (mhz >= 75) {
        module.spi->regs.tcr.sdm = 0; // one cycle delay for highest speeds
        module.spi->regs.tcr.sdc = 1;
    } else if (mhz >= 24) {
        module.spi->regs.tcr.sdm = 0; // half cycle delay at medium speed
        module.spi->regs.tcr.sdc = 0;
    } else {
        module.spi->regs.tcr.sdm = 1; // normal mode, no delay at lower speeds
        module.spi->regs.tcr.sdc = 0;
    }
}

// One-time initialization of the SPI peripheral: selects the controller,
// routes the clock/mosi/miso pins to the SPI function, ungates the bus clock
// and enables the controller in master mode with manual chip select.
// Sets module.spi, which the other driver functions check to detect a
// missing init call.
void spid_init(void) {
    // this driver code supports only SPI 1 which is broken out on GPIO header
    module.spi = &module.spi_base[1];

    gpio_set_function(module.clock, module.fn_spi);
    gpio_set_function(module.mosi, module.fn_spi);
    gpio_set_function(module.miso, module.fn_spi);

    // NOTE(review): presumably bit 1 is the SPI1 gating bit and bit 17 the
    // SPI1 reset bit of the bus gating/reset register -- confirm against ccu.h
    ccu_ungate_bus_clock_bits(CCU_SPI_BGR_REG, (1 << 1), (1 << 17)); // SPI 1

    module.spi->regs.tcr.ssowner = 1; // use manual control for chip select
    module.spi->regs.gcr.master_mode_sel = 1;
    module.spi->regs.gcr.en = 1;    // enable
}

// Program clock polarity and phase from the SPI mode number:
// bit 1 of `mode` is the polarity (cpol), bit 0 is the phase (cpha).
static void set_spi_mode(spi_mode_t mode) {
    const unsigned int polarity = (mode >> 1) & 1;
    const unsigned int phase    = mode & 1;

    module.spi->regs.tcr.cpol = polarity;
    module.spi->regs.tcr.cpha = phase;
}

// Prepare the controller for a transfer on `dev`: with the controller
// disabled, apply the device's mode and clock rate, then re-enable the
// controller and assert the device's chip select.
static void activate_device(spi_device_t *dev) {
    // Fix: the original wrote 1 here, so the controller was never actually
    // disabled while mode and clock settings were being changed.
    module.spi->regs.gcr.en = 0;    // disable while changing
    set_spi_mode(dev->mode);
    config_clock(dev->rate);
    module.spi->regs.gcr.en = 1;    // re-enable
    gpio_write(dev->chip_select, 0);     // chip select active low
}

// End a transaction with `dev` by driving its chip select pin high
// (chip select is active low, so high means released/idle).
static void deactivate_device(spi_device_t *dev) {
    gpio_write(dev->chip_select, 1); // release chip select
}

// Allocate and initialize a device record for a peripheral on the SPI bus.
// Stores the given chip select pin, mode and rate, marks the device as having
// no transfer in flight, and configures the chip select pin as an output
// driven high (idle). Raises an error if the driver has not been initialized;
// asserts that the allocation succeeded.
spi_device_t * spid_new_device(gpio_id_t chip_select, spi_mode_t mode, long rate) {
    if (module.spi == NULL) error("spi_init() has not been called!\n");

    spi_device_t *device = malloc(sizeof(*device));
    assert(device != NULL);

    // client-supplied configuration
    device->chip_select = chip_select;
    device->mode = mode;
    device->rate = rate;

    // no transfer has been started on this device yet
    device->has_rx = false;
    device->has_tx = false;
    device->generation = -1;

    // chip select is active low: make it an output and park it high
    gpio_set_output(device->chip_select);
    gpio_write(device->chip_select, 1);

    return device;
}

// Larger of two values. Each argument may be evaluated twice, so only pass
// expressions without side effects.
#define MAX(a, b) ((a) > (b) ? (a) : (b))

// Start a transfer on `dev` using DMA to feed the TX FIFO from `tx` and/or
// drain the RX FIFO into `rx`. Returns immediately after the burst and the
// DMA requests have been started; completion is detected later through
// spid_transfer_completed. Records which directions are in flight on the
// device and registers the device as module.transferring_device so that the
// next transfer can clean up after it.
//
// If `serialize` is true the rx bytes are clocked in after the tx bytes,
// otherwise both directions run at the same time.
static void large_transfer_dma_nb(spi_device_t *dev, const uint8_t *tx, int tx_len, uint8_t *rx, int rx_len, bool serialize) {
    // configure the total number of spi bursts
    module.spi->regs.mbc = serialize ? tx_len + rx_len : MAX(tx_len, rx_len);

    // after we have depleted the data in the tx buffer we send dummy data.
    module.spi->regs.mtc     = tx_len;
    module.spi->regs.bcc.stc = tx_len;

    // The number of bits we write at once to the txd and rxd registers. This
    // doesn't seem to have any performance impact so the only considerations are
    // 1) This should be <= 32 since the txd and rxd registers are only 32 bits
    // 2) This affects the trigger levels. See below.
    const dma_width_t DATA_WIDTH = DMA_BITWIDTH_32;

    // About these trigger levels: The TX and RX FIFOs are each 64 bytes. The
    // purpose of the trigger levels is to ensure that when the DMA writes to or
    // reads from the FIFOs there is enough data to read and enough space to
    // write. If we don't ensure this various wacky things happen. The relevant
    // number is the number of bytes the DMA writes/reads at once. This is equal
    // to the data width in bytes (chosen above) times the DMA burst count. In
    // the DMA driver we have the burst count set at 2.
    //
    // For the TX trigger level there is an additional complication that the
    // TX FIFO seems to have four spare bytes in it. This makes the general
    // formula for an acceptable tx trigger level:
    //            trig_level = 64 + 4 - data_width * burst_cnt.
    // Right now we set the trigger level to 32 which will work for a wide range
    // of values.
    module.spi->regs.fcr.tx_trig_level = 0x20;
    // The RX trigger level should match the data from one transaction:
    //             trig_level = data_width * burst_cnt.
    // Here it is set to 8 (4 bytes * burst count 2) to match the current 32 bit
    // width set above. If you don't set the trig_level exactly then there might
    // be data left in the FIFO after the transaction is complete. Currently we
    // always clear that out but if it matters it is something to keep in mind.
    module.spi->regs.fcr.rx_trig_level = 0x08;

    // make sure that the DMA is triggered properly
    module.spi->regs.fcr.tx_drq_en = 1;
    module.spi->regs.fcr.rx_drq_en = 1;

    // begin the transfers
    module.spi->regs.isr.tx_complete = 1;
    module.spi->regs.tcr.dhb = serialize; // don't receive during tx period if we are serialized
    module.spi->regs.tcr.start_burst = 1;

    // Forget the directions used by any earlier transfer on this device.
    // Without this, a device that once received via DMA would keep consulting
    // its stale rx transfer id in spid_transfer_completed forever after.
    dev->has_rx = false;
    dev->has_tx = false;

    // Dispatch the DMA requests to fill/empty the FIFO buffers
    if (rx_len > 0) {
        dma_endpoint_t from_spi = dma_create_endpoint(DRQ_TYPE_SPI1, &module.spi->regs.rxd.b8);
        dma_endpoint_t to_mem   = dma_create_endpoint(DRQ_TYPE_DRAM, rx);
        dev->trans_id_rx        = dma_transfer(from_spi, to_mem, DATA_WIDTH, rx_len);
        dev->has_rx = true;
    }
    if (tx_len > 0) {
        dma_endpoint_t from_mem = dma_create_endpoint(DRQ_TYPE_DRAM, tx);
        dma_endpoint_t to_spi   = dma_create_endpoint(DRQ_TYPE_SPI1, &module.spi->regs.txd.b8);
        // the tx transfer id is not kept: tx completion is observed through
        // the controller's tx_complete flag instead
        dma_transfer(from_mem, to_spi, DATA_WIDTH, tx_len);
        dev->has_tx = true;
    }
    module.generation++;
    dev->generation = module.generation;

    // remember this device so we can clean it up later
    module.transferring_device = dev;
}

// Undo the controller state left behind by a DMA transfer: turn off the DMA
// request lines, discard anything still sitting in the RX FIFO, and clear
// module.transferring_device so the bus is considered idle.
static void cleanup_from_dma(void) {
    // disable the DMA again
    module.spi->regs.fcr.tx_drq_en = 0;
    module.spi->regs.fcr.rx_drq_en = 0;

    // clear the rx fifo in case the dma didn't clear it
    // (each read of rxd.b8 pops one byte; the value is intentionally discarded)
    while(module.spi->regs.fsr.rx_fifo_cnt > 0) module.spi->regs.rxd.b8;

    module.transferring_device = NULL;
}

// Perform a short transfer entirely by CPU: load the tx bytes into the TX
// FIFO, start the burst, busy-wait for the controller's tx_complete flag and
// then copy rx_len bytes out of the RX FIFO into `rx`. Blocks until done.
// The caller (spid_transfer_nb) only uses this path when both lengths are at
// most FIFO_MAX.
//
// If `serialize` is true the rx bytes are clocked in after the tx bytes,
// otherwise both directions run at the same time.
static void small_transfer_manual(const uint8_t *tx, int tx_len, uint8_t *rx, int rx_len, bool serialize) {
    // the max length is the total number of bursts the SPI controller will do
    module.spi->regs.mbc = serialize ? tx_len + rx_len : MAX(tx_len, rx_len);

    // after we have depleted the data in the tx buffer we send dummy data.
    module.spi->regs.mtc     = tx_len;
    module.spi->regs.bcc.stc = tx_len;

    // enqueue the tx data in the tx fifo
    for (int i = 0; i < tx_len; i++) {
        module.spi->regs.txd.b8 = tx[i];
    }

    // begin the transfer
    module.spi->regs.isr.tx_complete = 1; // writing 1 presumably clears the flag from a previous transfer -- confirm in manual
    module.spi->regs.tcr.dhb = serialize; // don't receive during tx period if we are serialized
    module.spi->regs.tcr.start_burst = 1;

    // busy wait for the transfer to complete.
    while (!module.spi->regs.isr.tx_complete);

    // NOTE(review): only rx_len bytes are dequeued here. In a non-serialized
    // transfer with tx_len > rx_len the controller may have received more
    // bytes than that; it looks like they would stay in the RX FIFO since
    // this path does not drain it -- confirm whether that can happen.
    for (int i = 0; i < rx_len; i++) {
        rx[i] = module.spi->regs.rxd.b8; // dequeue rx fifo
    }
}

// Largest tx or rx length (in bytes) handled by the manual FIFO path in
// spid_transfer_nb; anything longer goes through DMA.
#define FIFO_MAX 63

// Start a transfer on `dev`. Raises an error if the driver is uninitialized
// or if a previously started DMA transfer has not yet completed. Otherwise
// the previous DMA transfer (if any) is cleaned up, `dev` is activated, and
// the transfer is carried out:
//   - if both lengths fit within FIFO_MAX, it is done by CPU and has already
//     finished (chip select released) when this function returns;
//   - otherwise it is started via DMA and this function returns while the
//     transfer is still in progress.
void spid_transfer_nb(
    spi_device_t *dev,
    const uint8_t *tx, int tx_len,
    uint8_t *rx, int rx_len,
    bool serialize
) {
    if (module.spi == NULL) error("spi_init() has not been called!\n");
    assert(dev != NULL);

    // retire the previous DMA transfer, which must have finished by now
    spi_device_t *previous = module.transferring_device;
    if (previous != NULL) {
        if (!spid_transfer_completed(previous)) {
            error("attempted new SPI transfer while previous transfer still underway.\n"
                  "hint: use spi_await_bus_free to wait for your previous non-blocking transfers to complete."
            );
        }

        deactivate_device(previous);
        cleanup_from_dma();
    }

    activate_device(dev);

    const bool needs_dma = (tx_len > FIFO_MAX) || (rx_len > FIFO_MAX);
    if (!needs_dma) {
        // short transfer: done synchronously, so release the device right away
        small_transfer_manual(tx, tx_len, rx, rx_len, serialize);
        deactivate_device(dev);
        return;
    }

    // long transfer: chip select stays asserted until the next transfer cleans up
    large_transfer_dma_nb(dev, tx, tx_len, rx, rx_len, serialize);
}

// Report whether the most recent transfer on `dev` has finished.
// A NULL device counts as finished. Raises an error if the driver has not
// been initialized.
bool spid_transfer_completed(spi_device_t *dev) {
    if (module.spi == NULL) error("spi_init() has not been called!\n");

    if (dev == NULL) {
        return true;
    }

    // The driver has moved on to a later transfer so we must be done.
    if (dev->generation < module.generation) {
        return true;
    }

    // Still sending: the controller has not yet flagged the burst as complete.
    if (dev->has_tx && !module.spi->regs.isr.tx_complete) {
        return false;
    }

    // Still receiving: the DMA has not yet copied out all the bytes.
    if (dev->has_rx && !dma_transfer_completed(dev->trans_id_rx)) {
        return false;
    }

    return true;
}

// Returns true if a new transfer may be started: either no DMA transfer is
// registered (transferring_device is NULL, which spid_transfer_completed
// treats as completed) or the registered device's transfer has finished.
// Raises an error if the driver has not been initialized.
bool spid_is_bus_free(void) {
    if (module.spi == NULL) error("spi_init() has not been called!\n");

    return spid_transfer_completed(module.transferring_device);
}