CS107e library source files

/*
 * Functions to manage D1 clocks
 *
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 * Sat Sep  7 11:08:33 PDT 2024
 */

#include "ccu.h"
#include "assert.h"
#include <stddef.h>
#include "printf.h"
#include "timer.h"

/*
 * Models the D-1 clock tree as diagrammed on p.39 of D-1 manual
 * (with some simplifications to support clocks we use)
 * See debug info below for more info on what is modeled.
 */

// Forward declarations of the rate-reporting helpers. They are defined in the
// debug section at the bottom of this file, but the config functions below
// call them to return the resulting rate.
static long debug_rate_pll(pll_id_t id);
static long debug_rate_clk(module_clk_id_t id);
static long debug_rate_bgr(bgr_id_t id);

/*
 * Bit-field view of a 32-bit PLL control register, overlaid on the raw word.
 * Bit positions noted below assume the compiler allocates bit-fields starting
 * from the least-significant bit; under that assumption they agree with the
 * masks used in update_pll_bits (bits 27, 28, 29 and 31).
 */
typedef union {
    struct {
        uint32_t factor_m0  : 1;    // bit 0: stored as (m0 - 1)
        uint32_t factor_m1  : 1;    // bit 1: stored as (m1 - 1)
        uint32_t            : 6;    // bits 2-7: unused here
        uint32_t factor_n   : 8;    // bits 8-15: stored as (n - 1)
        uint32_t factor_p   : 6;    // bits 16-21: stored as (p - 1)
        uint32_t            : 5;    // bits 22-26: unused here
        uint32_t output_ena : 1;    // bit 27: output enable
        uint32_t locked     : 1;    // bit 28: lock status, polled after a change
        uint32_t lock_ena   : 1;    // bit 29: lock enable
        uint32_t ldo_ena    : 1;    // bit 30
        uint32_t ena        : 1;    // bit 31: PLL enable
    };
    uint32_t bits;                  // whole register as a single word
} pll_reg_t;

/*
 * Bit-field view of a 32-bit module clock register, overlaid on the raw word.
 * Bit positions assume least-significant-bit-first allocation of bit-fields.
 */
typedef union {
    struct {
        uint32_t factor_m   : 5;    // bits 0-4: divider, rate is divided by (factor_m + 1)
        uint32_t            : 3;    // bits 5-7: unused here
        uint32_t factor_n   : 2;    // bits 8-9: divider, rate is divided by (1 << factor_n)
        uint32_t            : 14;   // bits 10-23: unused here
        uint32_t src        : 3;    // bits 24-26: selects which parent clock feeds this clock
        uint32_t            : 4;    // bits 27-30: unused here
        uint32_t ena        : 1;    // bit 31: clock enable (gating)
    };
    uint32_t bits;                  // whole register as a single word
} module_clk_reg_t;

/*
 * The CCU register block, modeled as a flat region rather than named fields.
 * Registers are located by byte offset (see reg_for_id), so the byte array
 * `raw` defines the extent of the block; `regs` is a word-typed alias of the
 * same starting address.
 */
typedef struct {
    union {
        uint8_t raw[0xD10];     // byte-addressed view; its size bounds valid register offsets
        uint32_t regs[1];       // word view of the first register
    } ;
} ccu_t;

// Base address of the CCU register block, followed by compile-time checks
// that the overlay and the register-offset constants land on the expected addresses.
#define CCU_BASE ((ccu_t *)0x02001000)
_Static_assert(&CCU_BASE->raw[0]                ==  (uint8_t *)0x02001000, "CCU pll cpu reg must be at address 0x02001000");
_Static_assert(&CCU_BASE->regs[0]               == (uint32_t *)0x02001000, "CCU pll cpu reg must be at address 0x02001000");
_Static_assert(&CCU_BASE->raw[CCU_APB0_CLK_REG] ==  (uint8_t *)0x02001520, "CCU apb0 reg must be at address 0x02001520");

// All register access in this module goes through this volatile pointer
static volatile ccu_t *const module = CCU_BASE;

// Translate a register id (a byte offset from the CCU base) into a pointer
// to that 32-bit register. Asserts that the offset is word-aligned and lies
// within the modeled register block.
// The pointer stays volatile-qualified throughout; the qualifier is never
// cast away, not even temporarily.
static volatile uint32_t *reg_for_id(uint32_t raw_offset) {
    assert(raw_offset % 4 == 0);                // reject if not on 4-byte boundary
    assert(raw_offset < sizeof(module->raw));   // reject if not in range
    volatile uint8_t *byte_addr = &module->raw[raw_offset];
    return (volatile uint32_t *)byte_addr;
}

// Procedure to update PLL from p46 of D-1 user manual
// NOTE: code explicitly sets and clears bits using bitwise ops!
// This ensures it acts exactly as required in spec
// (not using bitfields, gcc-generated code can vary)
//
// reg          PLL control register to update
// factor_mask  bits of the register occupied by the divider factors being changed
// factor_bits  new values for those factor bits (already positioned in the word)
//
// The masks are built from unsigned constants: shifting a signed 1 into
// bit 31 is undefined behavior in C.
static void update_pll_bits(volatile uint32_t *reg, uint32_t factor_mask, uint32_t factor_bits) {
    const uint32_t PLL_ENA =  (1u << 31);
    const uint32_t LOCK_ENA = (1u << 29);
    const uint32_t LOCKED =   (1u << 28);
    const uint32_t OUT_ENA =  (1u << 27);

    *reg |= PLL_ENA;            // enable PLL
    *reg &= ~OUT_ENA;           // disable output while changing
    *reg = (*reg & ~factor_mask) | factor_bits;   // clear previous factors & apply new
    *reg |= LOCK_ENA;           // lock enable
    while (! (*reg & LOCKED))   // wait until lock
       ;
    timer_delay_us(50);         // short delay to stabilize
    *reg |= OUT_ENA;            // re-enable output
}

// Set the N and M1 dividers of the PLL identified by `id`.
// n must be in 13..255 and m in 1..2 (asserted); both are stored as (value - 1).
// Returns the PLL rate as computed by debug_rate_pll after the update.
long ccu_config_pll_dividers(pll_id_t id, uint32_t n, uint32_t m) {
    // confirm dividers within spec
    assert(n >= 13 && n <= 255);
    assert(m >= 1 && m <= 2);

    // which register bits are being replaced
    const pll_reg_t clear_mask = { .factor_n = 255, .factor_m1 = 1 };
    // the replacement values, positioned via the bit-field overlay
    pll_reg_t replacement = { .factor_n = n - 1, .factor_m1 = m - 1 };

    volatile uint32_t *pll_reg = reg_for_id(id);
    update_pll_bits(pll_reg, clear_mask.bits, replacement.bits);
    return debug_rate_pll(id);
}

// Set the N, P, M1 and M0 dividers of the AUDIO0 PLL.
// Ranges are asserted: n in 13..255, p in 1..64, m0 and m1 in 1..2, and
// n/m0/m1 in 8..125. Each factor is stored in the register as (value - 1).
// Returns the PLL rate as computed by debug_rate_pll after the update.
long ccu_config_pll_audio0(uint32_t n, uint32_t p, uint32_t m1, uint32_t m0) {
    const pll_reg_t factor_mask = { .factor_m1 = 1,    .factor_n = 255, .factor_p = 63,  .factor_m0 = 1 };
    // factor_m1 must come from m1 (previously it was mistakenly derived from m0)
    pll_reg_t new_factors =       { .factor_m1 = m1-1, .factor_n = n-1, .factor_p = p-1, .factor_m0 = m0-1 };
    pll_id_t id = CCU_PLL_AUDIO0_CTRL_REG;
    assert(p >= 1 && p <= 64);  // confirm dividers within spec
    assert(n >= 13 && n <= 255);
    assert(m0 >= 1 && m0 <= 2 && m1 >= 1 && m1 <= 2);
    uint32_t n_div_m0_m1 = n/m0/m1;
    assert(n_div_m0_m1 >= 8 && n_div_m0_m1 <= 125); // required to keep out freq within range 180M-3G
    update_pll_bits(reg_for_id(id), factor_mask.bits, new_factors.bits);
    return debug_rate_pll(id);
}

/* From p47 D-1 user manual:
 * Configure the clock source and frequency division factor first,
 * and then release the clock gating (that is, set enable to 1)
 *
 * reg   module clock register to update
 * bits  new register contents (source and divider factors)
 *
 * The enable bit is masked out of the configuration write, so the gate is
 * released only by the final step even if the caller's bits include it.
 * The mask is an unsigned constant: shifting a signed 1 into bit 31 is
 * undefined behavior in C.
 */
static void update_clock_bits(volatile uint32_t *reg, uint32_t bits) {
    const uint32_t ENA = (1u << 31);
    *reg &= ~ENA;           // disable clock during change
    *reg = bits & ~ENA;     // apply source/dividers with clock still gated
    *reg |= ENA;            // re-enable
}

// Configure the module clock identified by `id`: choose the parent via `src`
// (0..7) and set divider factors factor_n (0..3) and factor_m (0..31).
// Ranges are asserted. Returns the clock rate as computed by debug_rate_clk.
long ccu_config_module_clock(module_clk_id_t id, uint32_t src, uint32_t factor_n, uint32_t factor_m) {
    assert(src <= 7);
    assert(factor_n <= 3 && factor_m <= 31);

    // position the three values in the register word via the bit-field overlay
    module_clk_reg_t config = { .factor_m= factor_m, .factor_n= factor_n, .src= src };
    volatile uint32_t *clk_reg = reg_for_id(id);
    update_clock_bits(clk_reg, config.bits);
    return debug_rate_clk(id);
}

/*
 *  From p47 D-1 user manual:
 *  For the Bus Gating Reset register of a module, the reset bit is de-asserted first,
 *  and then the clock gating bit is enabled to avoid potential problems
 *  caused by the asynchronous release of the reset signal.
 *
 *  Performs those two steps, in that order, on the register identified by
 *  `id`, then returns the bus clock rate as computed by debug_rate_bgr.
 */
long ccu_ungate_bus_clock_bits(bgr_id_t id, uint32_t gating_bits, uint32_t reset_bits) {
    volatile uint32_t *bgr = reg_for_id(id);
    *bgr = *bgr | reset_bits;       // step 1: de-assert reset
    *bgr = *bgr | gating_bits;      // step 2: open the clock gate
    return debug_rate_bgr(id);
}

// Convenience wrapper for the common case: gating bit at position 0 and
// reset bit at position 16. Modules with a different bit arrangement
// call ccu_ungate_bus_clock_bits directly.
long ccu_ungate_bus_clock(bgr_id_t id) {
    enum { GATING_SHIFT = 0, RESET_SHIFT = 16 };
    return ccu_ungate_bus_clock_bits(id, 1 << GATING_SHIFT, 1 << RESET_SHIFT);
}

/****  DEBUG INFO from here down ***/

// Identifies a clock source that can act as parent of a module or bus clock
// in the debug model. The value 0 (NOT_IN_MODEL) is what an unlisted slot
// of a zero-initialized parents[] array holds.
typedef enum {
    NOT_IN_MODEL = 0,       // parent exists in hardware but is not modeled here
    PARENT_HOSC = 1,        // reported as 24 MHz by debug_rate_parent
    PARENT_32K,             // reported as 32768 Hz by debug_rate_parent
    PARENT_DDR,
    PARENT_PERI,
    PARENT_PERI_2X,         // 2x the PERI PLL rate
    PARENT_VIDEO0,
    PARENT_VIDEO0_4X,       // 4x the VIDEO0 PLL rate
    PARENT_VIDEO1,
    PARENT_VIDEO1_4X,       // 4x the VIDEO1 PLL rate
    PARENT_AUDIO0,
    PARENT_AUDIO1,
    PARENT_AUDIO1_DIV5,     // AUDIO1 PLL rate divided by 5
    PARENT_AHB0,            // reported using the PSI clock rate
    PARENT_APB0,
    PARENT_APB1,
    PARENT_PSI,
} parent_id_t;

// One row of the debug table. A row with a name but no fn is a section
// heading; a row with no name terminates the table.
struct debug_info {
    long (*fn)(uint32_t);       // computes the current rate for reg_id (NULL for heading rows)
    uint32_t reg_id;            // register id (byte offset within the CCU block)
    const char *name;           // label printed by ccu_debug_show_clocks
    parent_id_t parents[4];     // module clocks: indexed by the register's src field; bus clocks: only [0] is used
};

// Helpers to build the first three fields of a debug_info row (fn, reg_id, name)
// from a register id; the name is the id's spelling turned into a string.
#define STRINGIFY(x) #x
#define INFO_PLL(x) debug_rate_pll, x, STRINGIFY(x)
#define INFO_CLK(x) debug_rate_clk, x, STRINGIFY(x)
#define INFO_BGR(x) debug_rate_bgr, x, STRINGIFY(x)

// Table of every clock the debug model knows about, grouped under heading
// rows (name only). For module clocks the parents list is in src-field order;
// for bus clocks only the first parent is consulted. The final all-zero row
// terminates iteration.
static struct debug_info info_table[] = {
    { .name= "PLL" },
    { INFO_PLL(CCU_PLL_CPU_CTRL_REG)    },
    { INFO_PLL(CCU_PLL_DDR_CTRL_REG)    },
    { INFO_PLL(CCU_PLL_PERI_CTRL_REG)   },
    { INFO_PLL(CCU_PLL_VIDEO0_CTRL_REG) },
    { INFO_PLL(CCU_PLL_VIDEO1_CTRL_REG) },
    { INFO_PLL(CCU_PLL_VE_CTRL_REG)     },
    { INFO_PLL(CCU_PLL_AUDIO0_CTRL_REG) },
    { INFO_PLL(CCU_PLL_AUDIO1_CTRL_REG) },
    { .name= "Module Clock" },       // parent not listed defaults to NOT_IN_MODEL
    { INFO_CLK(CCU_PSI_CLK_REG),      {PARENT_HOSC, PARENT_32K, NOT_IN_MODEL, PARENT_PERI} },
    { INFO_CLK(CCU_APB0_CLK_REG),     {PARENT_HOSC, PARENT_32K, PARENT_PSI, PARENT_PERI} },
    { INFO_CLK(CCU_APB1_CLK_REG),     {PARENT_HOSC, PARENT_32K, PARENT_PSI, PARENT_PERI} },
    { INFO_CLK(CCU_DRAM_CLK_REG),     {PARENT_DDR, NOT_IN_MODEL, PARENT_PERI_2X} },
    { INFO_CLK(CCU_DE_CLK_REG),       {PARENT_PERI_2X, PARENT_VIDEO0_4X, PARENT_VIDEO1_4X} },
    { INFO_CLK(CCU_TCONTV_CLK_REG),   {PARENT_VIDEO0, PARENT_VIDEO0_4X } },
    { INFO_CLK(CCU_HDMI_24M_CLK_REG), {PARENT_HOSC} },
    { INFO_CLK(CCU_SPI0_CLK_REG),     {PARENT_HOSC, PARENT_PERI, PARENT_PERI_2X } },
    { INFO_CLK(CCU_I2S2_CLK_REG),     {PARENT_AUDIO0, NOT_IN_MODEL, NOT_IN_MODEL, PARENT_AUDIO1_DIV5} },
    { .name= "Bus Clock" },
    { INFO_BGR(CCU_DE_BGR_REG),       {PARENT_AHB0} },
    { INFO_BGR(CCU_DPSS_TOP_BGR_REG), {PARENT_AHB0} },
    { INFO_BGR(CCU_HDMI_BGR_REG),     {PARENT_AHB0} },
    { INFO_BGR(CCU_TCONTV_BGR_REG),   {PARENT_AHB0} },
    { INFO_BGR(CCU_DMA_BGR_REG),      {PARENT_AHB0} },
    { INFO_BGR(CCU_HSTIMER_BGR_REG),  {PARENT_AHB0} },
    { INFO_BGR(CCU_PWM_BGR_REG),      {PARENT_APB0} },
    { INFO_BGR(CCU_UART_BGR_REG),     {PARENT_APB1} },
    { INFO_BGR(CCU_I2S_BGR_REG),      {PARENT_APB0} },
    { INFO_BGR(CCU_TWI_BGR_REG),      {PARENT_APB1} },
    { INFO_BGR(CCU_SPI_BGR_REG),      {PARENT_APB1} },
    {0},
  };

// Find the table row describing register `id`. Heading rows (no fn) are
// ignored. An unknown id is reported and then trips an assert.
static struct debug_info *info_for_id(uint32_t id) {
    struct debug_info *entry = info_table;
    while (entry->name != NULL) {
        if (entry->fn != NULL && entry->reg_id == id) {
            return entry;
        }
        entry++;
    }
    printf("No such reg id 0x%x\n", id);
    assert(0);
    return NULL;
}

// Print a report of all modeled clocks under the given label. Each section
// starts with its heading row; a clock whose computed rate is 0 is omitted.
void ccu_debug_show_clocks(const char *label) {
    printf("\n++++++++ CCU clock debug (%s) ++++++++\n", label);
    for (struct debug_info *entry = info_table; entry->name; entry++) {
        if (entry->fn) {
            long rate = entry->fn(entry->reg_id);
            if (rate == 0) continue;    // nothing to report for this clock
            printf("%12ld  %s\t raw=[%08x]\n", rate, entry->name, *reg_for_id(entry->reg_id));
        } else {
            // heading row introduces a new section
            printf("\n        Rate  %s\n", entry->name);
        }
    }
}

// Report the rate of a parent clock source. Fixed oscillators return a
// constant; PLL-derived parents apply their multiplier or divider to the PLL
// rate; bus parents defer to the corresponding module clock.
// Returns -1 for a parent that is not modeled.
static long debug_rate_parent(parent_id_t id) {
    switch (id) {
        case NOT_IN_MODEL:
            return -1;
        case PARENT_HOSC:
            return 24*1000*1000;
        case PARENT_32K:
            return 32768;
        case PARENT_DDR:
            return debug_rate_pll(CCU_PLL_DDR_CTRL_REG);
        case PARENT_PERI:
            return debug_rate_pll(CCU_PLL_PERI_CTRL_REG);
        case PARENT_PERI_2X:
            return 2 * debug_rate_pll(CCU_PLL_PERI_CTRL_REG);
        case PARENT_VIDEO0:
            return debug_rate_pll(CCU_PLL_VIDEO0_CTRL_REG);
        case PARENT_VIDEO0_4X:
            return 4 * debug_rate_pll(CCU_PLL_VIDEO0_CTRL_REG);
        case PARENT_VIDEO1:
            return debug_rate_pll(CCU_PLL_VIDEO1_CTRL_REG);
        case PARENT_VIDEO1_4X:
            return 4 * debug_rate_pll(CCU_PLL_VIDEO1_CTRL_REG);
        case PARENT_AUDIO0:
            return debug_rate_pll(CCU_PLL_AUDIO0_CTRL_REG);
        case PARENT_AUDIO1:
            return debug_rate_pll(CCU_PLL_AUDIO1_CTRL_REG);
        case PARENT_AUDIO1_DIV5:
            return debug_rate_pll(CCU_PLL_AUDIO1_CTRL_REG) / 5;
        case PARENT_APB0:
            return debug_rate_clk(CCU_APB0_CLK_REG);
        case PARENT_APB1:
            return debug_rate_clk(CCU_APB1_CLK_REG);
        case PARENT_AHB0:   // AHB0 is reported using the PSI clock rate
        case PARENT_PSI:
            return debug_rate_clk(CCU_PSI_CLK_REG);
    }
    return -1;  // value outside the enum
}

// Compute the output rate of a PLL from its current register contents.
// Returns 0 when the PLL or its output is disabled. Divider fields hold
// (value - 1). Which dividers apply depends on the PLL.
static long debug_rate_pll(pll_id_t id) {
    pll_reg_t cur = { .bits = *reg_for_id(id) };
    if (!(cur.ena && cur.output_ena)) {
        return 0;
    }

    int n  = cur.factor_n + 1;
    int p  = cur.factor_p + 1;
    int m0 = cur.factor_m0 + 1;
    int m1 = cur.factor_m1 + 1;
    long hosc_rate = debug_rate_parent(PARENT_HOSC);
    long rate;

    switch (id) {
        case CCU_PLL_PERI_CTRL_REG:
        case CCU_PLL_VIDEO0_CTRL_REG:
        case CCU_PLL_VIDEO1_CTRL_REG:
            rate = hosc_rate*n/m1/4;
            break;
        case CCU_PLL_AUDIO0_CTRL_REG:
            rate = hosc_rate*n/m1/m0/p/4;
            break;
        default:
            rate = hosc_rate*n/m1/m0;
            break;
    }
    return rate;
}

// Compute the rate of a module clock from its current register contents:
// the selected parent's rate divided by (1 << factor_n) and by (factor_m + 1).
// Returns 0 if the clock is disabled (the enable check is skipped for
// PSI/APB0/APB1).
// The src field is 3 bits wide (0..7) but only 4 parents are modeled per
// clock; a src beyond the parents array is treated as NOT_IN_MODEL instead
// of reading past the end of the array.
static long debug_rate_clk(module_clk_id_t id) {
    module_clk_reg_t clk;
    clk.bits = *reg_for_id(id);
    if (id > CCU_APB1_CLK_REG && !clk.ena) return 0; // cheezy (ena bits not applicable for psi/apb?)
    int n = 1 << clk.factor_n;
    int m = clk.factor_m + 1;
    struct debug_info *i = info_for_id(id);
    const size_t n_parents = sizeof(i->parents)/sizeof(i->parents[0]);
    parent_id_t parent = NOT_IN_MODEL;
    if (clk.src < n_parents) {
        parent = i->parents[clk.src];
    }
    return debug_rate_parent(parent)/n/m;
}

// Report the rate of a bus clock: the rate of its (single) parent when any
// of the low 8 bits of the bus gating register are set, otherwise 0.
static long debug_rate_bgr(bgr_id_t id) {
    uint32_t bgr_bits = *reg_for_id(id);
    struct debug_info *entry = info_for_id(id);
    if ((bgr_bits & 0xff) == 0) {
        return 0;
    }
    return debug_rate_parent(entry->parents[0]);
}

/*
 * File: cstart.c
 * --------------
 *
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 */

#include "mango.h"
#include "strings.h"

// Declarations of the functions _cstart calls or defines; main and sys_memset
// are defined outside this file.
extern void main(void);
void _cstart(void);
void *sys_memset(void *s, int c, size_t n);

// _cstart is the first C code to run; it is called from the assembly in start.s.
// It clears the BSS section, runs main with the onboard LED lit, and
// reboots once main returns.
void _cstart(void) {
    // boundary symbols for bss are placed by linker script memmap.ld
    extern char __bss_start, __bss_end;
    char *bss = &__bss_start;
    size_t nbytes = &__bss_end - &__bss_start;
    sys_memset(bss, 0, nbytes);

    mango_actled(LED_ON);   // blue onboard LED is on while main executes
    main();
    mango_reboot();         // main() completed successfully, reset the Pi
}

/*
 * Module to control DisplayEngine 2.0 peripheral on Mango Pi
 * 
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 * Feb 2024
 */

#include "de.h"
#include "assert.h"
#include "timer.h"

// Display Engine 2.0

// A width/height pair packed into one 32-bit register word.
// Each field holds (dimension - 1), see de_init.
typedef struct {
    uint32_t width  :13;    // width minus 1
    uint32_t        :3;     // unused
    uint32_t height :13;    // height minus 1
    uint32_t        :3;     // unused
} de_size_t;

// minimal layout of DE device registers to init & configure mixer/blender/ui layer
// de_t covers the top-level DE registers (clock gates, reset, mux)
typedef union {
    struct {
        uint32_t sclk_gate;
        uint32_t hclk_gate;
        uint32_t ahb_reset;
        uint32_t sclk_div; // 4 bits per module
        uint32_t de2tcon_mux; // @[0] swap mixer/tcon
    } regs;
} de_t;

// Global registers of a DE mixer (control, status, double-buffer, output size)
typedef union {
    struct {
        uint32_t glb_ctl;       // written with 1 to enable the mixer (see de_config_mixer0)
        uint32_t glb_sts;
        uint32_t glb_dbuffer;
        de_size_t glb_size;     // set to the full screen size
    } regs;
} de_mixer_t;

// Registers of a DE blender: per-pipe settings followed by routing and output config.
// The reserved array pads the layout so the following registers land at their offsets.
typedef union {
    struct {
        uint32_t pipe_ctrl;             // pipe enable bits start at bit 8 (see de_config_blender0)
        struct {
            uint32_t fill_color;
            de_size_t input_size;
            uint32_t offset;            // position of pipe input within output: (y << 16) | x
            uint32_t reserved;
        } pipe[4];
        // not sure re: # of pipes? (D1 manual says 4 in one place and 2 in another)
        // we are currently only using pipe [1]
        uint32_t reserved[15];
        uint32_t route;                 // 4 bits per channel selecting its pipe
        uint32_t premultiply;
        uint32_t background_color;
        de_size_t output_size;
    } regs;
} de_blender_t;

// Registers of a DE UI overlay channel: four layers followed by channel-wide settings.
// Only layer[0] is configured by this module.
typedef union {
    struct {
        struct {
            uint32_t attr_ctrl;         // feature bits: alpha, format, fill, enable (see de_config_ui_ch1)
            de_size_t size;
            uint32_t offset;
            uint32_t pitch_nbytes;      // bytes per row of the framebuffer
            uint32_t top_laddr;         // low 32 bits of framebuffer address
            uint32_t bot_laddr;
            uint32_t fill_color;
            uint32_t reserved;
        } layer[4];
        uint32_t top_haddr;
        uint32_t bot_haddr;
        de_size_t overlay_size;
    } regs;
} de_ui_t;

// Registers of the DE UI scaler. The reserved arrays pad the layout so each
// named register lands at its hardware offset (horiz_step at 0x88 is
// checked by a static assert below).
typedef union {
    struct {
        uint32_t ctrl;                  // bit 0 enables scaler, bit 4 applies coefficients (see de_config_ui_scaler)
        uint32_t status;
        uint32_t field_ctrl;
        uint32_t bist;
        uint32_t reservedA[12];
        de_size_t output_size;
        uint32_t reservedB[15];
        de_size_t input_size;
        uint32_t reservedC;
        uint32_t horiz_step;            // fixed-point scale step
        uint32_t vert_step;
        uint32_t horiz_phase;
        uint32_t reservedD;
        uint32_t vert_phase[2];
        uint32_t reservedE[88];
        uint32_t horiz_coeff[16];       // one entry per phase of the horizontal filter
    } regs;
} de_scaler_t;

// Base addresses of the DE register groups used by this module, followed by
// compile-time checks that one register per struct lands at its expected address.
#define DE_BASE          ((de_t *)0x5000000)
#define DE_MIXER0  ((de_mixer_t *)0x5100000)
#define DE_BLD0  ((de_blender_t *)0x5101000)
#define DE_UI_CH1     ((de_ui_t *)0x5103000)
#define DE_SCALER ((de_scaler_t *)0x5140000)

_Static_assert(&(DE_BASE->regs.de2tcon_mux)    == (uint32_t *)0x5000010, "de de2tcon_mux reg must be at address 0x5000010");
_Static_assert(&(DE_MIXER0->regs.glb_dbuffer)  == (uint32_t *)0x5100008, "de mixer0 glb_dbuffer reg must be at address 0x5100008");
_Static_assert(&(DE_BLD0->regs.pipe[1].offset) == (uint32_t *)0x510101c, "de blender0 pipe[1] offset reg must be at address 0x510101c");
_Static_assert(&(DE_UI_CH1->regs.top_haddr)    == (uint32_t *)0x5103080, "de ui ch1 topaddr reg must be at address 0x5103080");
_Static_assert(&(DE_SCALER->regs.horiz_step)   == (uint32_t *)0x5140088, "de scaler horiz step reg must be at address 0x5140088");

// Module state: volatile pointers to each DE register group in use.
// The struct itself is const; only the hardware registers behind the pointers change.
static struct {
    volatile de_t *de;
    volatile de_mixer_t * de_mixer0;
    volatile de_blender_t * de_bld0;
    volatile de_ui_t * de_ui_ch1;
    volatile de_scaler_t * de_scaler;
} const module = {
     .de        = DE_BASE,
     .de_mixer0 = DE_MIXER0,
     .de_bld0   = DE_BLD0,
     .de_ui_ch1 = DE_UI_CH1,
     .de_scaler = DE_SCALER,
};

// Forward declarations of the per-component config helpers defined later in this file.
// Size arguments are in de_size_t form (fields hold dimension - 1).
static void de_config_mixer0(de_size_t full_screen);
static void de_config_blender0(de_size_t full_screen);
static void de_config_ui_ch1(de_size_t fb_size, de_size_t full_screen);
static void de_config_ui_scaler(de_size_t fb_size, de_size_t full_screen);

// Simplest possible init of DE2 to config for dispay of single framebuffer
// Key references:
//      DisplayEngine 2.0 spec https://linux-sunxi.org/images/7/7b/Allwinner_DE2.0_Spec_V1.0.pdf
//      https://linux-sunxi.org/DE2_Register_Guide
void de_init(int fb_width, int fb_height, int screen_width, int screen_height) {
    // top-level reset, ungate clocks
    module.de->regs.ahb_reset = module.de->regs.sclk_gate = module.de->regs.hclk_gate = 1;  // 1 to ungate mixer0

    if (fb_width > screen_width || fb_height > screen_height)
        error("de_init(): requested framebuffer size does not fit on screen");

   // de_size_t registers are slightly wacky: actual width/height = (stored value + 1)
    de_size_t full_screen = {.width= screen_width-1, .height= screen_height-1};
    de_size_t fb_size = {.width= fb_width-1, .height= fb_height-1};

    de_config_mixer0(full_screen);
    de_config_blender0(full_screen);
    de_config_ui_ch1(fb_size, full_screen);
}

// Point UI layer 0 at the framebuffer located at `addr`. Fill mode is switched
// off so the layer displays framebuffer contents. Only the low 32 bits of the
// address are written, so an address that does not fit in 32 bits trips an assert.
void de_set_active_framebuffer(void *addr) {
    const uint32_t FILL_ENABLE = 1 << 4;
    module.de_ui_ch1->regs.layer[0].attr_ctrl &= ~FILL_ENABLE; // disable fill

    uintptr_t wide_addr = (uintptr_t)addr;
    uint32_t narrow_addr = (uint32_t)(wide_addr & 0xffffffff);
    assert(wide_addr == (uintptr_t)narrow_addr); // confirm address fits in 32 bits
    module.de_ui_ch1->regs.layer[0].top_laddr = narrow_addr;

    timer_delay_ms(10);  // resync delay
}

// DE Mixer block is a pipeline: framebuffer(s) -> overlay channel(s) -> (optional scaler) -> blender -> output to TCON
// Mixer-0 more full featured (1 video channel, 3 UI overlay)
// Mixer-1 only has 1 video + 1 UI
// This function enables mixer 0 and sets its global size to the full screen.
static void de_config_mixer0(de_size_t full_screen) {
    module.de_mixer0->regs.glb_ctl = 1; // enable mixer 0
    module.de_mixer0->regs.glb_size = full_screen;
}

// DE Blender pairwise composites 2 overlay channels together. Three separate blenders allow blending 4 channels.
// We use only blender 0 with single UI channel.
// Sets the blender output to the full screen, enables pipe 1 and routes each
// channel to the pipe of the same index. The pipe input size set here is an
// initial value; de_config_ui_scaler writes the pipe 1 input size again.
static void de_config_blender0(de_size_t full_screen) {
    // #warning TODO TEMPORARY: setting blender background to magenta
    // module.de_bld0->regs.background_color = 0xff00ff;
    module.de_bld0->regs.output_size = full_screen;
    uint32_t pipe_index = 1;  // use pipe index 1, route for first ui layer (ch1)
    module.de_bld0->regs.pipe_ctrl = ((1 << pipe_index) << 8); // enable pipe index 1
    module.de_bld0->regs.pipe[pipe_index].input_size = full_screen;
    module.de_bld0->regs.route = 0x3210; // channels 0-3, each channel routed to pipe at corresponding index
}

// Pixel format codes for the format field of a UI layer's attr_ctrl register
// (shifted into place at bit 8 in de_config_ui_ch1). Only XRGB_8888 is used in this file.
enum format_t
   { ARGB_8888 = 0x0, ABGR_8888 = 0x1, RGBA_8888 = 0x2, BGRA_8888 = 0x3,
     XRGB_8888 = 0x4, XBGR_8888 = 0x5, RGBX_8888 = 0x6, BGRX_8888 = 0x7,
      RGB_888  = 0x8,  BGR_888  = 0x9,  RGB_565  = 0xa,  BGR_565  = 0xb,
     ARGB_4444 = 0xc, ABGR_4444 = 0xd, RGBA_4444 = 0xe, BGRA_4444 = 0xf };

// DE UI Overlay represents a single framebuffer. Mixer0 has three UI overlay channels, we use only channel 1.
// An optional UI scaler can be used to up/downscale from framebuffer input on route to blender.
//
// Configures layer 0 of UI channel 1 for a 32-bit XRGB framebuffer of size
// fb_size, initially in fill mode (de_set_active_framebuffer turns fill off),
// then hands off to de_config_ui_scaler to center and optionally scale it.
//
// The feature word is built from unsigned constants: 0xff << 24 as a signed
// int overflows, which is undefined behavior in C.
static void de_config_ui_ch1(de_size_t fb_size, de_size_t full_screen) {
    // default alpha @[24], top-addr-only @[23] no premul @[16] format @[8] enable fill @[4] use global alpha @[1] enable @[0]
    uint32_t features = (0xffu << 24) | (0u << 23) | (0u << 16) | ((uint32_t)XRGB_8888 << 8) | (1u << 4)  | (1u << 1) | (1u << 0);
    module.de_ui_ch1->regs.layer[0].attr_ctrl = features;
    module.de_ui_ch1->regs.layer[0].size = fb_size;
    module.de_ui_ch1->regs.layer[0].offset = 0; // position @ top left corner of output
    module.de_ui_ch1->regs.layer[0].pitch_nbytes = (fb_size.width+1) * 4; // 4 bytes (32 bits) per pixel
    module.de_ui_ch1->regs.overlay_size = fb_size;
    // #warning TODO TEMPORARY: setting ui layer background to yellow
    // module.de_ui_ch1->regs.layer[0].fill_color = 0xffff00;
    de_config_ui_scaler(fb_size, full_screen);   // will center on screen and apply scaler if necessary
}

// From DE2 docs:
// UI scaler supports 1/16x downscale to 32x upscale
// horizontal scaler is 16-phase 4-tap anti-aliasing filter
// vertical scaler is 16-phase linear filter

// Express the ratio in/out as a whole number of 32nds, rounding any
// fractional 32nd upward (a 32nd is the minimum scale increment for DE2).
static int scale_factor(int in, int out) {
    int in_32nds = in * 32;
    int biased = in_32nds + out - 1;    // bias so that integer division rounds up
    return biased / out;
}

// Decide how the framebuffer is scaled and positioned on screen.
//   fb_size, full_screen   sizes in de_size_t form (fields hold dimension - 1)
//   p_scaled_size          receives the size of the scaled output (same form)
//   p_offset               receives the centering offset packed as (y << 16) | x
// Returns the scale step: the in/out ratio in 32nds shifted left by 15, so
// an unscaled ratio of 32/32 yields 0x100000.
static int compute_scale_step(de_size_t fb_size, de_size_t full_screen, de_size_t *p_scaled_size, unsigned int *p_offset) {
    // recover true pixel counts from the stored (dimension - 1) values
    int fb_w = fb_size.width + 1;
    int fb_h = fb_size.height + 1;
    int scr_w = full_screen.width + 1;
    int scr_h = full_screen.height + 1;

    int ratio_x = scale_factor(fb_w, scr_w);
    int ratio_y = scale_factor(fb_h, scr_h);
    // pixels stay square: one ratio (the larger, in 32nds) is used for both axes
    int ratio = ratio_y;
    if (ratio_x > ratio_y) {
        ratio = ratio_x;
    }
    // JDZ: for now, only apply scaling if at least 2x (scale to partial pixels has poor result)
    // otherwise do not scale, center orig rect on screen as-is
    // revisit later, consider whether to use video scaler instead
    if (ratio > 16) {
        ratio = 32;
    }

    int out_w = (fb_w * 32) / ratio;
    int out_h = (fb_h * 32) / ratio;
    de_size_t scaled = {.width= out_w - 1, .height= out_h - 1};
    *p_scaled_size = scaled;

    // split leftover space evenly to center the output
    int pad_x = (scr_w - out_w) / 2;
    int pad_y = (scr_h - out_h) / 2;
    *p_offset = (pad_y << 16) | pad_x;

    return ratio << 15;
}

// DE UI Scaler used to upscale a framebuffer before feeding into blender pipe
// scaler used when frame buffer size is < 1/2 of screen size
// In both cases the blender pipe 1 offset is set to center the image on screen.
static void de_config_ui_scaler(de_size_t fb_size, de_size_t full_screen) {
    de_size_t scaler_output_size;
    uint32_t center_offset;
    int step = compute_scale_step(fb_size, full_screen, &scaler_output_size, &center_offset);
    module.de_bld0->regs.pipe[1].offset = center_offset; // position in center
    if (step == 0x100000) {     // step value that compute_scale_step returns when no scaling is applied
        module.de_scaler->regs.ctrl = 0;    // disable scaler
        module.de_bld0->regs.pipe[1].input_size = fb_size; //  ui layer is direct input to blender pipe 1
    } else {
        module.de_scaler->regs.ctrl = 1;    // enable scaler
        module.de_scaler->regs.horiz_phase = 1 << 20;   // correct for first pixel
        module.de_scaler->regs.horiz_step = module.de_scaler->regs.vert_step = step;
        module.de_scaler->regs.input_size = fb_size; // ui layer is input to scaler
        module.de_scaler->regs.output_size = scaler_output_size;
        module.de_bld0->regs.pipe[1].input_size = scaler_output_size; // scaler output is input to blender pipe 1

        // UI scaler works line by line, array of coeff controls of 4-tap blend within line (horiz)
        // below I am setting coeffs to 0,0,0,64 to replicate right pixel of each 4-tap (hard cutoff)
        // note that vert is fixed linear scale of taps without configurable control (fuzzy instead of crisp...)
        // if this ends being unacceptable, could switch to video scaler which has controls for both horiz and vert
        for (int i = 0; i < 16; i++) module.de_scaler->regs.horiz_coeff[i] = 0x40;
        module.de_scaler->regs.ctrl |= (1 << 4); // apply coefficients
    }
}

/*
 * Access to font pixel data stored as a bitmap.
 *
 * Author: Philip Levis <pal@cs.stanford.edu>
 * Last modified: 3/17/16
 */

#include "font.h"

// A fixed-size bitmap font. pixel_data holds one bit per pixel; each bitmap
// row contains the same pixel row of every glyph side by side, in character
// order (see the indexing in font_get_glyph).
// apple_II_font is declared here and defined with its data further down in this file.
static const struct font {
    unsigned char first_char, last_char;    // range of characters that have a glyph
    int glyph_width, glyph_height;          // glyph dimensions in pixels
    uint8_t pixel_data[];                   // packed bits, most-significant bit first within each byte
} apple_II_font;

// Module state: the font that all font_get_* functions read from.
static struct {
    const struct font *font;
} const module = {
    .font = &apple_II_font,
};

int font_get_glyph_height(void) {
    return module.font->glyph_height;
}

int font_get_glyph_width(void) {
    return module.font->glyph_width;
}

// Number of pixels in one glyph (width times height), which is also the
// buffer length font_get_glyph requires.
int font_get_glyph_size(void) {
    int width = font_get_glyph_width();
    int height = font_get_glyph_height();
    return width * height;
}

/*
 * Extract glyph pixels for requested character from font bitmap.
 * Read bits from font bitmap and store into array of bytes, one byte per pixel.
 * Use 0xff byte for 'on' pixel, 0x0 for 'off' pixel.
 */
bool font_get_glyph(char ch, uint8_t buf[], size_t buflen) {
    if ((ch != ' ' && (ch < module.font->first_char || ch > module.font->last_char)) || (buflen != font_get_glyph_size())) {
        return false;
    }

    if (ch == ' ') { // Handle space as special case, return all-off image
        for (int i = 0; i < buflen; i++) {
            buf[i] = 0;
        }
    } else {
        int index = 0;
        int nbits_in_row = (module.font->last_char - module.font->first_char + 1) * font_get_glyph_width();
        int x_offset = (ch - module.font->first_char);
        for (int y = 0; y < font_get_glyph_height(); y++) {
            for (int x = 0; x < font_get_glyph_width(); x++) {
                int bit_index = y * nbits_in_row + x_offset * font_get_glyph_width() + x;
                int bit_start = bit_index / 8;
                int bit_offset = bit_index % 8;
                // extract single bit for this pixel from bitmap
                int val = module.font->pixel_data[bit_start] & (1 << (7 - bit_offset));
                // use 0xff for on pixel, 0x0 for off pixel
                buf[index++] = val != 0 ? 0xFF : 0x00;
            }
        }
    }
    return true;
}

/*
 * Apple II font stored as a bitmap.
 * Each character is 14 bits wide and 16 bits tall (long story
 * about some C conversion).
 *
 * Generated from a screenshot of the original font, turned into
 * a 32-bit color C structure with GIMP, then turned into a bitmap
 * C structure with a simple C program.
 */
static const struct font apple_II_font = {
    .first_char = 0x21, .last_char = 0x7F,
    .glyph_width = 14, .glyph_height = 16,
    .pixel_data = {
        0x03, 0x00, 0x33, 0x00, 0xcc, 0x00, 0xc0, 0x3c,
        0x00, 0x30, 0x00, 0x30, 0x00, 0xc0, 0x03, 0x00,
        0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x03, 0xf0, 0x03, 0x00, 0x3f, 0x03,
        0xff, 0x00, 0x30, 0x3f, 0xf0, 0x0f, 0xc3, 0xff,
        0x03, 0xf0, 0x0f, 0xc0, 0x00, 0x00, 0x00, 0x00,
        0x30, 0x00, 0x00, 0x30, 0x00, 0xfc, 0x03, 0xf0,
        0x03, 0x00, 0xff, 0x00, 0xfc, 0x0f, 0xf0, 0x3f,
        0xf0, 0xff, 0xc0, 0xff, 0x0c, 0x0c, 0x0f, 0xc0,
        0x00, 0xc3, 0x03, 0x0c, 0x00, 0x30, 0x30, 0xc0,
        0xc0, 0xfc, 0x0f, 0xf0, 0x0f, 0xc0, 0xff, 0x00,
        0xfc, 0x0f, 0xfc, 0x30, 0x30, 0xc0, 0xc3, 0x03,
        0x0c, 0x0c, 0x30, 0x30, 0xff, 0xc3, 0xff, 0x00,
        0x00, 0x3f, 0xf0, 0x00, 0x00, 0x00, 0x03, 0x00,
        0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x0c, 0x00,
        0x00, 0x0f, 0x00, 0x00, 0x0c, 0x00, 0x03, 0x00,
        0x03, 0x03, 0x00, 0x03, 0xc0, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x00,
        0xc0, 0x3f, 0x00, 0x3c, 0xc0, 0x00, 0x00, 0xc0,
        0x0c, 0xc0, 0x33, 0x00, 0x30, 0x0f, 0x00, 0x0c,
        0x00, 0x0c, 0x00, 0x30, 0x00, 0xc0, 0x03, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0xfc, 0x00, 0xc0, 0x0f, 0xc0, 0xff, 0xc0,
        0x0c, 0x0f, 0xfc, 0x03, 0xf0, 0xff, 0xc0, 0xfc,
        0x03, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00,
        0x00, 0x0c, 0x00, 0x3f, 0x00, 0xfc, 0x00, 0xc0,
        0x3f, 0xc0, 0x3f, 0x03, 0xfc, 0x0f, 0xfc, 0x3f,
        0xf0, 0x3f, 0xc3, 0x03, 0x03, 0xf0, 0x00, 0x30,
        0xc0, 0xc3, 0x00, 0x0c, 0x0c, 0x30, 0x30, 0x3f,
        0x03, 0xfc, 0x03, 0xf0, 0x3f, 0xc0, 0x3f, 0x03,
        0xff, 0x0c, 0x0c, 0x30, 0x30, 0xc0, 0xc3, 0x03,
        0x0c, 0x0c, 0x3f, 0xf0, 0xff, 0xc0, 0x00, 0x0f,
        0xfc, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00,
        0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03,
        0xc0, 0x00, 0x03, 0x00, 0x00, 0xc0, 0x00, 0xc0,
        0xc0, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x0f, 0xc0, 0x30, 0x0f,
        0xc0, 0x0f, 0x30, 0x00, 0x00, 0x30, 0x03, 0x30,
        0x0c, 0xc0, 0x3f, 0xc3, 0xc3, 0x0c, 0xc0, 0x03,
        0x00, 0x30, 0x00, 0x0c, 0x0c, 0xcc, 0x03, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xc0,
        0xc0, 0xf0, 0x0c, 0x0c, 0x00, 0x30, 0x0f, 0x03,
        0x00, 0x03, 0x00, 0x00, 0x30, 0xc0, 0xc3, 0x03,
        0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
        0xc0, 0x30, 0x30, 0xc0, 0xc0, 0xcc, 0x0c, 0x0c,
        0x30, 0x30, 0xc0, 0xc3, 0x00, 0x0c, 0x00, 0x30,
        0x00, 0xc0, 0xc0, 0x30, 0x00, 0x0c, 0x30, 0xc0,
        0xc0, 0x03, 0xcf, 0x0c, 0x0c, 0x30, 0x30, 0xc0,
        0xc3, 0x03, 0x0c, 0x0c, 0x30, 0x30, 0x0c, 0x03,
        0x03, 0x0c, 0x0c, 0x30, 0x30, 0xc0, 0xc3, 0x03,
        0x00, 0x0c, 0x3c, 0x00, 0xc0, 0x00, 0x0f, 0x00,
        0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x0c, 0x00,
        0x00, 0x00, 0x00, 0xc0, 0x00, 0x03, 0x0c, 0x00,
        0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x30, 0x00,
        0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x0f, 0x00, 0x0c, 0x00, 0x3c, 0x0c,
        0xf0, 0x33, 0x30, 0x0c, 0x00, 0xcc, 0x03, 0x30,
        0x0f, 0xf0, 0xf0, 0xc3, 0x30, 0x00, 0xc0, 0x0c,
        0x00, 0x03, 0x03, 0x33, 0x00, 0xc0, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x0c, 0x30, 0x30, 0x3c,
        0x03, 0x03, 0x00, 0x0c, 0x03, 0xc0, 0xc0, 0x00,
        0xc0, 0x00, 0x0c, 0x30, 0x30, 0xc0, 0xc0, 0x00,
        0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x30, 0x0c,
        0x0c, 0x30, 0x30, 0x33, 0x03, 0x03, 0x0c, 0x0c,
        0x30, 0x30, 0xc0, 0x03, 0x00, 0x0c, 0x00, 0x30,
        0x30, 0x0c, 0x00, 0x03, 0x0c, 0x30, 0x30, 0x00,
        0xf3, 0xc3, 0x03, 0x0c, 0x0c, 0x30, 0x30, 0xc0,
        0xc3, 0x03, 0x0c, 0x0c, 0x03, 0x00, 0xc0, 0xc3,
        0x03, 0x0c, 0x0c, 0x30, 0x30, 0xc0, 0xc0, 0x03,
        0x0f, 0x00, 0x30, 0x00, 0x03, 0xc0, 0x00, 0x00,
        0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
        0x00, 0x30, 0x00, 0x00, 0xc3, 0x00, 0x00, 0x30,
        0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x03, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x03, 0xc0, 0x03, 0x00, 0x0f, 0x03, 0x3c, 0x0c,
        0xcc, 0x03, 0x00, 0x33, 0x03, 0xff, 0x0c, 0xc0,
        0x00, 0xc0, 0xcc, 0x00, 0x30, 0x0c, 0x00, 0x00,
        0x30, 0x3f, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x0c, 0x0c, 0x3c, 0x03, 0x00, 0x00,
        0xc0, 0x0c, 0x03, 0x30, 0x3f, 0xc0, 0xc0, 0x00,
        0x0c, 0x0c, 0x0c, 0x30, 0x30, 0x0c, 0x00, 0x30,
        0x03, 0x00, 0x3f, 0xf0, 0x03, 0x00, 0x0c, 0x0c,
        0xcc, 0x30, 0x30, 0xc0, 0xc3, 0x00, 0x0c, 0x0c,
        0x30, 0x00, 0xc0, 0x03, 0x00, 0x0c, 0x0c, 0x03,
        0x00, 0x00, 0xc3, 0x30, 0x0c, 0x00, 0x33, 0x30,
        0xf0, 0xc3, 0x03, 0x0c, 0x0c, 0x30, 0x30, 0xc0,
        0xc3, 0x00, 0x00, 0xc0, 0x30, 0x30, 0xc0, 0xc3,
        0x03, 0x03, 0x30, 0x0c, 0xc0, 0x03, 0x03, 0xc0,
        0x03, 0x00, 0x00, 0xf0, 0x0c, 0x00, 0x00, 0x00,
        0x30, 0x0f, 0xc0, 0xff, 0x00, 0xff, 0x03, 0xfc,
        0x0f, 0xc0, 0x30, 0x00, 0xfc, 0x0f, 0xf0, 0x0f,
        0x00, 0x0f, 0x03, 0x03, 0x00, 0xc0, 0x3c, 0xf0,
        0xff, 0x00, 0xfc, 0x0f, 0xf0, 0x0f, 0xf0, 0xcf,
        0xc0, 0xff, 0x0f, 0xf0, 0x30, 0x30, 0xc0, 0xc3,
        0x03, 0x0c, 0x0c, 0x30, 0x30, 0xff, 0xc0, 0xf0,
        0x00, 0xc0, 0x03, 0xc0, 0x00, 0x00, 0xcc, 0x00,
        0xc0, 0x0c, 0xc0, 0xff, 0xc3, 0x30, 0x00, 0x30,
        0x33, 0x00, 0x0c, 0x03, 0x00, 0x00, 0x0c, 0x0f,
        0xc0, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x03, 0x03, 0x0f, 0x00, 0xc0, 0x00, 0x30, 0x03,
        0x00, 0xcc, 0x0f, 0xf0, 0x30, 0x00, 0x03, 0x03,
        0x03, 0x0c, 0x0c, 0x03, 0x00, 0x0c, 0x00, 0xc0,
        0x0f, 0xfc, 0x00, 0xc0, 0x03, 0x03, 0x33, 0x0c,
        0x0c, 0x30, 0x30, 0xc0, 0x03, 0x03, 0x0c, 0x00,
        0x30, 0x00, 0xc0, 0x03, 0x03, 0x00, 0xc0, 0x00,
        0x30, 0xcc, 0x03, 0x00, 0x0c, 0xcc, 0x3c, 0x30,
        0xc0, 0xc3, 0x03, 0x0c, 0x0c, 0x30, 0x30, 0xc0,
        0x00, 0x30, 0x0c, 0x0c, 0x30, 0x30, 0xc0, 0xc0,
        0xcc, 0x03, 0x30, 0x00, 0xc0, 0xf0, 0x00, 0xc0,
        0x00, 0x3c, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x03,
        0xf0, 0x3f, 0xc0, 0x3f, 0xc0, 0xff, 0x03, 0xf0,
        0x0c, 0x00, 0x3f, 0x03, 0xfc, 0x03, 0xc0, 0x03,
        0xc0, 0xc0, 0xc0, 0x30, 0x0f, 0x3c, 0x3f, 0xc0,
        0x3f, 0x03, 0xfc, 0x03, 0xfc, 0x33, 0xf0, 0x3f,
        0xc3, 0xfc, 0x0c, 0x0c, 0x30, 0x30, 0xc0, 0xc3,
        0x03, 0x0c, 0x0c, 0x3f, 0xf0, 0x3c, 0x00, 0x30,
        0x00, 0xf0, 0x00, 0x00, 0x33, 0x00, 0x30, 0x00,
        0x00, 0x0c, 0xc0, 0x3f, 0x00, 0x30, 0x03, 0x00,
        0x00, 0x00, 0xc0, 0x00, 0x03, 0x00, 0xc0, 0x3f,
        0xf0, 0x00, 0x03, 0xff, 0x00, 0x00, 0x03, 0x00,
        0xcc, 0xc0, 0x30, 0x00, 0xf0, 0x03, 0xc0, 0xc3,
        0x00, 0x03, 0x0f, 0xf0, 0x03, 0x00, 0x3f, 0x00,
        0xff, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00,
        0x00, 0x0c, 0x03, 0x00, 0xcf, 0xc3, 0x03, 0x0f,
        0xf0, 0x30, 0x00, 0xc0, 0xc3, 0xfc, 0x0f, 0xf0,
        0x30, 0x00, 0xff, 0xc0, 0x30, 0x00, 0x0c, 0x3c,
        0x00, 0xc0, 0x03, 0x33, 0x0c, 0xcc, 0x30, 0x30,
        0xff, 0x03, 0x03, 0x0f, 0xf0, 0x0f, 0xc0, 0x0c,
        0x03, 0x03, 0x0c, 0x0c, 0x33, 0x30, 0x0c, 0x00,
        0x30, 0x00, 0xc0, 0x3c, 0x00, 0x0c, 0x00, 0x0f,
        0x03, 0x30, 0x00, 0x00, 0x00, 0x00, 0x03, 0x0c,
        0x0c, 0x30, 0x00, 0xc0, 0xc3, 0x03, 0x0f, 0xf0,
        0x30, 0x30, 0xc0, 0xc0, 0x30, 0x00, 0x30, 0x30,
        0xc0, 0x0c, 0x03, 0x33, 0x0c, 0x0c, 0x30, 0x30,
        0xc0, 0xc3, 0x03, 0x0f, 0x00, 0x30, 0x00, 0x30,
        0x03, 0x03, 0x0c, 0x0c, 0x30, 0x30, 0x33, 0x03,
        0x03, 0x00, 0x30, 0x3c, 0x00, 0x0c, 0x00, 0x0f,
        0x00, 0x00, 0x33, 0x30, 0x0c, 0x00, 0x00, 0x03,
        0x30, 0x0f, 0xc0, 0x0c, 0x00, 0xc0, 0x00, 0x00,
        0x30, 0x00, 0x00, 0xc0, 0x30, 0x0f, 0xfc, 0x00,
        0x00, 0xff, 0xc0, 0x00, 0x00, 0xc0, 0x33, 0x30,
        0x0c, 0x00, 0x3c, 0x00, 0xf0, 0x30, 0xc0, 0x00,
        0xc3, 0xfc, 0x00, 0xc0, 0x0f, 0xc0, 0x3f, 0xc0,
        0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x03,
        0x00, 0xc0, 0x33, 0xf0, 0xc0, 0xc3, 0xfc, 0x0c,
        0x00, 0x30, 0x30, 0xff, 0x03, 0xfc, 0x0c, 0x00,
        0x3f, 0xf0, 0x0c, 0x00, 0x03, 0x0f, 0x00, 0x30,
        0x00, 0xcc, 0xc3, 0x33, 0x0c, 0x0c, 0x3f, 0xc0,
        0xc0, 0xc3, 0xfc, 0x03, 0xf0, 0x03, 0x00, 0xc0,
        0xc3, 0x03, 0x0c, 0xcc, 0x03, 0x00, 0x0c, 0x00,
        0x30, 0x0f, 0x00, 0x03, 0x00, 0x03, 0xc0, 0xcc,
        0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x03, 0x0c,
        0x00, 0x30, 0x30, 0xc0, 0xc3, 0xfc, 0x0c, 0x0c,
        0x30, 0x30, 0x0c, 0x00, 0x0c, 0x0c, 0x30, 0x03,
        0x00, 0xcc, 0xc3, 0x03, 0x0c, 0x0c, 0x30, 0x30,
        0xc0, 0xc3, 0xc0, 0x0c, 0x00, 0x0c, 0x00, 0xc0,
        0xc3, 0x03, 0x0c, 0x0c, 0x0c, 0xc0, 0xc0, 0xc0,
        0x0c, 0x0f, 0x00, 0x03, 0x00, 0x03, 0xc0, 0x00,
        0x0c, 0xcc, 0x03, 0x00, 0x00, 0x03, 0xff, 0x00,
        0xcc, 0x0c, 0x00, 0xcc, 0xc0, 0x00, 0x0c, 0x00,
        0x00, 0x30, 0x3f, 0x00, 0x30, 0x00, 0xc0, 0x00,
        0x00, 0x00, 0x00, 0xc0, 0x0f, 0x0c, 0x03, 0x00,
        0x30, 0x00, 0x03, 0x0f, 0xfc, 0x00, 0x30, 0xc0,
        0xc0, 0xc0, 0x0c, 0x0c, 0x00, 0x30, 0x0c, 0x00,
        0x30, 0x03, 0x00, 0x3f, 0xf0, 0x03, 0x00, 0x30,
        0x0c, 0xf0, 0x3f, 0xf0, 0xc0, 0xc3, 0x00, 0x0c,
        0x0c, 0x30, 0x00, 0xc0, 0x03, 0x0f, 0x0c, 0x0c,
        0x03, 0x00, 0x00, 0xc3, 0x30, 0x0c, 0x00, 0x30,
        0x30, 0xc3, 0xc3, 0x03, 0x0c, 0x00, 0x33, 0x30,
        0xcc, 0x00, 0x03, 0x00, 0xc0, 0x30, 0x30, 0xc0,
        0xc3, 0x33, 0x03, 0x30, 0x03, 0x00, 0x30, 0x03,
        0xc0, 0x00, 0x30, 0x00, 0xf0, 0xc0, 0xc0, 0x00,
        0x00, 0x00, 0x0f, 0xf0, 0xc0, 0xc3, 0x00, 0x0c,
        0x0c, 0x3f, 0xf0, 0x30, 0x03, 0x03, 0x0c, 0x0c,
        0x03, 0x00, 0x03, 0x03, 0xf0, 0x00, 0xc0, 0x33,
        0x30, 0xc0, 0xc3, 0x03, 0x0c, 0x0c, 0x30, 0x30,
        0xc0, 0x00, 0xfc, 0x03, 0x00, 0x30, 0x30, 0xc0,
        0xc3, 0x33, 0x00, 0xc0, 0x30, 0x30, 0x0c, 0x00,
        0xf0, 0x00, 0xc0, 0x03, 0xc0, 0x00, 0x00, 0xcc,
        0x00, 0xc0, 0x00, 0x00, 0xff, 0xc0, 0x33, 0x03,
        0x00, 0x33, 0x30, 0x00, 0x03, 0x00, 0x00, 0x0c,
        0x0f, 0xc0, 0x0c, 0x00, 0x30, 0x00, 0x00, 0x00,
        0x00, 0x30, 0x03, 0xc3, 0x00, 0xc0, 0x0c, 0x00,
        0x00, 0xc3, 0xff, 0x00, 0x0c, 0x30, 0x30, 0x30,
        0x03, 0x03, 0x00, 0x0c, 0x03, 0x00, 0x0c, 0x00,
        0xc0, 0x0f, 0xfc, 0x00, 0xc0, 0x0c, 0x03, 0x3c,
        0x0f, 0xfc, 0x30, 0x30, 0xc0, 0x03, 0x03, 0x0c,
        0x00, 0x30, 0x00, 0xc3, 0xc3, 0x03, 0x00, 0xc0,
        0x00, 0x30, 0xcc, 0x03, 0x00, 0x0c, 0x0c, 0x30,
        0xf0, 0xc0, 0xc3, 0x00, 0x0c, 0xcc, 0x33, 0x00,
        0x00, 0xc0, 0x30, 0x0c, 0x0c, 0x30, 0x30, 0xcc,
        0xc0, 0xcc, 0x00, 0xc0, 0x0c, 0x00, 0xf0, 0x00,
        0x0c, 0x00, 0x3c, 0x30, 0x30, 0x00, 0x00, 0x00,
        0x03, 0xfc, 0x30, 0x30, 0xc0, 0x03, 0x03, 0x0f,
        0xfc, 0x0c, 0x00, 0xc0, 0xc3, 0x03, 0x00, 0xc0,
        0x00, 0xc0, 0xfc, 0x00, 0x30, 0x0c, 0xcc, 0x30,
        0x30, 0xc0, 0xc3, 0x03, 0x0c, 0x0c, 0x30, 0x00,
        0x3f, 0x00, 0xc0, 0x0c, 0x0c, 0x30, 0x30, 0xcc,
        0xc0, 0x30, 0x0c, 0x0c, 0x03, 0x00, 0x3c, 0x00,
        0x30, 0x00, 0xf0, 0x00, 0x00, 0x33, 0x00, 0x00,
        0x00, 0x00, 0x0c, 0xc0, 0xff, 0x03, 0x0f, 0x0c,
        0x30, 0x00, 0x00, 0x30, 0x00, 0x0c, 0x0c, 0xcc,
        0x03, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x30,
        0x00, 0xc0, 0xc0, 0x30, 0x0c, 0x00, 0x30, 0x30,
        0x03, 0x03, 0x03, 0x0c, 0x0c, 0x0c, 0x00, 0xc0,
        0xc0, 0x0c, 0x00, 0x00, 0x03, 0x00, 0x0c, 0x00,
        0x00, 0x00, 0xc0, 0x00, 0x00, 0xc0, 0x03, 0x03,
        0x0c, 0x0c, 0x30, 0x30, 0xc0, 0xc3, 0x00, 0x0c,
        0x00, 0x30, 0x30, 0xc0, 0xc0, 0x30, 0x0c, 0x0c,
        0x30, 0xc0, 0xc0, 0x03, 0x03, 0x0c, 0x0c, 0x30,
        0x30, 0xc0, 0x03, 0x0c, 0x0c, 0x30, 0x30, 0x30,
        0x0c, 0x03, 0x03, 0x03, 0x30, 0x3c, 0xf0, 0xc0,
        0xc0, 0x30, 0x0c, 0x00, 0x3c, 0x00, 0x00, 0xc0,
        0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03,
        0x0c, 0x0c, 0x30, 0x00, 0xc0, 0xc3, 0x00, 0x03,
        0x00, 0x0f, 0xf0, 0xc0, 0xc0, 0x30, 0x00, 0x30,
        0x30, 0xc0, 0x0c, 0x03, 0x33, 0x0c, 0x0c, 0x30,
        0x30, 0xff, 0x00, 0xff, 0x0c, 0x00, 0x00, 0x30,
        0x30, 0xc3, 0x0f, 0x03, 0x30, 0x33, 0x30, 0x33,
        0x00, 0xff, 0x03, 0x00, 0x0f, 0x00, 0x0c, 0x00,
        0x3c, 0x00, 0x00, 0x33, 0x30, 0x00, 0x00, 0x00,
        0x03, 0x30, 0x3f, 0xc0, 0xc3, 0xc3, 0x0c, 0x00,
        0x00, 0x0c, 0x00, 0x03, 0x03, 0x33, 0x00, 0xc0,
        0x03, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x30,
        0x30, 0x0c, 0x03, 0x00, 0x0c, 0x0c, 0x00, 0xc0,
        0xc0, 0xc3, 0x03, 0x03, 0x00, 0x30, 0x30, 0x03,
        0x00, 0x00, 0x00, 0xc0, 0x03, 0x00, 0x00, 0x00,
        0x30, 0x00, 0x00, 0x30, 0x00, 0xc0, 0xc3, 0x03,
        0x0c, 0x0c, 0x30, 0x30, 0xc0, 0x03, 0x00, 0x0c,
        0x0c, 0x30, 0x30, 0x0c, 0x03, 0x03, 0x0c, 0x30,
        0x30, 0x00, 0xc0, 0xc3, 0x03, 0x0c, 0x0c, 0x30,
        0x00, 0xc3, 0x03, 0x0c, 0x0c, 0x0c, 0x03, 0x00,
        0xc0, 0xc0, 0xcc, 0x0f, 0x3c, 0x30, 0x30, 0x0c,
        0x03, 0x00, 0x0f, 0x00, 0x00, 0x30, 0x03, 0xc0,
        0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc3, 0x03,
        0x0c, 0x00, 0x30, 0x30, 0xc0, 0x00, 0xc0, 0x03,
        0xfc, 0x30, 0x30, 0x0c, 0x00, 0x0c, 0x0c, 0x30,
        0x03, 0x00, 0xcc, 0xc3, 0x03, 0x0c, 0x0c, 0x3f,
        0xc0, 0x3f, 0xc3, 0x00, 0x00, 0x0c, 0x0c, 0x30,
        0xc3, 0xc0, 0xcc, 0x0c, 0xcc, 0x0c, 0xc0, 0x3f,
        0xc0, 0xc0, 0x03, 0xc0, 0x03, 0x00, 0x0f, 0x00,
        0x00, 0x0c, 0xcc, 0x03, 0x00, 0x00, 0x00, 0xcc,
        0x00, 0xc0, 0x00, 0xf0, 0x3c, 0xc0, 0x00, 0x00,
        0xc0, 0x03, 0x00, 0x0c, 0x00, 0x00, 0x03, 0x00,
        0x00, 0x00, 0x0c, 0x00, 0x00, 0x03, 0xf0, 0x0f,
        0xc0, 0xff, 0xc0, 0xfc, 0x00, 0x30, 0x0f, 0xc0,
        0x3f, 0x00, 0xc0, 0x03, 0xf0, 0x3f, 0x00, 0x00,
        0x00, 0xc0, 0x00, 0x30, 0x00, 0x00, 0x30, 0x00,
        0x30, 0x03, 0xfc, 0x30, 0x30, 0xff, 0x00, 0xfc,
        0x0f, 0xf0, 0x3f, 0xf0, 0xc0, 0x00, 0xff, 0x0c,
        0x0c, 0x0f, 0xc0, 0x3f, 0x03, 0x03, 0x0f, 0xfc,
        0x30, 0x30, 0xc0, 0xc0, 0xfc, 0x0c, 0x00, 0x0f,
        0x30, 0xc0, 0xc0, 0xfc, 0x00, 0xc0, 0x0f, 0xc0,
        0x0c, 0x03, 0x03, 0x0c, 0x0c, 0x03, 0x00, 0xff,
        0xc3, 0xff, 0x00, 0x00, 0x3f, 0xf0, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x0f, 0xf0, 0xff, 0x00, 0xff,
        0x03, 0xfc, 0x0f, 0xf0, 0x30, 0x00, 0x03, 0x0c,
        0x0c, 0x0f, 0xc0, 0xc3, 0x03, 0x03, 0x03, 0xf0,
        0x30, 0x30, 0xc0, 0xc0, 0xfc, 0x0c, 0x00, 0x00,
        0x30, 0xc0, 0x03, 0xfc, 0x00, 0xf0, 0x0f, 0x30,
        0x0c, 0x03, 0xcf, 0x0c, 0x0c, 0x00, 0x30, 0xff,
        0xc0, 0x3f, 0x00, 0xc0, 0x3f, 0x00, 0x00, 0x00,
        0x00, 0x00, 0xc0, 0x00, 0x00, 0x33, 0x00, 0x30,
        0x00, 0x3c, 0x0f, 0x30, 0x00, 0x00, 0x30, 0x00,
        0xc0, 0x03, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00,
        0x03, 0x00, 0x00, 0x00, 0xfc, 0x03, 0xf0, 0x3f,
        0xf0, 0x3f, 0x00, 0x0c, 0x03, 0xf0, 0x0f, 0xc0,
        0x30, 0x00, 0xfc, 0x0f, 0xc0, 0x00, 0x00, 0x30,
        0x00, 0x0c, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
        0xff, 0x0c, 0x0c, 0x3f, 0xc0, 0x3f, 0x03, 0xfc,
        0x0f, 0xfc, 0x30, 0x00, 0x3f, 0xc3, 0x03, 0x03,
        0xf0, 0x0f, 0xc0, 0xc0, 0xc3, 0xff, 0x0c, 0x0c,
        0x30, 0x30, 0x3f, 0x03, 0x00, 0x03, 0xcc, 0x30,
        0x30, 0x3f, 0x00, 0x30, 0x03, 0xf0, 0x03, 0x00,
        0xc0, 0xc3, 0x03, 0x00, 0xc0, 0x3f, 0xf0, 0xff,
        0xc0, 0x00, 0x0f, 0xfc, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x03, 0xfc, 0x3f, 0xc0, 0x3f, 0xc0, 0xff,
        0x03, 0xfc, 0x0c, 0x00, 0x00, 0xc3, 0x03, 0x03,
        0xf0, 0x30, 0xc0, 0xc0, 0xc0, 0xfc, 0x0c, 0x0c,
        0x30, 0x30, 0x3f, 0x03, 0x00, 0x00, 0x0c, 0x30,
        0x00, 0xff, 0x00, 0x3c, 0x03, 0xcc, 0x03, 0x00,
        0xf3, 0xc3, 0x03, 0x00, 0x0c, 0x3f, 0xf0, 0x0f,
        0xc0, 0x30, 0x0f, 0xc0, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0xff, 0xfc, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x0f, 0xc0, 0x00, 0x00, 0x00, 0x03,
        0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0xc0, 0x00, 0x03, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, 0x00, 0x0c,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x3f, 0xff, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x03, 0xf0, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x30, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x3f, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00
    }
};

/*
 * GPIO interrupt handling
 *
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 * Updated Thu Feb 15 12:28:12 PST 2024
 */

#include "gpio_interrupt.h"
#include "_gpio_common.h"
#include "assert.h"
#include "gpio_extra.h"
#include "interrupts.h"
#include <stddef.h>

// Register block for the external interrupts (EINT) of one GPIO group.
// The union with `padding` makes each block 0x20 bytes so that
// GPIO_EINT_BASE[group] / GPIO_EINT_BASE + group steps from one group's
// registers to the next.
typedef union {
    struct {
        uint32_t cfg[4]; // only 1-2 in use; 4-bit event field per pin, 8 pins per register
        uint32_t ctl;       // one enable bit per pin
        uint32_t status;    // one pending bit per pin, write 1 to clear
        uint32_t debounce;  // predivide in bits starting at 4, clock select in bit 0 (shared by whole group)
    } regs;
    uint8_t padding[0x20];
} gpio_eint_t;

// Base address of the array of per-group EINT register blocks, indexed by group.
// The static asserts spot-check the struct layout and stride against two
// known register addresses.
#define GPIO_EINT_BASE       ((gpio_eint_t *)0x2000220)
_Static_assert(&(GPIO_EINT_BASE[GROUP_C].regs.cfg[0])   == (uint32_t *)0x2000240, "PC irq cfg0 reg must be at address 0x2000240");
_Static_assert(&(GPIO_EINT_BASE[GROUP_E].regs.debounce) == (uint32_t *)0x2000298, "PE irq debounce reg must be at address 0x2000298");

// Per-group interrupt state: where the group's EINT registers live, which
// top-level interrupt source it raises, and the table of per-pin handlers
// used for second-level dispatch.
typedef struct {
    volatile gpio_eint_t *const eint;       // this group's EINT register block
    const int max_pin_index;                // set from GPIO_Px_LAST_INDEX for the group
    const interrupt_source_t source;        // source registered with the interrupts module
    struct {
        handlerfn_t fn;                     // handler for this pin; zero-initialized until registered
        void *aux_data;                     // passed to fn when it is invoked
    } handlers[GPIO_MAX_PIN_INDEX];         // indexed by pin index within the group
                                            // NOTE(review): confirm GPIO_MAX_PIN_INDEX is a count, not a highest index
} gpio_int_group_t;

// Module state: one entry per GPIO group (B through G) plus a flag recording
// whether gpio_interrupt_init() has run. get_int_group indexes `groups` by
// group number, so the entries are presumably listed in GROUP_x enum order
// (verify against the GROUP_x definitions).
static struct {
    gpio_int_group_t groups[GPIO_NGROUPS];
    bool initialized;
} module = {
    .groups = { {.eint= GPIO_EINT_BASE + GROUP_B, .max_pin_index= GPIO_PB_LAST_INDEX, .source= INTERRUPT_SOURCE_GPIOB},
                {.eint= GPIO_EINT_BASE + GROUP_C, .max_pin_index= GPIO_PC_LAST_INDEX, .source= INTERRUPT_SOURCE_GPIOC},
                {.eint= GPIO_EINT_BASE + GROUP_D, .max_pin_index= GPIO_PD_LAST_INDEX, .source= INTERRUPT_SOURCE_GPIOD},
                {.eint= GPIO_EINT_BASE + GROUP_E, .max_pin_index= GPIO_PE_LAST_INDEX, .source= INTERRUPT_SOURCE_GPIOE},
                {.eint= GPIO_EINT_BASE + GROUP_F, .max_pin_index= GPIO_PF_LAST_INDEX, .source= INTERRUPT_SOURCE_GPIOF},
                {.eint= GPIO_EINT_BASE + GROUP_G, .max_pin_index= GPIO_PG_LAST_INDEX, .source= INTERRUPT_SOURCE_GPIOG},
              },
    .initialized = false,
};

// Second-level dispatcher, defined below. aux_data is a pointer to the
// gpio_int_group_t of the group that raised the interrupt.
static void dispatch_to_pin(void *aux_data);

// Look up the interrupt group that owns `gpio`. Stores the pin's index
// within that group in *p_index and returns a pointer to the group's state.
static gpio_int_group_t *get_int_group(gpio_id_t gpio, int *p_index) {
    gpio_pin_t pin = get_group_and_index(gpio);
    gpio_int_group_t *group = module.groups + pin.group;
    *p_index = pin.pin_index;
    return group;
}

// register dispatch_to_pin with top-level interrupts module
// as handler for all GPIO interrupt sources
// Aux data will be pointer to interrupt group
void gpio_interrupt_init(void) {
    for (int i = 0; i < GPIO_NGROUPS; i++) {
        interrupts_register_handler(module.groups[i].source, dispatch_to_pin, &module.groups[i]);
        interrupts_enable_source(module.groups[i].source);
    }
    module.initialized = true;
}

// Record `fn` (and the `aux_data` to pass to it) as the handler for
// interrupts on pin `gpio`. Reports an error if the module has not been
// initialized; asserts that `gpio` is a valid pin id.
void gpio_interrupt_register_handler(gpio_id_t gpio, handlerfn_t fn, void *aux_data) {
    if (!module.initialized) {
        error("gpio_interrupt_init() has not been called!\n");
    }
    assert(gpio_id_is_valid(gpio));

    int slot;
    gpio_int_group_t *group = get_int_group(gpio, &slot);
    group->handlers[slot].fn = fn;
    group->handlers[slot].aux_data = aux_data;
}

// dispatch_to_pin handler receives all GPIO interrupts and performs second-level dispatch to
// per-pin handlers that have been registered with this module
static void dispatch_to_pin(void *aux_data) {
    gpio_int_group_t *gp = aux_data;
    int pin_index = 31 - __builtin_clz(gp->eint->regs.status);
    gp->handlers[pin_index].fn(gp->handlers[pin_index].aux_data);
}

// Set (state true) or clear (state false) the enable bit for pin `gpio`
// in its group's EINT ctl register. Reports an error if the module has not
// been initialized; asserts that `gpio` is a valid pin id.
static void gpio_interrupt_set_enabled(gpio_id_t gpio, bool state) {
    if (!module.initialized) {
        error("gpio_interrupt_init() has not been called!\n");
    }
    assert(gpio_id_is_valid(gpio));

    int pin_index;
    gpio_int_group_t *group = get_int_group(gpio, &pin_index);
    unsigned int pin_bit = (1 << pin_index);
    uint32_t ctl = group->eint->regs.ctl;
    group->eint->regs.ctl = state ? (ctl | pin_bit) : (ctl & ~pin_bit);
}

// Turn on interrupt generation for pin `gpio`.
void gpio_interrupt_enable(gpio_id_t gpio) {
    const bool enabled = true;
    gpio_interrupt_set_enabled(gpio, enabled);
}

// Turn off interrupt generation for pin `gpio`.
void gpio_interrupt_disable(gpio_id_t gpio) {
    const bool enabled = false;
    gpio_interrupt_set_enabled(gpio, enabled);
}

// Clear the pending interrupt event, if any, for pin `gpio`.
// Reports an error if the module has not been initialized; asserts that
// `gpio` is a valid pin id.
void gpio_interrupt_clear(gpio_id_t gpio) {
    if (!module.initialized) error("gpio_interrupt_init() has not been called!\n");
    assert(gpio_id_is_valid(gpio));
    int pin_index;
    gpio_int_group_t *gp = get_int_group(gpio, &pin_index);
    unsigned int mask = (1u << pin_index);
    if ((gp->eint->regs.status & mask) != 0) { // is pending
        // Status is write-1-to-clear: write only this pin's bit. A
        // read-modify-write (|=) would write back every pending bit and
        // so discard pending events of the other pins in the group.
        gp->eint->regs.status = mask;
    }
}

// Configure pin `gpio` to generate interrupts on `event`.
// Writes the pin's 4-bit event field in the group's cfg register, switches
// the pin function to interrupt, sets the debounce register, and finally
// clears any event already pending on the pin.
// Note the debounce register is written as a whole, without any per-pin
// field, so the setting applies to the pin's entire group.
// Reports an error if the module has not been initialized; asserts that
// `gpio` is valid and `event` is no greater than GPIO_INTERRUPT_DOUBLE_EDGE.
void gpio_interrupt_config(gpio_id_t gpio, gpio_event_t event, bool debounce) {
    if (!module.initialized) {
        error("gpio_interrupt_init() has not been called!\n");
    }
    assert(gpio_id_is_valid(gpio) && event <= GPIO_INTERRUPT_DOUBLE_EDGE);

    int pin_index;
    gpio_int_group_t *group = get_int_group(gpio, &pin_index);

    // each cfg register packs 8 pins, one 4-bit event field per pin
    const unsigned int field_mask = 0xf;
    int reg_num = pin_index / 8;
    int field_shift = (pin_index % 8) * 4;
    uint32_t cfg = group->eint->regs.cfg[reg_num];
    cfg &= ~(field_mask << field_shift);
    cfg |= (event & field_mask) << field_shift;
    group->eint->regs.cfg[reg_num] = cfg;

    gpio_set_function(gpio, GPIO_FN_INTERRUPT); // change pin function to interrupt

    // debounce on:  32Khz clock, predivide 2^5, will filter to ~1 event per ms
    // debounce off: 24Mhz clock, no predivide, no filter
    uint32_t debounce_bits = debounce ? ((5 << 4) | 0) : ((0 << 4) | 1);
    group->eint->regs.debounce = debounce_bits;

    gpio_interrupt_clear(gpio); // cancel any stale event
}

/*
 * This module configures the HDMI hardware
 * Written to drive Synopsys DesignWare HDMI TX controller used in AW D1.
 * Uses TCONTV peripheral of AW D1 to stream pixels to HDMI
 * 
 * Support for classic resolutions: 1080p, 720p, SVGA
 * 
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 * Updated: Feb 2024
 */

#include "hdmi.h"
#include "_hdmi_private.h"
#include "assert.h"
#include "ccu.h"
#include "printf.h"
#include <stdbool.h>
#include "timer.h"

// Frame composer registers of the HDMI controller. Every register is one
// byte wide; values wider than 8 bits occupy consecutive byte registers
// (declared here as small uint8_t arrays).
typedef union {
    struct {
        uint8_t invidconf;          // V and H sync polarity bits 5&6, data enable input polarity bit 4, HDMI mode bit 3
        uint8_t inhactv[2];         // count of horiz active pixels
        uint8_t inhblank[2];        // count of horiz blank pixels
        uint8_t invactv[2];         // count of vert active lines
        uint8_t invblank;           // count of vert blank lines
        uint8_t hsyncindelay[2];    // count of pixel clock cycles from non-active edge to last valid period
        uint8_t hsyncinwidth[2];    // count of pixel clock cycles
        uint8_t vsyncindelay;       // count of hsync pulses from non-active edge to last valid period
        uint8_t vsyncinwidth;       // count of hsync pulses
        uint8_t infreq[3];          // these fields used for debugging
        uint8_t ctrldur;            // control period minimum duration (min 12 pixel clock cycles)
        uint8_t exctrldur;          // extended control period minimum duration (min 32 pixel clock cycles)
        uint8_t exctrlspac;         // extended control period maximum spacing (max 50 msec)
        uint8_t chpream[3];         // bits to fill channel data lines not used to transmit the preamble
    } regs;
} hdmi_frame_composer_t;

// Main controller registers of the HDMI controller, one byte each.
// Only clkdis is written by the code visible in this part of the file.
typedef union {
    struct {
        uint8_t sfrdiv;
        uint8_t clkdis;         // clock domain disable
        uint8_t fswrstz;
        uint8_t opctrl;
        uint8_t flowctrl;
     } regs;
} hdmi_main_controller_t;

// Registers of the TCON TOP block (32-bit each). The reserved array is a
// spacer that places port_sel at its required offset.
typedef union {
    struct {
        uint32_t setup;
        uint32_t reserved[6];   // unused gap between setup and port_sel
        uint32_t port_sel;
        uint32_t gate;
    } regs;
} tcon_top_t;

// Registers of the TCON TV block (32-bit each). The reserved arrays are
// spacers that place the named registers at their required offsets.
// The timing registers are each split into two 16-bit fields.
typedef union {
    struct {
        uint32_t gtcl;              // global control; bit 31 is the global enable
        uint32_t reserved[15];
        uint32_t src_ctl;
        uint32_t reservedB[19];
        uint32_t ctl;
        // next 6 regs are named basic0-basic5 in doc
        struct { uint32_t height:16, width:16; } dimensions[3];
        struct { uint32_t bp:16, total:16; } htiming, vtiming;
        struct { uint32_t vert:16, horiz:16; } sync;
    } regs;
} tcon_tv_t;


// Complete description of one supported display mode: the horizontal and
// vertical timings plus the clock settings needed to produce it.
// Field order matters: avail_resolutions initializes these positionally.
struct display_timing {
    hdmi_resolution_id_t id;
    struct {
        // visible extent followed by the three blanking intervals
        uint32_t pixels, front_porch, sync_pulse, back_porch;
    } horiz, vert;
    struct {
        uint32_t clock_rate;    // rate expected back from ccu_config_pll_dividers
        uint32_t n, m; // dividers
    } pll; // PLL VIDEO0
    struct {
        uint32_t clock_rate;    // rate expected back from ccu_config_module_clock
        uint32_t factor_n, factor_m;
        uint32_t src;           // clock source selector (SRC_PLL_VIDEO0 / SRC_PLL_VIDEO0_4X)
    } tcon;     // TCONTV clock
    struct {
        uint32_t clock_rate;    // rate expected back from ccu_config_module_clock
        uint32_t factor_n, factor_m;
        uint32_t src;           // clock source selector (SRC_PLL_VIDEO0 / SRC_PLL_VIDEO0_4X)
    } de;       // DE clock
};

// Base addresses of the peripherals used to drive the display.
#define HDMI_FC   ((hdmi_frame_composer_t *)0x5501000)
#define HDMI_MC  ((hdmi_main_controller_t *)0x5504000)
#define TCON_TOP             ((tcon_top_t *)0x5460000)
#define TCON_TV               ((tcon_tv_t *)0x5470000)

// Spot-check each register struct's layout against one known register address.
_Static_assert(&(HDMI_FC->regs.invblank)  ==  (uint8_t *)0x5501007, "hdmi fc invblank reg must be at address 0x5501007");
_Static_assert(&(HDMI_MC->regs.clkdis)    ==  (uint8_t *)0x5504001, "hdmi mc clkdis reg must be at address 0x5504001");
_Static_assert(&(TCON_TOP->regs.port_sel) == (uint32_t *)0x546001c, "tcon top port_sel reg must be at address 0x546001c");
_Static_assert(&(TCON_TV->regs.src_ctl)   == (uint32_t *)0x5470040, "tcon tv src_ctl reg must be at address 0x5470040");

// Module state: fixed pointers to the memory-mapped peripherals and a copy
// of the timing for the currently selected resolution. config.id stays
// HDMI_INVALID until select_resolution stores a valid entry.
static struct {
    volatile hdmi_frame_composer_t * const hdmi_fc;
    volatile hdmi_main_controller_t * const hdmi_mc;
    volatile tcon_top_t * const tcon_top;
    volatile tcon_tv_t * const tcon_tv;
    struct display_timing config;
}  module = {
     .hdmi_fc  = HDMI_FC,
     .hdmi_mc  = HDMI_MC,
     .tcon_top = TCON_TOP,
     .tcon_tv  = TCON_TV,
     .config.id = HDMI_INVALID,
};

// Clock source selector values stored in the `src` field of the tcon and de
// clock settings and passed on to ccu_config_module_clock.
enum { SRC_PLL_VIDEO0 = 0b00, SRC_PLL_VIDEO0_4X = 0b01 };

// using fixed standard timings (should use edid to negotiate with monitor instead?)
// Table of supported modes, ordered from largest to smallest resolution
// (hdmi_best_match relies on this order) and terminated by an HDMI_INVALID entry.
// Each timing group is {pixels, front_porch, sync_pulse, back_porch};
// pll is {clock_rate, n, m}; tcon and de are {clock_rate, factor_n, factor_m, src}.
static const struct display_timing avail_resolutions[] = {
           //     {horiz}                {vert}            {pll clock},        {tcon clock},                      {de clock}
    {HDMI_1080P,  {1920,  88,  44, 148}, {1080, 4, 5, 36}, {297000000, 99, 2}, {148500000, 0, 1, SRC_PLL_VIDEO0}, {297000000, 0, 3, SRC_PLL_VIDEO0_4X}},
                                                        // recommended PLL 297000000 factors 99,2 from p.43 D-1 manual
    {HDMI_HD,     {1280, 110,  40, 220}, { 720, 5, 5, 20}, {297000000, 99, 2}, { 74250000, 0, 3, SRC_PLL_VIDEO0}, {297000000, 0, 3, SRC_PLL_VIDEO0_4X}},
    {HDMI_SVGA,   { 800,  40, 128,  88}, { 600, 1, 4, 23}, {120000000, 20, 1}, { 40000000, 0, 2, SRC_PLL_VIDEO0}, {480000000, 0, 0, SRC_PLL_VIDEO0_4X}},
    {HDMI_INVALID, {0}}
};

// Forward declarations of the file-private helpers that hdmi_init calls.
static bool select_resolution(hdmi_resolution_id_t id);
static void enable_display_clocks(void);
static void hdmi_controller_init(void);
static void tcon_init(void);
static int sun20i_d1_hdmi_phy_config(void);

// Configure the display hardware for resolution `id`: select the timing,
// enable the clocks, then set up the HDMI controller and TCON.
// Reports an error if `id` is not one of the available resolutions.
void hdmi_init(hdmi_resolution_id_t id) {
    // Persists across calls. hdmi_init may be called again to change
    // resolution, but the PHY must be configured exactly once: it does not
    // need re-init for a change in resolution, and re-init will in fact
    // cause problems.
    static bool phy_configured = false;

    bool found = select_resolution(id);
    if (!found) {
        error("Unable to init hdmi, resolution id is invalid!\n");
    }
    enable_display_clocks();
    hdmi_controller_init();
    tcon_init();

    if (phy_configured) {
        return;
    }
    sun20i_d1_hdmi_phy_config();
    phy_configured = true;
}

// Search avail_resolutions for the entry whose id matches `id`. On a match,
// copy that entry into module.config and return true; return false (leaving
// module.config unchanged) if the terminating entry is reached first.
static bool select_resolution(hdmi_resolution_id_t id) {
    const struct display_timing *entry = avail_resolutions;
    while (entry->id != HDMI_INVALID) {
        if (entry->id == id) {
            module.config = *entry;
            return true;
        }
        entry++;
    }
    return false;
}

hdmi_resolution_id_t hdmi_best_match(int width, int height) {
    hdmi_resolution_id_t chosen = HDMI_INVALID;
    // resolutions listed in order from largest to smallest, choose "tightest" (i.e. smallest that fits)
    for (int i = 0; avail_resolutions[i].id != HDMI_INVALID; i++) {
        if (width <= avail_resolutions[i].horiz.pixels && height <= avail_resolutions[i].vert.pixels) {
            chosen = avail_resolutions[i].id;
        }
    }
    return chosen;
}

int hdmi_get_screen_width(void) {
    if (module.config.id == HDMI_INVALID) error("Must call hdmi_init before using hdmi_get_screen_width()");
    return module.config.horiz.pixels;
}
int hdmi_get_screen_height(void) {
    if (module.config.id == HDMI_INVALID) error("Must call hdmi_init before using hdmi_get_screen_height()");
    return module.config.vert.pixels;
}

// enable all clocks needed for HDMI+TCON+DE2
// Uses the pll, tcon and de settings of the currently selected resolution
// (module.config). After each configuration call, asserts that the rate
// reported back by the ccu module equals the rate recorded in the table.
static void enable_display_clocks(void) {
    long rate;

    // video PLL first; the tcon and de clocks configured below select it as their source
    rate = ccu_config_pll_dividers(CCU_PLL_VIDEO0_CTRL_REG, module.config.pll.n, module.config.pll.m);
    assert(rate == module.config.pll.clock_rate);
    // hdmi clock, both sub and main (bits 16 and 17)
    ccu_ungate_bus_clock_bits(CCU_HDMI_BGR_REG, 1 << 0, (1 << 16)|(1 << 17) );
    ccu_config_module_clock(CCU_HDMI_24M_CLK_REG, 0, 0, 0);
    // tcon top clock
    ccu_ungate_bus_clock(CCU_DPSS_TOP_BGR_REG);
    // tcon tv clock
    ccu_ungate_bus_clock(CCU_TCONTV_BGR_REG);
    rate = ccu_config_module_clock(CCU_TCONTV_CLK_REG, module.config.tcon.src, module.config.tcon.factor_n, module.config.tcon.factor_m);
    assert(rate == module.config.tcon.clock_rate);
    // de clock
    ccu_ungate_bus_clock(CCU_DE_BGR_REG);
    rate = ccu_config_module_clock(CCU_DE_CLK_REG, module.config.de.src, module.config.de.factor_n, module.config.de.factor_m);
    assert(rate == module.config.de.clock_rate);
}

// HDMI controller registers must be written in 8-bit chunks:
// store a 16-bit value as two single-byte writes, low byte to arr[0]
// and then high byte to arr[1]
static void hdmi_write_short(volatile uint8_t arr[], short val) {
    const unsigned short raw = (unsigned short)val;
    const uint8_t low_byte = (uint8_t)(raw & 0xff);
    const uint8_t high_byte = (uint8_t)(raw >> 8);
    arr[0] = low_byte;
    arr[1] = high_byte;
}

// Timing helpers for a per-axis timing record d with fields
// pixels, front_porch, sync_pulse, back_porch.
//   BLANKING(d): non-visible portion = front porch + sync pulse + back porch
//   TOTAL(d):    visible pixels plus blanking
// The argument is parenthesized so any expression yielding the record
// (e.g. *ptr or a conditional) can be passed safely.
#define BLANKING(d) ((d).front_porch + (d).sync_pulse + (d).back_porch)
#define TOTAL(d) ((d).pixels + BLANKING(d))

// Programs the HDMI controller (frame controller + main controller) with the
// timings of the selected configuration in module.config.
// Horizontal values and vertical active are 16-bit and written as byte pairs
// via hdmi_write_short; the remaining vertical values are written with a single store.
static void hdmi_controller_init(void) {
    // doc Synopsys Designware @ https://people.freebsd.org/~gonzo/arm/iMX6-HDMI.pdf

    // frame controller
    // V and H sync polarity bits 5&6, data enable input polarity bit 4, HDMI mode bit 3
    module.hdmi_fc->regs.invidconf = (1<<6) | (1<<5) | (1<<4) | (1<<3);

    hdmi_write_short(module.hdmi_fc->regs.inhactv, module.config.horiz.pixels);                 // horizontal active
    hdmi_write_short(module.hdmi_fc->regs.inhblank, BLANKING(module.config.horiz));             // horizontal blanking
    hdmi_write_short(module.hdmi_fc->regs.hsyncindelay, module.config.horiz.front_porch);       // hsync starts after front porch
    hdmi_write_short(module.hdmi_fc->regs.hsyncinwidth, module.config.horiz.sync_pulse);
    hdmi_write_short(module.hdmi_fc->regs.invactv, module.config.vert.pixels);                  // vertical active
    module.hdmi_fc->regs.invblank = BLANKING(module.config.vert);                               // vertical blanking
    module.hdmi_fc->regs.vsyncindelay = module.config.vert.front_porch;
    module.hdmi_fc->regs.vsyncinwidth = module.config.vert.sync_pulse;

    module.hdmi_fc->regs.ctrldur = 12;      // spacing set at minimums
    module.hdmi_fc->regs.exctrldur = 32;    // values from linux bridge driver
    module.hdmi_fc->regs.exctrlspac = 1;
    module.hdmi_fc->regs.chpream[0] = 0x0b;
    module.hdmi_fc->regs.chpream[1] = 0x16;
    module.hdmi_fc->regs.chpream[2] = 0x21;

    // main controller
    module.hdmi_mc->regs.clkdis = 0x7c; // enable pixel+tmds clock (disable others)
}

// Configures TCON TV timing from the selected configuration in module.config
// and routes the display mixer output through TCON TOP to the tcon tv.
// All bit masks use unsigned constants: shifting a signed 1 (or 0xf) into
// bit 31 overflows int, which is undefined behavior in C.
static void tcon_init(void) {
    module.tcon_tv->regs.gtcl = (1u << 31);    // tcon_tv global enable @[31]
    // vertical video start delay is computed by excluding vertical front
    // porch value from total vertical timings
    // See https://lkml.iu.edu/hypermail/linux/kernel/1910.0/06574.html
    uint32_t start_delay = TOTAL(module.config.vert) - (module.config.vert.pixels + module.config.vert.front_porch) - 1;
    module.tcon_tv->regs.ctl = (1u << 31) | (start_delay << 4); // enable tv @[31], delay @[8-4] (@[1] set for blue test data)

    // [0] input resolution, [1] upscaled resolution, [2] output resolution
    // no scaling is configured: all three are set to the same size (register holds size - 1)
    for (int i = 0; i < 3; i++) {
        module.tcon_tv->regs.dimensions[i].width = module.config.horiz.pixels - 1;
        module.tcon_tv->regs.dimensions[i].height = module.config.vert.pixels - 1;
    }
    // timing registers hold (value - 1); vertical total is written as twice the line count
    module.tcon_tv->regs.htiming.total = TOTAL(module.config.horiz) - 1;
    module.tcon_tv->regs.htiming.bp = module.config.horiz.sync_pulse + module.config.horiz.back_porch - 1;
    module.tcon_tv->regs.vtiming.total = 2 * TOTAL(module.config.vert);
    module.tcon_tv->regs.vtiming.bp = module.config.vert.sync_pulse + module.config.vert.back_porch - 1;
    module.tcon_tv->regs.sync.horiz = module.config.horiz.sync_pulse - 1;
    module.tcon_tv->regs.sync.vert = module.config.vert.sync_pulse - 1;

    // configure tcon_top mux and select tcon tv
    // clear state, config for hdmi, enable
    module.tcon_top->regs.gate &= ~((0xfu << 28) | (0xfu << 20)); // clear bits @[31-28], [23-20]
    module.tcon_top->regs.gate |= (0x1u << 28) | (0x1u << 20); // from Linux
    module.tcon_top->regs.port_sel &= ~((0x3u << 4) | (0x3u << 0)); // clear bits @[5-4], @[1-0]
    module.tcon_top->regs.port_sel |= (2u << 0); // config mixer0 -> tcon_tv
}

/*
 * Code below drives the HDMI PHY used on the Allwinner D1 SoC. The
 * PHY is responsible for low-level HDMI clock and timing signals.
 * The PHY used for the D1 is a custom design by Allwinner. Sadly there
 * seems to be zero documentation for it. I got the code below from the
 * BSP and linux kernel driver.
 *
 * juliez July 2023
 */

// Everything from here down was taken near-verbatim from linux-sunxi kernel driver
// https://github.com/smaeul/linux/blob/d1/all/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c

#define AW_PHY_TIMEOUT 1000     // max number of polls for each status wait below (5 us delay per poll)

/*
 * Powers up the PHY stages in the order given in the sequence comment below.
 * Three status bits are awaited along the way (rcal done, pll lock, tx ready);
 * each is polled up to AW_PHY_TIMEOUT times with a 5 us delay between polls.
 * Returns 0 on success, -1 (after printing which wait timed out) on failure.
 */
static int sun20i_d1_hdmi_phy_enable(void) {
    int i = 0, status = 0;

    //enib -> enldo -> enrcal -> encalog -> enbi[3:0] -> enck -> enp2s[3:0] -> enres -> enresck -> entx[3:0]
    phy_base->phy_ctl4.bits.reg_slv = 4;     //low power voltage 1.08V, default is 3, set 4 as well as pll_ctl0 bit [24:26]
    phy_base->phy_ctl5.bits.enib = 1;
    phy_base->phy_ctl0.bits.enldo = 1;
    phy_base->phy_ctl0.bits.enldo_fs = 1;
    phy_base->phy_ctl5.bits.enrcal = 1;

    phy_base->phy_ctl5.bits.encalog = 1;

    // wait for phy_rcalend2d_status to be set
    for (i = 0; i < AW_PHY_TIMEOUT; i++) {
        timer_delay_us(5);
        status = phy_base->phy_pll_sts.bits.phy_rcalend2d_status;
        if (status & 0x1) {
            break;
        }
    }
    // loop ran to completion without ever seeing the bit set
    if ((i == AW_PHY_TIMEOUT) && !status) {
        printf("phy_rcalend2d_status TIMEOUT\n");
        return -1;
    }

    phy_base->phy_ctl0.bits.enbi = 0xF;
    // wait for pll lock
    for (i = 0; i < AW_PHY_TIMEOUT; i++) {
        timer_delay_us(5);
        status = phy_base->phy_pll_sts.bits.pll_lock_status;
        if (status & 0x1) {
            break;
        }
    }
    if ((i == AW_PHY_TIMEOUT) && !status) {
        printf("pll_lock_status TIMEOUT\n");
        return -1;
    }

    phy_base->phy_ctl0.bits.enck = 1;
    phy_base->phy_ctl5.bits.enp2s = 0xF;
    phy_base->phy_ctl5.bits.enres = 1;
    phy_base->phy_ctl5.bits.enresck = 1;
    phy_base->phy_ctl0.bits.entx = 0xF;

    // wait for tx_ready_dly_status to be set
    for (i = 0; i < AW_PHY_TIMEOUT; i++) {
        timer_delay_us(5);
        status = phy_base->phy_pll_sts.bits.tx_ready_dly_status;
        if (status & 0x1) {
            break;
        }
    }
    if ((i == AW_PHY_TIMEOUT) && !status) {
        printf("tx_ready_status TIMEOUT\n");
        return -1;
    }

    return 0;
}

/*
 * Full PHY bring-up, in stages (each marked by a comment below):
 *   reset      - clear every enable that sun20i_d1_hdmi_phy_enable sets, hold pll in reset
 *   set_mpll   - pll source and analog settings
 *   set_div    - divider chain, then power pll on and release reset
 *   configure  - fixed values for phy_ctl1..phy_ctl4 (masked read-modify-write)
 *   set_clk    - phy_ctl6/phy_ctl7 clock settings
 *   enable     - run the power-up sequence, then enable sda/scl/hpd/den and envbs
 * Returns 0 on success, otherwise the nonzero result of sun20i_d1_hdmi_phy_enable.
 * The write order is taken from the reference driver; do not reorder.
 */
static int sun20i_d1_hdmi_phy_config(void) {
    int ret;

    /* enable all channel */
    phy_base->phy_ctl5.bits.reg_p1opt = 0xF;

    // phy_reset
    // enables are cleared in the reverse of the order used to set them in phy_enable
    phy_base->phy_ctl0.bits.entx = 0;
    phy_base->phy_ctl5.bits.enresck = 0;
    phy_base->phy_ctl5.bits.enres = 0;
    phy_base->phy_ctl5.bits.enp2s = 0;
    phy_base->phy_ctl0.bits.enck = 0;
    phy_base->phy_ctl0.bits.enbi = 0;
    phy_base->phy_ctl5.bits.encalog = 0;
    phy_base->phy_ctl5.bits.enrcal = 0;
    phy_base->phy_ctl0.bits.enldo_fs = 0;
    phy_base->phy_ctl0.bits.enldo = 0;
    phy_base->phy_ctl5.bits.enib = 0;
    phy_base->pll_ctl1.bits.reset = 1;
    phy_base->pll_ctl1.bits.pwron = 0;
    phy_base->pll_ctl0.bits.envbs = 0;

    // phy_set_mpll
    phy_base->pll_ctl0.bits.cko_sel = 0x3;
    phy_base->pll_ctl0.bits.bypass_ppll = 0x1;
    phy_base->pll_ctl1.bits.drv_ana = 1;
    phy_base->pll_ctl1.bits.ctrl_modle_clksrc = 0x0; //0: PLL_video   1: MPLL
    phy_base->pll_ctl1.bits.sdm_en = 0x0;            //mpll sdm jitter is very large, not used for the time being
    phy_base->pll_ctl1.bits.sckref = 0;        //default value is 1
    phy_base->pll_ctl0.bits.slv = 4;
    phy_base->pll_ctl0.bits.prop_cntrl = 7;   //default value 7
    phy_base->pll_ctl0.bits.gmp_cntrl = 3;    //default value 1
    phy_base->pll_ctl1.bits.ref_cntrl = 0;
    phy_base->pll_ctl0.bits.vcorange = 1;

    // phy_set_div
    phy_base->pll_ctl0.bits.div_pre = 0;      //div7 = n+1
    phy_base->pll_ctl1.bits.pcnt_en = 0;
    phy_base->pll_ctl1.bits.pcnt_n = 1;       //div6 = 1 (pcnt_en=0)    [div6 = n (pcnt_en = 1) note that some multiples are problematic] 4-256
    phy_base->pll_ctl1.bits.pixel_rep = 0;    //div5 = n+1
    phy_base->pll_ctl0.bits.bypass_clrdpth = 0;
    phy_base->pll_ctl0.bits.clr_dpth = 0;     //div4 = 1 (bypass_clrdpth = 0)
    //00: 2    01: 2.5  10: 3   11: 4
    phy_base->pll_ctl0.bits.n_cntrl = 1;      //div
    phy_base->pll_ctl0.bits.div2_ckbit = 0;   //div1 = n+1
    phy_base->pll_ctl0.bits.div2_cktmds = 0;  //div2 = n+1
    phy_base->pll_ctl0.bits.bcr = 0;          //div3    0: [1:10]  1: [1:40]
    // dividers are set; power the pll on and take it out of reset
    phy_base->pll_ctl1.bits.pwron = 1;
    phy_base->pll_ctl1.bits.reset = 0;

    // configure phy
    /* config values taken from table */
    // each line keeps the bits selected by the mask and ORs in the fixed value
    phy_base->phy_ctl1.dwval = ((phy_base->phy_ctl1.dwval & 0xFFC0FFFF) | /* config->phy_ctl1 */ 0x0);
    phy_base->phy_ctl2.dwval = ((phy_base->phy_ctl2.dwval & 0xFF000000) | /* config->phy_ctl2 */ 0x0);
    phy_base->phy_ctl3.dwval = ((phy_base->phy_ctl3.dwval & 0xFFFF0000) | /* config->phy_ctl3 */ 0xFFFF);
    phy_base->phy_ctl4.dwval = ((phy_base->phy_ctl4.dwval & 0xE0000000) | /* config->phy_ctl4 */ 0xC0D0D0D);
    //phy_base->pll_ctl0.dwval |= config->pll_ctl0;
    //phy_base->pll_ctl1.dwval |= config->pll_ctl1;

    // phy_set_clk
    phy_base->phy_ctl6.bits.switch_clkch_data_corresponding = 0;
    phy_base->phy_ctl6.bits.clk_greate0_340m = 0x3FF;
    phy_base->phy_ctl6.bits.clk_greate1_340m = 0x3FF;
    phy_base->phy_ctl6.bits.clk_greate2_340m = 0x0;
    phy_base->phy_ctl7.bits.clk_greate3_340m = 0x0;
    phy_base->phy_ctl7.bits.clk_low_340m = 0x3E0;
    phy_base->phy_ctl6.bits.en_ckdat = 1;       //default value is 0

    // phy_base->phy_ctl2.bits.reg_resdi = 0x18;
    // phy_base->phy_ctl4.bits.reg_slv = 3;         //low power voltage 1.08V, default value is 3

    phy_base->phy_ctl1.bits.res_scktmds = 0;  //
    phy_base->phy_ctl0.bits.reg_csmps = 2;
    phy_base->phy_ctl0.bits.reg_ck_test_sel = 0;  //?
    phy_base->phy_ctl0.bits.reg_ck_sel = 1;
    phy_base->phy_indbg_ctrl.bits.txdata_debugmode = 0;

    // phy_enable
    ret = sun20i_d1_hdmi_phy_enable();
    if (ret)
        return ret;     // power-up timed out; leave remaining enables untouched

    phy_base->phy_ctl0.bits.sda_en = 1;
    phy_base->phy_ctl0.bits.scl_en = 1;
    phy_base->phy_ctl0.bits.hpd_en = 1;
    phy_base->phy_ctl0.bits.reg_den = 0xF;
    phy_base->pll_ctl0.bits.envbs = 1;

    return 0;
}

/*
 * High-speed timer used as countdown with interrupt
 *
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 */

#include "hstimer.h"
#include <stdint.h>
#include "ccu.h"

// structs defined to match layout of hardware registers
// One hstimer_t per timer; the union with padding[0x20] makes each timer
// occupy 0x20 bytes so TIMER_BASE can be indexed as an array of timers.
typedef union {
    struct {
        uint32_t ctrl;      // control: enable @[0], reload @[1] (see uses in functions below)
        uint32_t intv_lo;   // lower 32 bits of interval (see docs note below, write low before high)
        uint32_t intv_hi;   // upper bits of interval
        uint32_t cur_lo;    // lower 32 bits of cur value
        uint32_t cur_hi;    // upper bits of cur value
    } regs;
    uint8_t padding[0x20];
} hstimer_t;

// timer 0 registers start at 0x3008020, timer 1 follows 0x20 bytes later
#define TIMER_BASE ((hstimer_t *)0x3008020)
_Static_assert(&(TIMER_BASE[0].regs.ctrl)   == (uint32_t *)0x3008020, "hstimer0 ctrl reg must be at address 0x3008020");
_Static_assert(&(TIMER_BASE[1].regs.ctrl)   == (uint32_t *)0x3008040, "hstimer1 ctrl reg must be at address 0x3008040");

// interrupt registers shared by both timers; bit N corresponds to timer N
// (see use of (1 << index) in the functions below)
typedef union {
    struct {
        uint32_t irq_en;    // interrupt enable
        uint32_t irq_stas;  // interrupt status
    } regs;
} hstimer_irq_t;

#define INTERRUPT_BASE ((hstimer_irq_t *)0x3008000)
_Static_assert(&(INTERRUPT_BASE->regs.irq_stas) == (uint32_t *)0x3008004, "hstimer irq stas reg must be at address 0x3008004");

// module state: fixed pointers to the memory-mapped registers
// (volatile so every access is a real load/store to the device)
static struct {
    volatile hstimer_irq_t *interrupt;  // shared interrupt enable/status registers
    volatile hstimer_t *timers;         // array of per-timer register blocks, indexed by timer id
} const module = {
    .interrupt = INTERRUPT_BASE,
    .timers = TIMER_BASE,
};

/* From docs:
    HSTimer0 is a 56-bit counter. The interval value consists of two parts:
    HS_TMR0_INTV_VALUE_LO acts as the bit[31:0] and
    HS_TMR0_INTV_VALUE_HI acts as the bit[55:32].
    To read or write the interval value, HS_TMR0_INTV_LO_REG should be done before HS_TMR0_INTV_HI_REG.
 */
// Configure timer `index` as a periodic countdown of `usecs` microseconds
// and enable its interrupt. The timer is left disabled; call hstimer_enable
// to start the countdown. Any index other than HSTIMER0/HSTIMER1 is ignored.
void hstimer_init(hstimer_id_t index, long usecs) {
    if (index == HSTIMER0 || index == HSTIMER1) {
        // clock up peripheral, result is used as the tick rate for the conversion below
        long rate = ccu_ungate_bus_clock(CCU_HSTIMER_BGR_REG);
        // config for normal mode, periodic (not one-shot), prescale = 2^0, disabled
        module.timers[index].regs.ctrl = (0 << 7) | (0 << 4);
        // convert microseconds to timer ticks based on clock frequency
        uint64_t count = usecs * rate / (1000*1000);
        uint32_t low_word = count & 0xffffffff;
        uint32_t high_word = count >> 32;
        // must set low before high (see docs note above)
        module.timers[index].regs.intv_lo = low_word;
        module.timers[index].regs.intv_hi = high_word;
        // reload interval into cur
        module.timers[index].regs.ctrl = module.timers[index].regs.ctrl | (1 << 1);
        // enable interrupts for this timer
        module.interrupt->regs.irq_en = module.interrupt->regs.irq_en | (1 << index);
    }
}

// Start (or resume) the countdown of timer `index` by setting the enable
// bit @[0] of its ctrl register. Invalid index is ignored.
void hstimer_enable(hstimer_id_t index) {
    if (index == HSTIMER0 || index == HSTIMER1) {
        module.timers[index].regs.ctrl = module.timers[index].regs.ctrl | 1;
    }
}

// Pause the countdown of timer `index` by clearing the enable bit @[0]
// of its ctrl register. Invalid index is ignored.
void hstimer_disable(hstimer_id_t index) {
    if (index == HSTIMER0 || index == HSTIMER1) {
        module.timers[index].regs.ctrl = module.timers[index].regs.ctrl & ~1;
    }
}

// Acknowledge the interrupt of timer `index`. The status register is
// write-1-to-clear, so only this timer's bit is written.
// Invalid index is ignored.
void hstimer_interrupt_clear(hstimer_id_t index) {
    if (index == HSTIMER0 || index == HSTIMER1) {
        module.interrupt->regs.irq_stas = (1 << index);
    }
}

/*
 * Use of PLIC to configure and dispatch external interrupts
 *
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 * June 2023
 */

#include "interrupts.h"
#include "assert.h"
#include "mango.h"
#include <stddef.h>
#include "_system.h"

/* Module-private helpers defined in interrupts_asm.s */
unsigned long interrupts_get_mepc(void);    // returns value of mepc CSR
unsigned long interrupts_get_mcause(void);  // returns value of mcause CSR
unsigned long interrupts_get_mtval(void);   // returns value of mtval CSR
void interrupts_set_mtvec(void *);          // writes argument to mtvec CSR (trap handler address)

#define N_SOURCES 256   // number of interrupt sources managed (size of handlers table)
// structs defined to match layout of hardware registers
// Three consecutive 1024-word arrays, so pending starts at byte offset 0x1000
// and enable at byte offset 0x2000 from SOURCE_BASE.
typedef union {
    struct {
        uint32_t priority[1024];    // only first 256 used (N_SOURCES)
        uint32_t pending[1024];     // only first 8 used (one pending bit per source)
        uint32_t enable[1024];      // only first 8 used (one enable bit per source)
    } regs;
} source_t;

#define SOURCE_BASE ((source_t *)0x10000000)
_Static_assert(&(SOURCE_BASE->regs.priority[0]) == (uint32_t *)0x10000000, "priority0 reg must be at address 0x10000000");
_Static_assert(&(SOURCE_BASE->regs.pending[1])  == (uint32_t *)0x10001004, "pending1 reg must be at address 0x10001004");

// PLIC control registers. The struct is based at the ctrl register (0x101FFFFC)
// so that threshhold lands at 0x10200000 and claim_complete at 0x10200004.
typedef union {
    struct {
        uint32_t ctrl;              // written 0 in interrupts_init for machine mode only
        uint32_t threshhold;        // written 0 in interrupts_init to accept any priority
        uint32_t claim_complete;    // read to claim a pending source, write source back to complete
    } regs;
} plic_t;

#define PLIC_BASE ((plic_t *)0x101FFFFC)
_Static_assert(&(PLIC_BASE->regs.claim_complete)   == (uint32_t *)0x10200004, "plic claim reg must be at address 0x10200004");

// module state: pointers to memory-mapped registers plus the software
// dispatch table (one handler slot per interrupt source)
static struct {
    volatile source_t *sources;     // per-source priority/pending/enable registers
    volatile plic_t *plic;          // ctrl/threshhold/claim registers
    struct {
        handlerfn_t fn;             // function to call when this source interrupts (NULL if none)
        void *aux_data;             // passed as argument to fn
    } handlers[N_SOURCES];          // indexed by interrupt source number
    bool initialized;               // set true by interrupts_init
} module = {
    .sources = SOURCE_BASE,
    .plic =    PLIC_BASE,
};

// Maps an exception cause code to a human-readable name for error reports.
// Codes beyond the end of the table yield "Unknown".
static const char *description(unsigned int cause) {
    static const char *names[] = {
        "Instruction address misaligned",
        "Instruction access fault",
        "Illegal instruction",
        "Breakpoint",
        "Load address misaligned",
        "Load access fault",
        "Store/AMO address misaligned",
        "Store/AMO access fault",
        "E-call from U-mode",
        "E-call from S-mode",
        "Reserved (10)",
        "E-call from M-mode", /* code 11; with the mcause interrupt bit set, code 11 is external interrupt, M-mode */
        "Instruction page fault",
        "Load page fault",
        "Reserved (14)",
        "Store/AMO page fault",
    };
    const unsigned int count = sizeof(names) / sizeof(names[0]);
    if (cause >= count) {
        return "Unknown";
    }
    return names[cause];
}

void _trap_handler(void);

// gcc attribute used to generate prologue/epilogue appropriate for machine interrupt
// https://gcc.gnu.org/onlinedocs/gcc/RISC-V-Function-Attributes.html
//
// Single entry point for all traps (installed in mtvec).
//  - machine external interrupt: claim the source from the PLIC, dispatch to
//    the handler registered for it, then signal completion to the PLIC
//  - anything else: report as exception (cause, mtval, mepc) and abort
__attribute__((interrupt("machine"))) void _trap_handler(void) {
// mcause of machine external interrupt: interrupt flag in top bit @[63], cause code 0xb.
// Unsigned constant because mcause is unsigned long and 1L << 63 overflows signed long.
#define EXTERNAL_INTERRUPT ((1UL << 63) | 0xb)
    unsigned long mcause = interrupts_get_mcause();   // read once, used for both test and report
    if (mcause == EXTERNAL_INTERRUPT) {
        // no need to search pending bits to identify source, claim reg has it
        uint32_t source = module.plic->regs.claim_complete; // read claim_complete to "claim" (atomically clears pending bit)
        if (source != 0) {  // claim of 0 means no interrupt was pending, nothing to dispatch or complete
            const uint32_t n_handlers = sizeof(module.handlers) / sizeof(module.handlers[0]);
            if (source >= n_handlers || module.handlers[source].fn == NULL) {
                // calling through a NULL/out-of-range slot would jump to a bogus address;
                // report the actual problem instead
                sys_report_error("INTERRUPT: no handler registered for source %d\n", (int)source);
                mango_abort();
            }
            module.handlers[source].fn(module.handlers[source].aux_data); // dispatch to registered handler
            module.plic->regs.claim_complete = source;   // write claim_complete to "complete"
        }
    } else {
        sys_report_error("EXCEPTION: %s (mtval 0x%lx, mepc 0x%lx)\n", description(mcause), interrupts_get_mtval(), interrupts_get_mepc());
        mango_abort();
    }
}

// One-time setup of the interrupts module; reports an error if called again.
// Leaves interrupts globally disabled with every source disabled, priority 0
// and no handler; installs _trap_handler as the trap vector.
void interrupts_init(void) {
    if (module.initialized) error("interrupts_init() must be called only once");
    interrupts_global_disable();        // no interrupts while reconfiguring
    module.plic->regs.ctrl = 0;         // machine mode only
    module.plic->regs.threshhold = 0;   // accept interrupts of any priority
    interrupts_set_mtvec(_trap_handler);            // install trap handler
    // pending/enable hold one bit per source: 8 words x 32 bits covers N_SOURCES (256)
    for (int i = 0; i < 8; i++) {       // all sources start disabled
        module.sources->regs.pending[i] = 0;
        module.sources->regs.enable[i] = 0;
    }
    for (int i = 0; i < N_SOURCES; i++) {
        module.sources->regs.priority[i] = 0;   // priority 0 is disable
        module.handlers[i].fn = NULL;           // no handler registered
        module.plic->regs.claim_complete = i; // mark any pending request completed
    }
    module.initialized = true;
}

// Returns true if `source` falls within one of the ranges of interrupt
// sources this module supports (UART, TWI, SPI, HSTIMER, GPIO), false otherwise.
static bool is_valid_source(interrupt_source_t source) {
    if (source >= INTERRUPT_SOURCE_UART0 && source <= INTERRUPT_SOURCE_UART5) {
        return true;
    }
    if (source >= INTERRUPT_SOURCE_TWI0 && source <= INTERRUPT_SOURCE_TWI3) {
        return true;
    }
    if (source >= INTERRUPT_SOURCE_SPI0 && source <= INTERRUPT_SOURCE_SPI1) {
        return true;
    }
    if (source >= INTERRUPT_SOURCE_HSTIMER0 && source <= INTERRUPT_SOURCE_HSTIMER1) {
        return true;
    }
    if (source >= INTERRUPT_SOURCE_GPIOB && source <= INTERRUPT_SOURCE_GPIOG) {
        return true;
    }
    return false;
}

// Enable or disable delivery of interrupts from `source`.
// Enabling sets the source's priority to 1 and its bit in the enable
// registers; disabling sets priority 0 and clears the bit.
// Reports an error if the module is not initialized or source is not valid.
static void set_source_enabled(interrupt_source_t source, bool enabled) {
    if (!module.initialized) error("interrupts_init() has not been called!\n");
    if (!is_valid_source(source)) error("request to enable/disable interrupt source that is not valid");
    // enable registers hold one bit per source, 32 sources per word
    int bank = source / 32;
    int shift = source % 32;
    // unsigned mask: 1 << 31 would overflow a signed int when shift is 31
    uint32_t mask = 1u << shift;
    if (enabled) {
        module.sources->regs.priority[source] = 1; // priority at 1 (0 is disable, 1 is lowest)
        module.sources->regs.enable[bank] |= mask;
    } else {
        module.sources->regs.priority[source] = 0;
        module.sources->regs.enable[bank] &= ~mask;
    }
}

// Enable interrupts from `source` (see set_source_enabled for checks performed)
void interrupts_enable_source(interrupt_source_t source) {
    set_source_enabled(source, true);
}

// Disable interrupts from `source` (see set_source_enabled for checks performed)
void interrupts_disable_source(interrupt_source_t source) {
    set_source_enabled(source, false);
}

// Record `fn` as the handler for `source`; _trap_handler will call
// fn(aux_data) when that source interrupts. Replaces any handler previously
// registered for the source. Does not enable the source.
// Reports an error if the module is not initialized or source is not valid.
void interrupts_register_handler(interrupt_source_t source, handlerfn_t fn, void *aux_data) {
    if (!module.initialized) error("interrupts_init() has not been called!\n");
    if (!is_valid_source(source)) error("request to register handler for interrupt source that is not valid");
    module.handlers[source].fn = fn;
    module.handlers[source].aux_data = aux_data;
}

/*
 * Asm routines to access CSRs used by interrupts module
 */
 .attribute arch, "rv64im_zicsr"

# interrupts_global_enable: turn on interrupt delivery
# Sets bit 3 of mstatus (mstatus.mie, global interrupt enable) and
# bit 11 of mie (machine external interrupts). Overwrites a0.
.globl interrupts_global_enable
interrupts_global_enable:
    li a0,1<<3
    csrs mstatus,a0     # set bits of a0 in mstatus
    li a0,1<<11
    csrs mie,a0         # set bits of a0 in mie
    ret

# interrupts_global_disable: turn off interrupt delivery
# Clears the same two bits that interrupts_global_enable sets:
# bit 3 of mstatus and bit 11 of mie. Overwrites a0.
.globl interrupts_global_disable
interrupts_global_disable:
    li a0,1<<3
    csrc mstatus,a0     # clear bits of a0 in mstatus
    li a0,1<<11
    csrc mie,a0         # clear bits of a0 in mie
    ret

# interrupts_get_mcause: returns value of mcause CSR in a0
.globl interrupts_get_mcause
interrupts_get_mcause:
    csrr a0,mcause
    ret

# interrupts_get_mepc: returns value of mepc CSR in a0
.globl interrupts_get_mepc
interrupts_get_mepc:
    csrr a0,mepc
    ret

# interrupts_get_mtval: returns value of mtval CSR in a0
.globl interrupts_get_mtval
interrupts_get_mtval:
    csrr a0,mtval
    ret

# interrupts_set_mtvec: writes a0 (address of trap handler) to mtvec CSR
.globl interrupts_set_mtvec
interrupts_set_mtvec:
    csrw mtvec,a0
    ret

/*
 * Mango module
 *
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 * Updated: Mon Jan  1 11:50:26 PST 2024
 */

#include "mango.h"
#include "timer.h"
#include "uart.h"

// structs defined to match layout of hardware registers
// Eight consecutive 32-bit registers starting at WATCHDOG_BASE;
// byte offset of each field is noted alongside.
typedef union {
    struct {
        uint32_t irq_enable;    // +0x00
        uint32_t irq_status;    // +0x04
        uint32_t soft_reset;    // +0x08
        uint32_t reserved;      // +0x0c
        uint32_t control;       // +0x10
        uint32_t config;        // +0x14, selects what the watchdog resets (see mango_reboot)
        uint32_t mode;          // +0x18, interval and enable (see mango_reboot)
        uint32_t output;        // +0x1c
    } regs;
} watchdog_t;

#define WATCHDOG_BASE ((watchdog_t *)0x20500A0)
_Static_assert(&(WATCHDOG_BASE->regs.mode)   == (uint32_t *)0x20500b8, "watchdog mode reg must be at address 0x20500b8");

// module state: fixed pointer to the memory-mapped watchdog registers
static struct {
    volatile watchdog_t *wdog;
} const module = {
    .wdog = WATCHDOG_BASE
};

// Reset the system by arming the watchdog with the shortest interval and
// then spinning until it fires. Does not return.
void mango_reboot(void) {
    timer_delay_ms(100); // give output time to flush (needed if using uart)

    const int cycles = 1;
    // mode register fields: key 0x16aa in upper 16 bits unlocks write to the
    // lower bits, cycles in bits [7:4], enable 1 in lsb [0]
    const uint32_t key_field = (uint32_t)0x16aa << 16;
    const uint32_t interval_field = (uint32_t)cycles << 4;
    const uint32_t enable_bit = 1;

    module.wdog->regs.config = 1; // config watchdog for whole system reset
    module.wdog->regs.mode = key_field | interval_field | enable_bit;
    for (;;) {
        // wait here for watchdog to reset the system
    }
}

#define DOT_MS 120

// Turn the act led on and hold it for `ndot` dot-lengths (DOT_MS ms each)
static void on_dot(int ndot) {
    const int hold_ms = ndot * DOT_MS;
    mango_actled(LED_ON);
    timer_delay_ms(hold_ms);
}
// Turn the act led off and hold it for `ndot` dot-lengths (DOT_MS ms each)
static void off_dot(int ndot) {
    const int hold_ms = ndot * DOT_MS;
    mango_actled(LED_OFF);
    timer_delay_ms(hold_ms);
}

// Halt the program: blink S-O-S in Morse code on the act led forever.
// Does not return.
void mango_abort(void) {
    // Timing from https://en.wikipedia.org/wiki/SOS
    // Durations in dot units, alternating led on, off, on, off, ...
    static const int pattern[] = {
        1, 1, 1, 1, 1, 3,   // S: three short, then gap between letters
        3, 1, 3, 1, 3, 3,   // O: three long, then gap between letters
        1, 1, 1, 1, 1, 7,   // S: three short, then gap before repeating
    };
    const int count = sizeof(pattern) / sizeof(pattern[0]);

    for (;;) {  // S-O-S
        for (int i = 0; i < count; i++) {
            if (i % 2 == 0) {
                on_dot(pattern[i]);
            } else {
                off_dot(pattern[i]);
            }
        }
    }
}

// Set the state of the act led (gpio PD18): on, off, or toggle from its
// current state. The pin is configured as output on every call.
// Any other value of `s` leaves the led state unchanged.
void mango_actled(enum led_state_t s) {
    static const gpio_id_t led = GPIO_PD18;

    gpio_set_output(led);
    switch (s) {
        case LED_ON:
            gpio_write(led, 1);
            break;
        case LED_OFF:
            gpio_write(led, 0);
            break;
        case LED_TOGGLE:
            gpio_write(led, !gpio_read(led));
            break;
        default:
            break;
    }
}

/*
 * Lookup table to access key information for a PS/2 scan code.
 *
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 */

#include "ps2_keys.h"

// Entry for a scan code that has no key assigned
#define UNUSED { PS2_KEY_NONE, PS2_KEY_NONE }

// For completeness, array lists all keys on the full PS/2 keyboard.
// Refer to the assign5 writeup for spec on which keys are required
// to be implemented by your keyboard driver.
//
// Table is indexed by scan code (index shown in the comment at start of
// each row, 0x00 through 0x83). Each entry is a pair: first value is the
// key/char for the unmodified key, second is its alternate (shifted char for
// typing keys, digit/symbol for keypad keys, same value for special keys).

ps2_key_t const ps2_keys[] = {
    /* scan code */
    /* 00 */      UNUSED,
    /* 01 */   { PS2_KEY_F9, PS2_KEY_F9 },
    /* 02 */      UNUSED,
    /* 03 */   { PS2_KEY_F5, PS2_KEY_F5 },
    /* 04 */   { PS2_KEY_F3, PS2_KEY_F3 },
    /* 05 */   { PS2_KEY_F1, PS2_KEY_F1 },
    /* 06 */   { PS2_KEY_F2, PS2_KEY_F2 },
    /* 07 */   { PS2_KEY_F12, PS2_KEY_F12 },
    /* 08 */      UNUSED,
    /* 09 */   { PS2_KEY_F10, PS2_KEY_F10 },
    /* 0A */   { PS2_KEY_F8,  PS2_KEY_F8 },
    /* 0B */   { PS2_KEY_F6,  PS2_KEY_F6 },
    /* 0C */   { PS2_KEY_F4,  PS2_KEY_F4 },
    /* 0D */   { '\t', '\t' },
    /* 0E */   { '`', '~' },
    /* 0F */      UNUSED,
    /* 10 */      UNUSED,
    /* 11 */   { PS2_KEY_ALT, PS2_KEY_ALT },
    /* 12 */   { PS2_KEY_SHIFT, PS2_KEY_SHIFT },
    /* 13 */      UNUSED,
    /* 14 */   { PS2_KEY_CTRL, PS2_KEY_CTRL },
    /* 15 */   { 'q', 'Q' },
    /* 16 */   { '1', '!' },
    /* 17 */      UNUSED,
    /* 18 */      UNUSED,
    /* 19 */      UNUSED,
    /* 1A */   { 'z', 'Z' },
    /* 1B */   { 's', 'S' },
    /* 1C */   { 'a', 'A' },
    /* 1D */   { 'w', 'W' },
    /* 1E */   { '2', '@' },
    /* 1F */      UNUSED,
    /* 20 */      UNUSED,
    /* 21 */   { 'c', 'C' },
    /* 22 */   { 'x', 'X' },
    /* 23 */   { 'd', 'D' },
    /* 24 */   { 'e', 'E' },
    /* 25 */   { '4', '$' },
    /* 26 */   { '3', '#' },
    /* 27 */      UNUSED,
    /* 28 */      UNUSED,
    /* 29 */   { ' ', ' ' },
    /* 2A */   { 'v', 'V' },
    /* 2B */   { 'f', 'F' },
    /* 2C */   { 't', 'T' },
    /* 2D */   { 'r', 'R' },
    /* 2E */   { '5', '%' },
    /* 2F */      UNUSED,
    /* 30 */      UNUSED,
    /* 31 */   { 'n', 'N' },
    /* 32 */   { 'b', 'B' },
    /* 33 */   { 'h', 'H' },
    /* 34 */   { 'g', 'G' },
    /* 35 */   { 'y', 'Y' },
    /* 36 */   { '6', '^' },
    /* 37 */      UNUSED,
    /* 38 */      UNUSED,
    /* 39 */      UNUSED,
    /* 3A */   { 'm', 'M' },
    /* 3B */   { 'j', 'J' },
    /* 3C */   { 'u', 'U' },
    /* 3D */   { '7', '&' },
    /* 3E */   { '8', '*' },
    /* 3F */      UNUSED,
    /* 40 */      UNUSED,
    /* 41 */   { ',', '<' },
    /* 42 */   { 'k', 'K' },
    /* 43 */   { 'i', 'I' },
    /* 44 */   { 'o', 'O' },
    /* 45 */   { '0', ')' },
    /* 46 */   { '9', '(' },
    /* 47 */      UNUSED,
    /* 48 */      UNUSED,
    /* 49 */   { '.', '>' },
    /* 4A */   { '/', '?' },
    /* 4B */   { 'l', 'L' },
    /* 4C */   { ';', ':' },
    /* 4D */   { 'p', 'P' },
    /* 4E */   { '-', '_' },
    /* 4F */      UNUSED,
    /* 50 */      UNUSED,
    /* 51 */      UNUSED,
    /* 52 */   { '\'', '"' },
    /* 53 */      UNUSED,
    /* 54 */   { '[', '{' },
    /* 55 */   { '=', '+' },
    /* 56 */      UNUSED,
    /* 57 */      UNUSED,
    /* 58 */   { PS2_KEY_CAPS_LOCK, PS2_KEY_CAPS_LOCK },
    /* 59 */   { PS2_KEY_SHIFT, PS2_KEY_SHIFT },
    /* 5A */   { '\n', '\n' },
    /* 5B */   { ']', '}' },
    /* 5C */      UNUSED,
    /* 5D */   { '\\', '|' },
    /* 5E */      UNUSED,
    /* 5F */      UNUSED,
    /* 60 */      UNUSED,
    /* 61 */      UNUSED,
    /* 62 */      UNUSED,
    /* 63 */      UNUSED,
    /* 64 */      UNUSED,
    /* 65 */      UNUSED,
    /* 66 */   { '\b', '\b' },  // delete key produces backspace (= ascii 0x08 = \b)
    /* 67 */      UNUSED,
    /* 68 */      UNUSED,
    /* 69 */   { PS2_KEY_END, '1' },
    /* 6A */      UNUSED,
    /* 6B */   { PS2_KEY_ARROW_LEFT, '4' },
    /* 6C */   { PS2_KEY_HOME, '7' },
    /* 6D */      UNUSED,
    /* 6E */      UNUSED,
    /* 6F */      UNUSED,
    /* 70 */   { PS2_KEY_INSERT, '0' },
    /* 71 */   { PS2_KEY_DELETE, '.' },
    /* 72 */   { PS2_KEY_ARROW_DOWN, '2' },
    /* 73 */   { '5', '5' },
    /* 74 */   { PS2_KEY_ARROW_RIGHT, '6' },
    /* 75 */   { PS2_KEY_ARROW_UP, '8' },
    /* 76 */   { PS2_KEY_ESC, PS2_KEY_ESC },
    /* 77 */   { PS2_KEY_NUM_LOCK, PS2_KEY_NUM_LOCK },
    /* 78 */   { PS2_KEY_F11, PS2_KEY_F11 },
    /* 79 */   { '+', '+' },
    /* 7A */   { PS2_KEY_PAGE_DOWN, '3' },
    /* 7B */   { '-', '-' },
    /* 7C */   { '*', '*' },
    /* 7D */   { PS2_KEY_PAGE_UP, '9' },
    /* 7E */   { PS2_KEY_SCROLL_LOCK, 0 },
    /* 7F */      UNUSED,
    /* 80 */      UNUSED,
    /* 81 */      UNUSED,
    /* 82 */      UNUSED,
    /* 83 */   { PS2_KEY_F7, PS2_KEY_F7 },
};

/*
 * Simple pseudo-random number generator.
 *
 * Author: Philip Levis <pal@cs.stanford.edu>
 * Author: Pat Hanrahan <hanrahan@cs.stanford.edu>
 */

#include "rand.h"

// From http://stackoverflow.com/questions/1167253/implementation-of-rand
unsigned int rand(void) {
    static unsigned int z1 = 12345, z2 = 12345, z3 = 12345, z4 = 12345;
    unsigned int b;

    b  = ((z1 << 6) ^ z1) >> 13;
    z1 = ((z1 & 4294967294U) << 18) ^ b;
    b  = ((z2 << 2) ^ z2) >> 27;
    z2 = ((z2 & 4294967288U) << 2) ^ b;
    b  = ((z3 << 13) ^ z3) >> 21;
    z3 = ((z3 & 4294967280U) << 7) ^ b;
    b  = ((z4 << 3) ^ z4) >> 12;
    z4 = ((z4 & 4294967168U) << 13) ^ b;
    return (z1 ^ z2 ^ z3 ^ z4);
}

/*
 * Implementation of ringbuffer module.
 *
 * The ring buffer data structure allows lock-free concurrent
 * access by one reader and one writer.
 *
 * Author: Philip Levis <pal@cs.stanford.edu>
 *         Julie Zelenski <zelenski@cs.stanford.edu>
 */

#include "ringbuffer.h"
#include "assert.h"
#include "malloc.h"

#define LENGTH 512

/*
 * A ring buffer is represented using a struct containing a fixed-size array
 * and head and tail fields, which are indexes into the entries[] array.
 * head is the index of the frontmost element (head advances during dequeue)
 * tail is the index of the next position to use (tail advances during enqueue)
 * Both head and tail advance circularly, i.e. index = (index + 1) % LENGTH
 * The ring buffer is empty if tail == head
 * The ring buffer is full if tail + 1 == head
 * (Note: one slot remains permanently empty to distinguish full from empty)
 */

struct ringbuffer {
    int entries[LENGTH];    // element storage, at most LENGTH-1 slots in use at once
    int head, tail;         // head: index of frontmost element, tail: index of next free slot
};

// Allocate and return a new, empty ring buffer.
// Asserts that the allocation succeeded.
rb_t *rb_new(void) {
    rb_t *rb = malloc(sizeof *rb);
    assert(rb != NULL);
    // head == tail is the empty state; entries[] needs no initialization
    rb->tail = 0;
    rb->head = 0;
    return rb;
}

// Returns true if the ring buffer holds no elements (head has caught up to tail)
bool rb_empty(rb_t *rb) {
    assert(rb != NULL);
    const int front = rb->head;
    const int next_free = rb->tail;
    return front == next_free;
}

// Returns true if the ring buffer cannot accept another element, i.e.
// advancing tail by one (circularly) would make it collide with head
bool rb_full(rb_t *rb) {
    assert(rb != NULL);
    const int tail_after_add = (rb->tail + 1) % LENGTH;
    return tail_after_add == rb->head;
}

/*
 * Add elem at the tail of the ring buffer. Returns true if added,
 * false if the buffer was full (buffer is unchanged in that case).
 *
 * Called by the writer. Only rb->tail is modified here, never rb->head;
 * keeping each index owned by one side is what allows safe concurrent access.
 */
bool rb_enqueue(rb_t *rb, int elem) {
    assert(rb != NULL);
    if (!rb_full(rb)) {
        const int slot = rb->tail;
        rb->entries[slot] = elem;           // store element first ...
        rb->tail = (slot + 1) % LENGTH;     // ... then advance tail to make it visible
        return true;
    }
    return false;
}

/*
 * Remove the frontmost element and store it in *p_elem. Returns true if an
 * element was removed, false if the buffer was empty (*p_elem is unchanged).
 *
 * Called by the reader. Only rb->head is modified here, never rb->tail;
 * keeping each index owned by one side is what allows safe concurrent access.
 */
bool rb_dequeue(rb_t *rb, int *p_elem) {
    assert(rb != NULL && p_elem != NULL);
    if (!rb_empty(rb)) {
        const int slot = rb->head;
        *p_elem = rb->entries[slot];        // copy element out first ...
        rb->head = (slot + 1) % LENGTH;     // ... then advance head to release the slot
        return true;
    }
    return false;
}

/*
 * File: start.s
 * -------------
 * These asm instructions go first in binary image, they will be
 * the first to be executed in a newly loaded program.
 *
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 */

.attribute arch, "rv64im_zicsr"

# Identify this section as the one to go first in binary image
.section ".text.start"

# _start: program entry. Disables interrupts, installs the trap handler,
# sets up fp and sp, then calls _cstart. If _cstart returns, spins in hang.
.globl _start
_start:
.cfi_startproc
.cfi_undefined ra           # tell gdb this is start routine

    csrc    mstatus, 1<<3   # global disable interrupts, mstatus.mie = 0
    la      t0,_trap_handler
    csrw    mtvec,t0        # install trap handler
.globl _start_gdb
_start_gdb:                 # entry for gdb will skip csr as not avail in sim
    addi    fp,zero,0       # init fp
    la      sp,__stack_top  # init sp (stack grows down)
    jal     _cstart         # call _cstart, execution continues at hang if it returns

hang: j hang                # spin forever
    ret                     # not reached, hang never exits
.cfi_endproc

/*
 * Hardware abstractions for a serial port (UART).
 *
 * Author: Julie Zelenski <zelenski@cs.stanford.edu>
 * Last updated: Wed Dec 27 16:28:18 PST 2023
 */

#include "uart.h"
#include <stddef.h>
#include "assert.h"
#include "ccu.h"
#include "gpio.h"
#include "gpio_extra.h"
#include "interrupts.h"

// Bit masks for uart registers: LCR_ is a bit in the lcr register,
// USR_ are bits in the usr register
#define LCR_DLAB            (1 << 7)    // set to access divisor latch dll/dlh, clear to access rbr/thr
#define USR_BUSY            (1 << 0)
#define USR_TX_NOT_FULL     (1 << 1)
#define USR_TX_NOT_EMPTY    (1 << 2)
#define USR_RX_NOT_EMPTY    (1 << 3)

/*
 * D1 uart has 6 UART controllers (UART0, UART1, UART2, UART3, UART4, UART5)
 * Compatible with industry-standard 16450/16550
 * peripheral registers similar to rpi
 * TX/RX fifo, can be interrupt-driven
 */

// structs defined to match layout of hardware registers
// One uart_t per controller; padding[0x400] makes each controller occupy
// 0x400 bytes so UART_BASE can be indexed as an array of controllers.
// Registers that share an offset are grouped in anonymous unions.
typedef union {
    struct {
        union {             // offset 0x00
            uint32_t rbr;   // receive buffer register
            uint32_t thr;   // transmit holding register
            uint32_t dll;   // divisor latch (LSB)
        };
        union {             // offset 0x04
            uint32_t dlh;   // divisor latch (MSB)
            uint32_t ier;   // interrupt enable register
        };
        union {             // offset 0x08
            uint32_t iir;   // interrupt identification register
            uint32_t fcr;   // FIFO control register
        };
        uint32_t lcr;       // line control register, at offset 0x0c
        uint32_t mcr;       // modem control register
        uint32_t lsr;       // line status register
        uint32_t reserved[25];  // fills gap from 0x18 up to usr
        uint32_t usr;       // busy status, at offset 0x7c
        uint32_t reserved2[9];  // fills gap from 0x80 up to halt
        uint32_t halt;      // at offset 0xa4
    } regs;
    uint8_t padding[0x400];
} uart_t;

#define UART_BASE ((uart_t *)0x02500000)
_Static_assert(&(UART_BASE[0].regs.lcr) == (uint32_t *)0x0250000C, "UART0 lcr reg must be at address 0x0250000C");
_Static_assert(&(UART_BASE[1].regs.dlh) == (uint32_t *)0x02500404, "UART1 dlh reg must be at address 0x02500404");

// Settings that select which uart controller is in use and how it is wired
typedef struct {
    int index;          // which controller, used as index from UART_BASE
    gpio_id_t tx, rx;   // gpio pins used for transmit and receive
    unsigned int fn;    // gpio function applied to both the tx and rx pins
} uart_config_t;

// module state: base of the controller array, the controller currently
// in use (NULL when no uart is active), and its configuration
static struct {
    volatile uart_t *uart_base, *uart;
    uart_config_t config;
} module = { .uart_base = UART_BASE,
             .uart = NULL, // will be set in uart_init
};

// not published for now, used to do testing of alternate uarts
void uart_reinit_custom(int uart_id, gpio_id_t tx, gpio_id_t rx, unsigned int gpio_fn);

/*
 * Shut down the currently active uart (if any), then bring up controller
 * `uart_id` on the given pins: 115200 baud, 8 data bits, no parity, 1 stop bit,
 * FIFOs enabled, modem control and interrupts disabled.
 *
 *   uart_id   controller number, must be in range 0 to 5 (UART0 .. UART5)
 *   tx, rx    gpio pins to use for transmit and receive
 *   gpio_fn   gpio function that routes those pins to the uart
 *
 * An out-of-range uart_id is reported via error() and no state is changed.
 */
void uart_reinit_custom(int uart_id, gpio_id_t tx, gpio_id_t rx, unsigned int gpio_fn) {
    // Reject bad id before touching anything: id is used as a shift count for
    // the clock gating/reset bits and as an index from the base register block,
    // so a value outside 0..5 would address hardware that does not exist.
    const int n_uarts = 6;
    if (uart_id < 0 || uart_id >= n_uarts) {
        error("uart_reinit_custom() requires uart id in range 0 to 5.\n");
        return;     // only reached if error() comes back to caller
    }

    if (module.uart) {  // shut down previous if active
        uart_flush();   // let pending output drain before disconnecting pins
        gpio_set_function(module.config.tx, GPIO_FN_DISABLED); // disconnect gpio
        gpio_set_pullnone(module.config.tx);
        gpio_set_function(module.config.rx, GPIO_FN_DISABLED);
        gpio_set_pullnone(module.config.rx);
        module.uart = NULL;
    }

    // record new configuration and select register block of chosen controller
    module.config.index = uart_id;
    module.config.tx = tx;
    module.config.rx = rx;
    module.config.fn = gpio_fn;
    module.uart = module.uart_base + module.config.index;

    // clock up peripheral
    // gating bits [0:5], reset bits [16:21]
    uint32_t bit = 1 << module.config.index;
    uint32_t reset = bit << 16;
    ccu_ungate_bus_clock_bits(CCU_UART_BGR_REG, bit, reset);

    // configure GPIOs
    gpio_set_function(module.config.tx, module.config.fn);
    gpio_set_pullup(module.config.tx);
    gpio_set_function(module.config.rx, module.config.fn);
    gpio_set_pullup(module.config.rx);

    // configure baud rate
    uint32_t baud = 115200;
    module.uart->regs.fcr = 1;      // enable TX/RX fifo
    module.uart->regs.halt = 1;     // temporarily disable TX transfer

    // divisor = clock / (16 * baud), integer division (24MHz, 115200 -> 13)
    uint32_t sys_clock_rate = 24 * 1000000;
    uint32_t udiv = sys_clock_rate / (16 * baud);
    module.uart->regs.lcr |= LCR_DLAB;  // set DLAB = 1 to access DLL/DLH
    module.uart->regs.dll = udiv & 0xff;        // low byte of divisor -> DLL
    module.uart->regs.dlh = (udiv >> 8) & 0xff; // hi byte of divisor -> DLH
    module.uart->regs.lcr &= ~LCR_DLAB; // set DLAB = 0 to access RBR/THR
    module.uart->regs.halt = 0;     // re-enable TX transfer

    // configure data-parity-stop (low 4 bits of LCR)
    uint8_t data = 0b11;    // 8 data
    uint8_t parity = 0b0;   // no parity
    uint8_t stop = 0b0;     // 1 stop
    uint8_t settings = (parity << 3) | (stop << 2) | (data << 0);
    // clear low 4 bits, replace with settings 8-n-1
    module.uart->regs.lcr = (module.uart->regs.lcr & ~0b1111) | settings;

    module.uart->regs.mcr = 0;    // disable modem control
    module.uart->regs.ier = 0;    // disable interrupts by default
}

void uart_init(void) {
    static bool initialized = false;
    if (initialized) error("uart_init() should be called only once.");
    initialized = true;
    module.uart = NULL;
    // default to UART0 on pins PB8+9
    uart_reinit_custom(0, GPIO_PB8, GPIO_PB9, GPIO_FN_ALT6);
    uart_putstring("\n\n\n\n");
}

/*
 * Route interrupts of the active uart to `handler`.
 *
 *   handler      function registered for the uart's interrupt source
 *   client_data  passed along to interrupts_register_handler with the handler
 *
 * Reports via error() if no uart is active.
 */
void uart_use_interrupts(handlerfn_t handler, void *client_data) {
    if (!module.uart) {
        error("uart_init() has not been called!\n");
    }

    // interrupt source is selected by offsetting from UART0's source by controller index
    const interrupt_source_t uart_src = INTERRUPT_SOURCE_UART0 + module.config.index;

    interrupts_register_handler(uart_src, handler, client_data);   // install handler
    interrupts_enable_source(uart_src);                            // turn on source
    module.uart->regs.ier = 1;                                     // enable interrupts in uart peripheral
}

unsigned char uart_recv(void) {
    if (module.uart == NULL) error("uart_init() has not been called!\n");
    while (!uart_haschar()) ; // wait for char to arrive
    return module.uart->regs.rbr & 0xFF;
}

void uart_send(unsigned char byte) {
    if (module.uart == NULL) error("uart_init() has not been called!\n");
    while ((module.uart->regs.usr & USR_TX_NOT_FULL) == 0) ;
    module.uart->regs.thr = byte & 0xFF;
}

void uart_flush(void) {
    if (module.uart == NULL) error("uart_init() has not been called!\n");
    while ((module.uart->regs.usr & USR_BUSY) != 0) ;
}

bool uart_haschar(void) {
    if (module.uart == NULL) error("uart_init() has not been called!\n");
    return (module.uart->regs.usr & USR_RX_NOT_EMPTY) != 0;
}

// RE: line endings
// Canonical use is '\n' newline as the sole line terminator (both read/write),
// but the connected terminal may expect to receive a CR-LF sequence from the Pi
// and may send a CR to the Pi for the return/enter key. uart_getchar and
// uart_putchar convert these chars internally; the client simply sends/receives
// newline. Use uart_send/uart_recv to send/receive a raw byte with no conversion.

int uart_getchar(void) {
    if (module.uart == NULL) error("uart_init() has not been called!\n");
    int ch = uart_recv();
    if (ch == '\r') {
        return '\n';    // convert CR to newline
    }
    return ch;
}

int uart_putchar(int ch) {
    if (module.uart == NULL) error("uart_init() has not been called!\n");
    // convert newline to CR LF sequence by inserting CR
    if (ch == '\n') {
        uart_send('\r');
    }
    uart_send(ch);
    return ch;
}

int uart_putstring(const char *str) {
    if (module.uart == NULL) error("uart_init() has not been called!\n");
    int n = 0;
    while (str[n]) {
        uart_putchar(str[n++]);
    }
    return n;
}

/*
 * Prepare the serial output for an error report: make sure a uart is active,
 * then switch the terminal text to red-bold.
 */
void uart_start_error(void) {
    // Without an active uart there is no serial connection, so every uart
    // operation (and therefore printf/assert) would be a dead end. Bring up
    // the default UART0 on PB8/PB9 so the problem can actually be reported;
    // otherwise the lack of output is ultra mysterious.
    const bool no_active_uart = (module.uart == NULL);
    if (no_active_uart) {
        uart_reinit_custom(0, GPIO_PB8, GPIO_PB9, GPIO_FN_ALT6);
    }

    const char *red_bold = "\033[31;1m";
    uart_putstring(red_bold);
}

/*
 * Finish an error report: restore normal terminal text attributes and
 * end the line.
 */
void uart_end_error(void) {
    const char *reset_and_newline = "\033[0m\n";
    uart_putstring(reset_and_newline);
}