Video Coprocessor (VCP)

Overview

The Video Coprocessor (VCP) is a programmable hardware unit integrated within the Sandpiper VPU. It allows for custom vector processing operations that can execute in parallel with video scanout, making it ideal for effects like palette cycling, raster effects, and procedural graphics.

VCP programs are written using a custom instruction set and uploaded to the coprocessor for execution. The VCP has its own registers and can perform arithmetic, logic, palette manipulation, and conditional branching operations.

Data Structures

Buffer Size Enumeration

// Program buffer sizes accepted by the VCP. Each step doubles the buffer;
// the instruction counts correspond to 4 bytes per instruction.
enum EVCPBufferSize {
    PRG_128Bytes  = 0,  //  128 bytes ->   32 instructions
    PRG_256Bytes  = 1,  //  256 bytes ->   64 instructions
    PRG_512Bytes  = 2,  //  512 bytes ->  128 instructions
    PRG_1024Bytes = 3,  // 1024 bytes ->  256 instructions
    PRG_2048Bytes = 4,  // 2048 bytes ->  512 instructions
    PRG_4096Bytes = 5,  // 4096 bytes -> 1024 instructions
};

Core Functions

VCPUploadProgram

void VCPUploadProgram(struct SPPlatform *ctx, const uint32_t* program, enum EVCPBufferSize size);

Uploads a VCP program to the coprocessor memory.

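Parameters:
- ctx: Pointer to the SPPlatform context
- program: Pointer to the array of 32-bit VCP instruction words to upload
- size: Program buffer size, one of the EVCPBufferSize values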

Notes: The program must be properly formatted using the VCP instruction set macros. The coprocessor must be stopped before uploading a new program.

VCPExecProgram

void VCPExecProgram(struct SPPlatform *ctx, const uint8_t execFlags);

Starts execution of the uploaded VCP program.

Parameters:
- ctx: Pointer to the SPPlatform context
- execFlags: 8-bit execution flags (see Notes)

Notes: The execFlags parameter controls program execution behavior. Common values: 0x01 for standard execution.

VCPStatus

uint32_t VCPStatus(struct SPPlatform *ctx);

Queries the current status of the VCP.

Parameters:
- ctx: Pointer to the SPPlatform context

Returns: 32-bit status word containing execution state, program counter, and error flags

Status Word Format:

Bits [3:0]   - Error code
Bits [7:4]   - Current register
Bits [19:8]  - Program counter
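Bits [21:20] - Program size (PRG); note this field is 2 bits wide while EVCPBufferSize defines values 0-5, so confirm how sizes above PRG_1024Bytes are reported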
Bits [22]    - Full flag (F)
Bits [23]    - Current flag (C)
Bits [27:24] - Output code

VCP Instruction Set

Command Codes

// VCP command codes
#define VCPSETBUFFERSIZE  0x00  // Select the program buffer size
#define VCPSTARTDMA       0x01  // Begin a DMA operation
#define VCPEXEC           0x02  // Run the program

Instruction Opcodes

// Opcode values for VCP program instructions
#define VCP_NOOP          0x00  // Do nothing
#define VCP_LOADIMM       0x01  // Load an immediate value
#define VCP_PALWRITE      0x02  // Palette write
#define VCP_WAITSCANLINE  0x03  // Wait for a scanline
#define VCP_WAITPIXEL     0x04  // Wait for a pixel position
#define VCP_MATHOP        0x05  // Arithmetic (math) operation
#define VCP_JUMP          0x06  // Jump, unconditional
#define VCP_CMP           0x07  // Comparison
#define VCP_BRANCH        0x08  // Branch, conditional
#define VCP_STORE         0x09  // Memory store
#define VCP_LOAD          0x0A  // Memory load
#define VCP_READSCANINFO  0x0B  // Read the scan position
#define VCP_LOADPC        0x0C  // Load program counter
#define VCP_LOGICOP       0x0D  // Logical operation

Register Definitions

// Register indices: VREG_ZERO is the constant-zero register,
// VREG_1 through VREG_F are general purpose.
#define VREG_ZERO   0x00  // Constant zero
#define VREG_1      0x01  // General purpose 1
#define VREG_2      0x02  // General purpose 2
#define VREG_3      0x03  // General purpose 3
#define VREG_4      0x04  // General purpose 4
#define VREG_5      0x05  // General purpose 5
#define VREG_6      0x06  // General purpose 6
#define VREG_7      0x07  // General purpose 7
#define VREG_8      0x08  // General purpose 8
#define VREG_9      0x09  // General purpose 9
#define VREG_A      0x0A  // General purpose A
#define VREG_B      0x0B  // General purpose B
#define VREG_C      0x0C  // General purpose C
#define VREG_D      0x0D  // General purpose D
#define VREG_E      0x0E  // General purpose E
#define VREG_F      0x0F  // General purpose F

Condition Codes

// Base conditions
#define COND_LE    0x01  // Less than or equal
#define COND_LT    0x02  // Less than
#define COND_EQ    0x04  // Equal
// Modifier bit: OR it into a base condition to test the opposite
#define COND_INV   0x08
// Inverted forms
#define COND_GT    (COND_INV | COND_LE)  // Greater than          (inverted LE)
#define COND_GE    (COND_INV | COND_LT)  // Greater than or equal (inverted LT)
#define COND_NE    (COND_INV | COND_EQ)  // Not equal             (inverted EQ)

Instruction Macros

// Data Movement
vcp_noop()                         // No operation
vcp_ldim(dest, immed)              // Load 24-bit immediate into dest
vcp_mv(dest, src)                  // Move register: dest = src
vcp_clr(dest)                      // Clear register (set to zero)

// Arithmetic
vcp_radd(dest, src1, src2)         // dest = src1 + src2
vcp_rsub(dest, src1, src2)         // dest = src1 - src2
vcp_rinc(dest, src1)               // dest = src1 + 1
vcp_rdec(dest, src1)               // dest = src1 - 1

// Logical
vcp_rand(dest, src1, src2)         // dest = src1 & src2
vcp_ror(dest, src1, src2)          // dest = src1 | src2
vcp_rxor(dest, src1, src2)         // dest = src1 ^ src2
vcp_rneg(dest, src1)               // dest = ~src1 (bitwise NOT)
vcp_rshl(dest, src1, src2)         // dest = src1 << src2
vcp_rshr(dest, src1, src2)         // dest = src1 >> src2 (logical; see vcp_rasr for arithmetic)
vcp_rasr(dest, src1, src2)         // dest = src1 >> src2 (arithmetic, sign-extending)

// Control Flow
vcp_cmp(cond, src1, src2)          // Compare src1 with src2 using a COND_* code and set condition flag
vcp_jump(addrs)                    // Jump to address held in register addrs
vcp_jumpim(offset)                 // Jump PC-relative (signed 16-bit offset)
vcp_branch(addrs)                  // Branch to address in register addrs if condition true
vcp_branchim(offset)               // Branch PC-relative if condition true

// Memory Operations
vcp_store(addrs, src)              // Store register src to memory at addrs
vcp_load(addrs, dest)              // Load from memory at addrs into dest (address first, destination second)

// VPU Interaction
vcp_pwrt(addrs, src)               // Write to palette: palette[addrs] = src
vcp_wscn(line)                     // Wait for scanline
vcp_wpix(pixel)                    // Wait for pixel position (the example below passes a register holding the position)
vcp_scanline_read(dest)            // Read current scanline into dest
vcp_scanpixel_read(dest)           // Read current pixel position into dest
vcp_rcmp(dest)                     // Read compare flag into dest
vcp_rctl(dest)                     // Read VPU control register into dest

Example Usage

Simple Palette Cycling Program

#include "vcp.h"

// Palette cycling program (from vcpdemo sample)
// The listing holds 26 instructions, which fits the 32-word array uploaded
// below as PRG_128Bytes; the unused trailing words are zero-initialized.
// Instruction order matters: the immediate jump/branch offsets are PC-relative.
uint32_t vcpprogram[32] = {
  // Setup: constants and state (instructions 0-9)
  vcp_ldim(VREG_1, 0x000000),              // scrolloffset = 0
  vcp_ldim(VREG_2, 0x0000FF),              // mask = 0xFF
  vcp_ldim(VREG_3, 640),                   // endofline = 640
  vcp_ldim(VREG_8, 0x000002),              // scrollspeed = 2
  vcp_ldim(VREG_9, 0x000003),              // shift3 = 3
  vcp_ldim(VREG_4, 0x000006),              // shift6 = 6
  vcp_ldim(VREG_5, 0x000008),              // shift8 = 8
  vcp_ldim(VREG_C, 0x000010),              // shift16 = 16
  vcp_ldim(VREG_B, 0x000080),              // stopline = 128
  vcp_radd(VREG_1, VREG_1, VREG_8),        // scrolloffset += scrollspeed (sits before the loop entry below)
    
  // loop: (instruction 10) wait for end of line, then update the palette
  vcp_wpix(VREG_3),                        // wait for endofline
  vcp_scanline_read(VREG_6),               // scanline = $videoscanline
  vcp_cmp(COND_EQ, VREG_6, VREG_B),        // scanline == 128?
  // NOTE(review): assuming byte offsets relative to the current instruction
  // (consistent with "jmp loop" below landing on vcp_wpix), +0x34 is 13
  // instructions ahead, i.e. the word just past this listing. The "idle" code
  // is not shown in this excerpt - confirm against the vcpdemo sample.
  vcp_branchim(0x34),                      // branch.eq idle
    
  // Build a 24-bit color from the scanline and scroll offset
  vcp_radd(VREG_7, VREG_6, VREG_1),        // t = scanline + scrolloffset
  vcp_rand(VREG_A, VREG_7, VREG_2),        // b = (t >> 0) & 0xFF
  vcp_rshl(VREG_D, VREG_7, VREG_9),        // g = (t << 3) & 0xFF
  vcp_rand(VREG_D, VREG_D, VREG_2),
  vcp_rshl(VREG_D, VREG_D, VREG_5),        // g <<= 8
  vcp_ror(VREG_A, VREG_A, VREG_D),         // color |= g
  vcp_rshl(VREG_D, VREG_7, VREG_4),        // r = (t << 6) & 0xFF
  vcp_rand(VREG_D, VREG_D, VREG_2),
  vcp_rshl(VREG_D, VREG_D, VREG_C),        // r <<= 16
  vcp_ror(VREG_A, VREG_A, VREG_D),         // color |= r
  vcp_pwrt(VREG_ZERO, VREG_A),             // PAL[0] = color
  vcp_jumpim(-0x3C),                       // jmp loop (-0x3C bytes = 15 instructions back, to vcp_wpix)
};

// Upload and execute
// 32 words = 128 bytes, matching PRG_128Bytes; 0x1 is the standard execution flag
VCPUploadProgram(platform, vcpprogram, PRG_128Bytes);
VCPExecProgram(platform, 0x1);

// Check status
// Prints the raw 32-bit status word in hex
uint32_t status = VCPStatus(platform);
printf("VCP Status: 0x%08X\n", status);

Programming Guidelines

Best Practices

Common Patterns

Performance Considerations

Related Documentation