Uses the RP2040's Programmable IO to create a PAL colour video signal. It consists of the PIO program, some "driver" and tools code, a test program and a program for creating colour Look-up Tables (LUTs).
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
rppico-pio-pal/PAL3.pio

199 lines
6.8 KiB

; PIO assembly code for PAL signal generation
; expected input sequence:
; For vertical blank, a 32 bit word is expected for each synchronization symbol,
; containing the length of the sync pulse in the upper 16 bits and the length
; of the black pulse in the lower 16 bits.
; For each line, the complete data to be put out from the DAC is required starting
; from the colour burst. The first word contains the counter value for inserting a
; shortened sample. Each sample is output for four clock cycles, which leads to
; a phase shift of about -PI until the end of a line due to the mismatch between
; the frequency of the carrier generated by the RP2040 and the actual PAL carrier.
; Inserting a shortened sample, lasting only three clock cycles, every 15 colour
; carrier cycles (i.e. every 120 samples) compensates for this.
; each clock cycle lasts about 7.062 ns when the processor is clocked at 141.6 MHz
.program pio_PAL_DAC
; PIO program that drives a 4 bit DAC to produce one PAL line (or one vertical
; sync sequence) per pass through the wrap loop. All data comes from the TX FIFO
; through the OSR; the jump pin selects between sync symbols and line drawing.
; The trailing number on each instruction line is its index in the program.
; some constants to ease exchanging different implementations
; bits: width of the DAC in pins / bits per sample
.define PUBLIC bits 4
; samples: number of samples taken from each 32 bit FIFO word
.define PUBLIC samples 8
; colourBursts: not referenced inside this program; presumably the number of
; colour burst carrier cycles the driver has to supply (see the budget below)
.define PUBLIC colourBursts 10
; check whether a vertical sync sequence has to be started
; uP has to deassert sync pin in order to start a sync sequence.
.wrap_target
vBlankSelect:
; set pins to sync level (that's what every line starts with, so it can safely
; be done here)
set PINS, 0 ; 0
; assert line interrupt
irq 0 ; 1
; go to either vertical sync or line drawing code: the jump to drawLine is
; taken while the jump pin reads high, otherwise execution falls through to
; the sync symbol code below
jmp PIN drawLine ; 2
; output synchronization symbols
syncSymbol:
; For every symbol, one word is consumed in two 16 bit halves, each loaded
; into the X register as a loop counter.
; The upper 16 bits ([31:16]) determine the number of cycles to hold the output
; at sync level, the remaining 16 bits ([15:0]) the number of cycles
; to hold it at black level. Programmers need to ensure this sums up
; correctly.
; run an initial 2.35 us delay loop to overlap interrupt processing time.
; This is always valid since the sync pulse lasts at least 2.35 us.
; The set takes 13 cycles, the loop below runs 18 times at 16 cycles each.
set X, 17 [12] ; 3
; delay loop
loLoop2us:
jmp X-- loLoop2us [15] ; 4
; fetch sync delay counter (upper half of the word); takes 16 cycles
out X, 16 [15] ; 5
; each delay loop iteration lasts 16 clock cycles.
loLoop:
jmp X-- loLoop [15] ; 6
; set output to black level; takes 16 cycles
set PINS, 4 [15] ; 7
; load black level delay counter (lower half of the word)
out X, 16 ; 8
; run black level delay loop, here every cycle lasts just one clock cycle
; in order to better match symbol durations
hiLoop:
jmp X-- hiLoop ; 9
; set output to sync level and process the next sync symbol (if there is any),
; otherwise start drawing lines
set PINS, 0 ; 10
; once all data from the FIFO is consumed sync is over. Delay by two clock
; cycles to compensate for set + irq + jmp
jmp !OSRE syncSymbol [1] ; 11
; sync sequence finished: raise the line interrupt again before falling
; through into the first line
irq 0 ; 12
drawLine:
; each line must last 283.75 colour carrier cycles - that's about 9062 clock cycles.
; they're distributed as follows:
; irq + jmp: 2
; sync: 672
; back porch0: 138 (0.974556 us) WAS: 122 (0.8545 us)
; colour burst: 320 (10 carrier cycles)
; back porch1: 351 (11 carrier cycles, one of them 31 clock cycles)
; pixel data: 230*32 - 15 = 7345 (230 carrier cycles, 15 of them 31 clock cycles)
; front porch: 234 (should be 1.65us, is 1.652)
; further requirements:
; - sync ~ 4.7 us (665.53 clock cycles), back porch + colour burst ~ 5.7us (807.14 cc)
; - back porch1 delay must be an integer multiple of 32, otherwise colour carrier
; gets out of phase
; sync pulse
; set counter register. We need about 666 clock cycles and get 672: the set
; takes 12 cycles and the loop runs 22 times (X = 21 down to 0) at 30 cycles
; each, which gives 660 cycles.
set X, 21 [11] ; 13
syncLoop:
; hold sync level (pins were already set to 0 at vBlankSelect / after sync)
jmp X-- syncLoop [29] ; 14
; back porch part 0
; should be 900ns which is about 128 clock cycles. To make everything sum up
; correctly we have to use 138 cycles.
; set pin values, following instructions are used to create delay for the
; initial part of back porch (3 + 4 + 4 * 32 + 3 = 138); the trailing 3 is
; presumably the out/mov/jmp executed before the first sample is output
set PINS, 4 [2] ; 15
set X, 3 [3] ; 16
backporch0:
jmp X--, backporch0 [31] ; 17
; colour burst and remaining back porch are now sent as part of the pixel
; data stream. Counter is set to 118 (in Arm code, read from first word)
; to have one clock cycle less every 120 samples (15 carrier cycles): with a
; counter value of N the loop below outputs N + 2 samples per period, exactly
; one of which is shortened.
; read counter value into Y so we can always reload it from there
out Y, 32 ; 18
; load X for the first time
mov X, Y ; 19
; delay loop, output samples at a four clock cycle rate until X counts down
; to zero. Then execution goes to the delay3 branch which reloads X and
; keeps the current sample for only three clock cycles before outputting
; the next one.
ccLoop:
jmp X--, delay4 ; 20
delay3:
; shortened sample: out (2 cycles) + mov (1 cycle) until the next out
out PINS, 4 [1] ; 21
mov X, Y ; 22
delay4:
; regular sample: out (1) + jmp !OSRE (2) + jmp X-- (1) until the next out
out PINS, 4 ; 23
; run this loop until there's no more data in the FIFO, so the line has been
; drawn completely.
jmp !OSRE, ccLoop [1] ; 24
; add one nop to have the last sample for four clock cycles
nop ; 25
; front porch, set black level so signal can settle
set PINS, 4 ; 26
; front porch; the two set instructions take one cycle each and the loop runs
; 8 times (X = 7 down to 0) at 29 cycles each, i.e. 232 clock cycles, which
; gives 234 cycles in total.
; set counter register
set X, 7 ; 27
fpLoop:
; hold black value
jmp X-- fpLoop [28] ; 28
; 29 instructions used (indices 0 to 28), so 3 of the 32 instruction slots are left
.wrap
; here is some embedded configuration code so the driver code can be more generic
% c-sdk {
/*
 * Prepare (but do not start) a state machine for the pio_PAL_DAC program.
 *
 *   pio       - PIO block the program was loaded into
 *   sm        - state machine index to configure
 *   offset    - instruction memory offset the program was loaded at
 *   dacPins   - first of pio_PAL_DAC_bits consecutive GPIOs driving the DAC
 *   vBlankPin - GPIO used as the state machine's jump pin; the program reads it
 *               to decide between sync symbols and line drawing
 */
static inline void palProgramInit(PIO pio, uint sm, uint offset, uint dacPins, uint vBlankPin) {
    const uint dacWidth = pio_PAL_DAC_bits;

    /* --- pin hardware ---------------------------------------------------- */
    /* hand every DAC GPIO over to the PIO block ... */
    for (uint gpio = dacPins; gpio != dacPins + dacWidth; ++gpio) {
        pio_gpio_init(pio, gpio);
    }
    /* ... and switch all of them to output */
    pio_sm_set_consecutive_pindirs(pio, sm, dacPins, dacWidth, true);

    /* --- state machine configuration ------------------------------------- */
    pio_sm_config cfg = pio_PAL_DAC_program_get_default_config(offset);

    /* The DAC pins are both the OUT group (sample data) and the SET group
     * (the program uses SET for its fixed sync/black levels). */
    sm_config_set_out_pins(&cfg, dacPins, dacWidth);
    sm_config_set_set_pins(&cfg, dacPins, dacWidth);

    /* Jump pin: tells the program whether it is in vBlank and therefore how
     * the incoming data has to be interpreted. */
    sm_config_set_jmp_pin(&cfg, vBlankPin);

    /* Nothing is ever read back, so give both FIFOs to the TX side. */
    sm_config_set_fifo_join(&cfg, PIO_FIFO_JOIN_TX);

    /* The program's timing assumes an undivided clock (divider 1.0). */
    sm_config_set_clkdiv_int_frac(&cfg, 1, 0);

    /* OSR: shift left, auto pull on, pull once bits * samples bits are consumed. */
    sm_config_set_out_shift(&cfg, false, true, pio_PAL_DAC_bits * pio_PAL_DAC_samples);

    /* The ISR is left at its defaults. An earlier variant used it (shifting
     * left, auto push off) to build counter values > 32; that call is disabled:
     *   sm_config_set_in_shift(&cfg, false, false, 32);
     */

    /* Load the configuration into the state machine. It is not enabled here;
     * starting it is the job of analogVideoStart(). */
    pio_sm_init(pio, sm, offset, &cfg);
}
%}