// software/apps/colour_terminal/tmds_encode_font_2bpp.S
  1  #include "hardware/regs/addressmap.h"
  2  #include "hardware/regs/sio.h"
  3  
// Assembler setup: unified (UAL) syntax, Thumb code for the Cortex-M0+.
.syntax unified
.cpu cortex-m0plus
.thumb
  7  
  8  // Using the following:
  9  //
 10  // - A font stored as a 1bpp bitmap, with row 0 of each character stored in
 11  //   one contiguous array, then row 1, etc, where each character is 8 bits
 12  //   wide
 13  //
 14  // - A character buffer
 15  //
 16  // - A colour buffer for each of R, G, B (so 3 planes total), each buffer
 17  //   storing a 2-bit foreground and background colour for each character
 18  //
 19  // Generate encoded TMDS buffers, fast enough to fit all the encode on one
 20  // core, and with small memory footprint (so no framebuffer of any depth!) The
 21  // main trick here is a LUT with an 8 bit index, composed of 4x1bpp pixels
 22  // (the 4 LSBs of the index) and a 2x2-bit palette (the four MSBs of the
 23  // index). Each LUT entry is 4 TMDS symbols, so 2 32-bit words, giving a total
 24  // table size of 2 kB.
 25  
// Offsets suitable for ldr/str (must be <= 0x7c):
//
// Each value is the distance of an interpolator register from
// SIO_INTERP0_ACCUM0_OFFSET rather than from the start of SIO. The 0x7c
// limit is the largest immediate a 16-bit Thumb word ldr/str can encode (a
// 5-bit field scaled by 4). Presumably these are meant to be used with a base
// register pointing at INTERP0 ACCUM0 -- TODO confirm against the callers.
//
// NOTE(review): none of these macros is referenced by the code visible in
// this file; confirm whether they are still needed.
#define ACCUM0_OFFS     (SIO_INTERP0_ACCUM0_OFFSET     - SIO_INTERP0_ACCUM0_OFFSET)
#define ACCUM1_OFFS     (SIO_INTERP0_ACCUM1_OFFSET     - SIO_INTERP0_ACCUM0_OFFSET)
#define ACCUM1_ADD_OFFS (SIO_INTERP0_ACCUM1_ADD_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
#define PEEK0_OFFS      (SIO_INTERP0_PEEK_LANE0_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
#define PEEK1_OFFS      (SIO_INTERP0_PEEK_LANE1_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
// PEEK2 is the FULL result register, not a third lane.
#define PEEK2_OFFS      (SIO_INTERP0_PEEK_FULL_OFFSET  - SIO_INTERP0_ACCUM0_OFFSET)
// Distance from the INTERP0 register block to the INTERP1 register block.
#define INTERP1         (SIO_INTERP1_ACCUM0_OFFSET     - SIO_INTERP0_ACCUM0_OFFSET)
 34  
// There is no vertical repeat, so the budget (ignoring DMA IRQs) is 8000
// cycles per 640 pixels, and there are three symbols to be generated per
// pixel, so 4.17 cyc/pix.
 38  
 39  
 40  // Once in the loop:
 41  // r0 contains character buffer pointer (only updated once per 8 chars)
 42  // r1 contains 8 2-colour 2bpp palettes, enough for 8 characters
 43  // r2 contains output buffer pointer
 44  // r3 contains a mask for the colour lookup bits
 45  // r4-r7 are for scratch + pixels
 46  // r8 contains a pointer to the font bitmap for this scanline.
 47  // r9 contains the TMDS LUT base.
 48  .macro do_char charbuf_offs colour_shift_instr colour_shamt
 49  	// Get 8x font bits for next character, put 4 LSBs in bits 6:3 of r4 (so
 50  	// scaled to 8-byte LUT entries), and 4 MSBs in bits 6:3 of r6.
 51  	ldrb r4, [r0, #\charbuf_offs]                                     // 2
 52  	add r4, r8                                                        // 1
 53  	ldrb r4, [r4]                                                     // 2
 54  	lsrs r6, r4, #4                                                   // 1
 55  	lsls r6, #3                                                       // 1
 56  	lsls r4, #28                                                      // 1
 57  	lsrs r4, #25                                                      // 1
 58  
 59  	// Get colour bits, add to TMDS LUT base and font bits
 60  	\colour_shift_instr r5, r1, #\colour_shamt                        // 1
 61  	ands r5, r3                                                       // 1
 62  	add r5, r9                                                        // 1
 63  	add r4, r5                                                        // 1
 64  	add r6, r5                                                        // 1
 65  
 66  	// Look up and write out 8 TMDS symbols
 67  	ldmia r4, {r4, r5}                                                // 3
 68  	ldmia r6, {r6, r7}                                                // 3
 69  	stmia r2!, {r4-r7}                                                // 5
 70  .endm
 71  
 72  
 73  // r0 is character buffer
 74  // r1 is colour buffer
 75  // r2 is output TMDS buffer
 76  // r3 is pixel count
 77  // First stack argument is the font bitmap for this scanline.
 78  
 79  .section .scratch_x.tmds_encode_font_2bpp, "ax"
 80  .global tmds_encode_font_2bpp
 81  .type tmds_encode_font_2bpp,%function
 82  .thumb_func
 83  tmds_encode_font_2bpp:
 84  	push {r4-r7, lr}
 85  	mov r4, r8
 86  	mov r5, r9
 87  	mov r6, r10
 88  	push {r4-r6}
 89  
 90  	lsls r3, #1
 91  	add r3, r2
 92  	mov ip, r3
 93  	ldr r3, =(0xf0 * 8)
 94  
 95  	ldr r7, [sp, #32] // 8 words saved, so 32-byte offset to first stack argument
 96  	mov r8, r7
 97  	ldr r7, =palettised_1bpp_tables
 98  	mov r9, r7
 99  
100  	mov r10, r1
101  
102  	b 2f
103  1:
104  	mov r4, r10
105  	ldmia r4!, {r1}
106  	mov r10, r4
107  	do_char 0 lsls 7
108  	do_char 1 lsls 3
109  	do_char 2 lsrs 1
110  	do_char 3 lsrs 5
111  	do_char 4 lsrs 9
112  	do_char 5 lsrs 13
113  	do_char 6 lsrs 17
114  	do_char 7 lsrs 21
115  	adds r0, #8
116  2:
117  	cmp r2, ip
118  	blo 1b
119  
120  	pop {r4-r6}
121  	mov r8, r4
122  	mov r9, r5
123  	mov r10, r6
124  	pop {r4-r7, pc}
125  
126  
// palettised_1bpp_tables: TMDS lookup table loaded as the LUT base by
// tmds_encode_font_2bpp and indexed by do_char.
//
// 256 entries of two 32-bit words (8 bytes) each, 2 kB in total. The entry
// index is background * 64 + foreground * 16 + pixrun, i.e. index bits 7:6
// are the 2-bit background level, bits 5:4 the 2-bit foreground level and
// bits 3:0 a run of four 1bpp pixels. Bit x of pixrun selects the foreground
// level for pixel x when set, the background level when clear.
//
// Each word packs two TMDS symbols: the first word holds pixel 0 in its low
// bits and pixel 1 shifted left by 10, the second word holds pixels 2 and 3
// in the same way. The generator asserts that the encoder imbalance is zero
// after every entry.
//
// Table generation:
//	levels_2bpp_even = [0x05, 0x50, 0xaf, 0xfa]
//	levels_2bpp_odd  = [0x04, 0x51, 0xae, 0xfb]
//
//	def level(bg, fg, x, pix):
//		index = fg if pix & 1 << x else bg
//		return (levels_2bpp_odd if x & 1 else levels_2bpp_even)[index]
//
//	for background in range(4):
//		for foreground in range(4):
//			print(f"// background, foreground = {background:02b}, {foreground:02b}")
//			for pixrun in range(16):
//				sym = list(enc.encode(level(background, foreground, x, pixrun), 0, 1) for x in range(4))
//				assert(enc.imbalance == 0)
//				print(f".word 0x{sym[1] << 10 | sym[0]:05x}, 0x{sym[3] << 10 | sym[2]:05x} // {pixrun:04b}")

// Data only: the section is allocatable but neither writable nor executable.
.section .scratch_x.palettised_1bpp_tables, "a"
// On ARM the .align operand is a power of two, so this is 4-byte alignment,
// as needed for the word loads (ldmia) that read the entries.
.align 2
palettised_1bpp_tables:
	// background, foreground = 00, 00
	.word 0x7f103, 0x7f103 // 0000
	.word 0x7f103, 0x7f103 // 0001
	.word 0x7f103, 0x7f103 // 0010
	.word 0x7f103, 0x7f103 // 0011
	.word 0x7f103, 0x7f103 // 0100
	.word 0x7f103, 0x7f103 // 0101
	.word 0x7f103, 0x7f103 // 0110
	.word 0x7f103, 0x7f103 // 0111
	.word 0x7f103, 0x7f103 // 1000
	.word 0x7f103, 0x7f103 // 1001
	.word 0x7f103, 0x7f103 // 1010
	.word 0x7f103, 0x7f103 // 1011
	.word 0x7f103, 0x7f103 // 1100
	.word 0x7f103, 0x7f103 // 1101
	.word 0x7f103, 0x7f103 // 1110
	.word 0x7f103, 0x7f103 // 1111
	// background, foreground = 00, 01
	.word 0x7f103, 0x7f103 // 0000
	.word 0x7f130, 0x7f103 // 0001
	.word 0x73d03, 0x7f103 // 0010
	.word 0x73d30, 0x7f103 // 0011
	.word 0x7f103, 0x7f130 // 0100
	.word 0x7f130, 0x7f130 // 0101
	.word 0x73d03, 0x7f130 // 0110
	.word 0x73d30, 0x7f130 // 0111
	.word 0x7f103, 0x73d03 // 1000
	.word 0x7f130, 0x73d03 // 1001
	.word 0x73d03, 0x73d03 // 1010
	.word 0x73d30, 0x73d03 // 1011
	.word 0x7f103, 0x73d30 // 1100
	.word 0x7f130, 0x73d30 // 1101
	.word 0x73d03, 0x73d30 // 1110
	.word 0x73d30, 0x73d30 // 1111
	// background, foreground = 00, 10
	.word 0x7f103, 0x7f103 // 0000
	.word 0x7f230, 0x7f103 // 0001
	.word 0xb3d03, 0x7f103 // 0010
	.word 0xb3e30, 0x7f103 // 0011
	.word 0x7f103, 0x7f230 // 0100
	.word 0x7f230, 0x7f230 // 0101
	.word 0xb3d03, 0x7f230 // 0110
	.word 0xb3e30, 0x7f230 // 0111
	.word 0x7f103, 0xb3d03 // 1000
	.word 0x7f230, 0xb3d03 // 1001
	.word 0xb3d03, 0xb3d03 // 1010
	.word 0xb3e30, 0xb3d03 // 1011
	.word 0x7f103, 0xb3e30 // 1100
	.word 0x7f230, 0xb3e30 // 1101
	.word 0xb3d03, 0xb3e30 // 1110
	.word 0xb3e30, 0xb3e30 // 1111
	// background, foreground = 00, 11
	.word 0x7f103, 0x7f103 // 0000
	.word 0x7f203, 0x7f103 // 0001
	.word 0xbf103, 0x7f103 // 0010
	.word 0xbf203, 0x7f103 // 0011
	.word 0x7f103, 0x7f203 // 0100
	.word 0x7f203, 0x7f203 // 0101
	.word 0xbf103, 0x7f203 // 0110
	.word 0xbf203, 0x7f203 // 0111
	.word 0x7f103, 0xbf103 // 1000
	.word 0x7f203, 0xbf103 // 1001
	.word 0xbf103, 0xbf103 // 1010
	.word 0xbf203, 0xbf103 // 1011
	.word 0x7f103, 0xbf203 // 1100
	.word 0x7f203, 0xbf203 // 1101
	.word 0xbf103, 0xbf203 // 1110
	.word 0xbf203, 0xbf203 // 1111
	// background, foreground = 01, 00
	.word 0x73d30, 0x73d30 // 0000
	.word 0x73d03, 0x73d30 // 0001
	.word 0x7f130, 0x73d30 // 0010
	.word 0x7f103, 0x73d30 // 0011
	.word 0x73d30, 0x73d03 // 0100
	.word 0x73d03, 0x73d03 // 0101
	.word 0x7f130, 0x73d03 // 0110
	.word 0x7f103, 0x73d03 // 0111
	.word 0x73d30, 0x7f130 // 1000
	.word 0x73d03, 0x7f130 // 1001
	.word 0x7f130, 0x7f130 // 1010
	.word 0x7f103, 0x7f130 // 1011
	.word 0x73d30, 0x7f103 // 1100
	.word 0x73d03, 0x7f103 // 1101
	.word 0x7f130, 0x7f103 // 1110
	.word 0x7f103, 0x7f103 // 1111
	// background, foreground = 01, 01
	.word 0x73d30, 0x73d30 // 0000
	.word 0x73d30, 0x73d30 // 0001
	.word 0x73d30, 0x73d30 // 0010
	.word 0x73d30, 0x73d30 // 0011
	.word 0x73d30, 0x73d30 // 0100
	.word 0x73d30, 0x73d30 // 0101
	.word 0x73d30, 0x73d30 // 0110
	.word 0x73d30, 0x73d30 // 0111
	.word 0x73d30, 0x73d30 // 1000
	.word 0x73d30, 0x73d30 // 1001
	.word 0x73d30, 0x73d30 // 1010
	.word 0x73d30, 0x73d30 // 1011
	.word 0x73d30, 0x73d30 // 1100
	.word 0x73d30, 0x73d30 // 1101
	.word 0x73d30, 0x73d30 // 1110
	.word 0x73d30, 0x73d30 // 1111
	// background, foreground = 01, 10
	.word 0x73d30, 0x73d30 // 0000
	.word 0x73e30, 0x73d30 // 0001
	.word 0xb3d30, 0x73d30 // 0010
	.word 0xb3e30, 0x73d30 // 0011
	.word 0x73d30, 0x73e30 // 0100
	.word 0x73e30, 0x73e30 // 0101
	.word 0xb3d30, 0x73e30 // 0110
	.word 0xb3e30, 0x73e30 // 0111
	.word 0x73d30, 0xb3d30 // 1000
	.word 0x73e30, 0xb3d30 // 1001
	.word 0xb3d30, 0xb3d30 // 1010
	.word 0xb3e30, 0xb3d30 // 1011
	.word 0x73d30, 0xb3e30 // 1100
	.word 0x73e30, 0xb3e30 // 1101
	.word 0xb3d30, 0xb3e30 // 1110
	.word 0xb3e30, 0xb3e30 // 1111
	// background, foreground = 01, 11
	.word 0x73d30, 0x73d30 // 0000
	.word 0x73e03, 0x73d30 // 0001
	.word 0xbf130, 0x73d30 // 0010
	.word 0xbf203, 0x73d30 // 0011
	.word 0x73d30, 0x73e03 // 0100
	.word 0x73e03, 0x73e03 // 0101
	.word 0xbf130, 0x73e03 // 0110
	.word 0xbf203, 0x73e03 // 0111
	.word 0x73d30, 0xbf130 // 1000
	.word 0x73e03, 0xbf130 // 1001
	.word 0xbf130, 0xbf130 // 1010
	.word 0xbf203, 0xbf130 // 1011
	.word 0x73d30, 0xbf203 // 1100
	.word 0x73e03, 0xbf203 // 1101
	.word 0xbf130, 0xbf203 // 1110
	.word 0xbf203, 0xbf203 // 1111
	// background, foreground = 10, 00
	.word 0xb3e30, 0xb3e30 // 0000
	.word 0xb3d03, 0xb3e30 // 0001
	.word 0x7f230, 0xb3e30 // 0010
	.word 0x7f103, 0xb3e30 // 0011
	.word 0xb3e30, 0xb3d03 // 0100
	.word 0xb3d03, 0xb3d03 // 0101
	.word 0x7f230, 0xb3d03 // 0110
	.word 0x7f103, 0xb3d03 // 0111
	.word 0xb3e30, 0x7f230 // 1000
	.word 0xb3d03, 0x7f230 // 1001
	.word 0x7f230, 0x7f230 // 1010
	.word 0x7f103, 0x7f230 // 1011
	.word 0xb3e30, 0x7f103 // 1100
	.word 0xb3d03, 0x7f103 // 1101
	.word 0x7f230, 0x7f103 // 1110
	.word 0x7f103, 0x7f103 // 1111
	// background, foreground = 10, 01
	.word 0xb3e30, 0xb3e30 // 0000
	.word 0xb3d30, 0xb3e30 // 0001
	.word 0x73e30, 0xb3e30 // 0010
	.word 0x73d30, 0xb3e30 // 0011
	.word 0xb3e30, 0xb3d30 // 0100
	.word 0xb3d30, 0xb3d30 // 0101
	.word 0x73e30, 0xb3d30 // 0110
	.word 0x73d30, 0xb3d30 // 0111
	.word 0xb3e30, 0x73e30 // 1000
	.word 0xb3d30, 0x73e30 // 1001
	.word 0x73e30, 0x73e30 // 1010
	.word 0x73d30, 0x73e30 // 1011
	.word 0xb3e30, 0x73d30 // 1100
	.word 0xb3d30, 0x73d30 // 1101
	.word 0x73e30, 0x73d30 // 1110
	.word 0x73d30, 0x73d30 // 1111
	// background, foreground = 10, 10
	.word 0xb3e30, 0xb3e30 // 0000
	.word 0xb3e30, 0xb3e30 // 0001
	.word 0xb3e30, 0xb3e30 // 0010
	.word 0xb3e30, 0xb3e30 // 0011
	.word 0xb3e30, 0xb3e30 // 0100
	.word 0xb3e30, 0xb3e30 // 0101
	.word 0xb3e30, 0xb3e30 // 0110
	.word 0xb3e30, 0xb3e30 // 0111
	.word 0xb3e30, 0xb3e30 // 1000
	.word 0xb3e30, 0xb3e30 // 1001
	.word 0xb3e30, 0xb3e30 // 1010
	.word 0xb3e30, 0xb3e30 // 1011
	.word 0xb3e30, 0xb3e30 // 1100
	.word 0xb3e30, 0xb3e30 // 1101
	.word 0xb3e30, 0xb3e30 // 1110
	.word 0xb3e30, 0xb3e30 // 1111
	// background, foreground = 10, 11
	.word 0xb3e30, 0xb3e30 // 0000
	.word 0xb3e03, 0xb3e30 // 0001
	.word 0xbf230, 0xb3e30 // 0010
	.word 0xbf203, 0xb3e30 // 0011
	.word 0xb3e30, 0xb3e03 // 0100
	.word 0xb3e03, 0xb3e03 // 0101
	.word 0xbf230, 0xb3e03 // 0110
	.word 0xbf203, 0xb3e03 // 0111
	.word 0xb3e30, 0xbf230 // 1000
	.word 0xb3e03, 0xbf230 // 1001
	.word 0xbf230, 0xbf230 // 1010
	.word 0xbf203, 0xbf230 // 1011
	.word 0xb3e30, 0xbf203 // 1100
	.word 0xb3e03, 0xbf203 // 1101
	.word 0xbf230, 0xbf203 // 1110
	.word 0xbf203, 0xbf203 // 1111
	// background, foreground = 11, 00
	.word 0xbf203, 0xbf203 // 0000
	.word 0xbf103, 0xbf203 // 0001
	.word 0x7f203, 0xbf203 // 0010
	.word 0x7f103, 0xbf203 // 0011
	.word 0xbf203, 0xbf103 // 0100
	.word 0xbf103, 0xbf103 // 0101
	.word 0x7f203, 0xbf103 // 0110
	.word 0x7f103, 0xbf103 // 0111
	.word 0xbf203, 0x7f203 // 1000
	.word 0xbf103, 0x7f203 // 1001
	.word 0x7f203, 0x7f203 // 1010
	.word 0x7f103, 0x7f203 // 1011
	.word 0xbf203, 0x7f103 // 1100
	.word 0xbf103, 0x7f103 // 1101
	.word 0x7f203, 0x7f103 // 1110
	.word 0x7f103, 0x7f103 // 1111
	// background, foreground = 11, 01
	.word 0xbf203, 0xbf203 // 0000
	.word 0xbf130, 0xbf203 // 0001
	.word 0x73e03, 0xbf203 // 0010
	.word 0x73d30, 0xbf203 // 0011
	.word 0xbf203, 0xbf130 // 0100
	.word 0xbf130, 0xbf130 // 0101
	.word 0x73e03, 0xbf130 // 0110
	.word 0x73d30, 0xbf130 // 0111
	.word 0xbf203, 0x73e03 // 1000
	.word 0xbf130, 0x73e03 // 1001
	.word 0x73e03, 0x73e03 // 1010
	.word 0x73d30, 0x73e03 // 1011
	.word 0xbf203, 0x73d30 // 1100
	.word 0xbf130, 0x73d30 // 1101
	.word 0x73e03, 0x73d30 // 1110
	.word 0x73d30, 0x73d30 // 1111
	// background, foreground = 11, 10
	.word 0xbf203, 0xbf203 // 0000
	.word 0xbf230, 0xbf203 // 0001
	.word 0xb3e03, 0xbf203 // 0010
	.word 0xb3e30, 0xbf203 // 0011
	.word 0xbf203, 0xbf230 // 0100
	.word 0xbf230, 0xbf230 // 0101
	.word 0xb3e03, 0xbf230 // 0110
	.word 0xb3e30, 0xbf230 // 0111
	.word 0xbf203, 0xb3e03 // 1000
	.word 0xbf230, 0xb3e03 // 1001
	.word 0xb3e03, 0xb3e03 // 1010
	.word 0xb3e30, 0xb3e03 // 1011
	.word 0xbf203, 0xb3e30 // 1100
	.word 0xbf230, 0xb3e30 // 1101
	.word 0xb3e03, 0xb3e30 // 1110
	.word 0xb3e30, 0xb3e30 // 1111
	// background, foreground = 11, 11
	.word 0xbf203, 0xbf203 // 0000
	.word 0xbf203, 0xbf203 // 0001
	.word 0xbf203, 0xbf203 // 0010
	.word 0xbf203, 0xbf203 // 0011
	.word 0xbf203, 0xbf203 // 0100
	.word 0xbf203, 0xbf203 // 0101
	.word 0xbf203, 0xbf203 // 0110
	.word 0xbf203, 0xbf203 // 0111
	.word 0xbf203, 0xbf203 // 1000
	.word 0xbf203, 0xbf203 // 1001
	.word 0xbf203, 0xbf203 // 1010
	.word 0xbf203, 0xbf203 // 1011
	.word 0xbf203, 0xbf203 // 1100
	.word 0xbf203, 0xbf203 // 1101
	.word 0xbf203, 0xbf203 // 1110
	.word 0xbf203, 0xbf203 // 1111