tmds_encode_font_2bpp.S
#include "hardware/regs/addressmap.h"
#include "hardware/regs/sio.h"

.syntax unified
.cpu cortex-m0plus
.thumb

// Inputs to the encoder:
//
// - A font stored as a 1bpp bitmap in which every character is 8 bits wide.
//   The bitmap is laid out row by row: row 0 of each character is stored in
//   one contiguous array, then row 1 of each character, and so on.
//
// - A character buffer
//
// - A colour buffer for each of R, G and B (so 3 planes in total). Each
//   buffer stores a 2-bit foreground and a 2-bit background colour for each
//   character.
//
// From these, generate encoded TMDS buffers, fast enough to fit all of the
// encode on one core, and with a small memory footprint (so no framebuffer
// of any depth!). The main trick is a LUT with an 8-bit index, composed of
// 4x 1bpp pixels (the 4 LSBs of the index) and a 2x 2-bit palette (the 4 MSBs
// of the index). Each LUT entry is 4 TMDS symbols, i.e. two 32-bit words,
// giving a total table size of 2 kB.

// Interpolator register offsets, expressed relative to INTERP0 ACCUM0 so that
// they are suitable as ldr/str immediates (must be <= 0x7c).
// NOTE(review): none of these are referenced by the code shown in this file.
#define ACCUM0_OFFS     (SIO_INTERP0_ACCUM0_OFFSET     - SIO_INTERP0_ACCUM0_OFFSET)
#define ACCUM1_OFFS     (SIO_INTERP0_ACCUM1_OFFSET     - SIO_INTERP0_ACCUM0_OFFSET)
#define ACCUM1_ADD_OFFS (SIO_INTERP0_ACCUM1_ADD_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
#define PEEK0_OFFS      (SIO_INTERP0_PEEK_LANE0_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
#define PEEK1_OFFS      (SIO_INTERP0_PEEK_LANE1_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
#define PEEK2_OFFS      (SIO_INTERP0_PEEK_FULL_OFFSET  - SIO_INTERP0_ACCUM0_OFFSET)
#define INTERP1         (SIO_INTERP1_ACCUM0_OFFSET     - SIO_INTERP0_ACCUM0_OFFSET)

// Cycle budget: there is no vertical repeat, so the budget (ignoring DMA
// IRQs) is 8000 cycles per 640 pixels. Three symbols must be generated per
// pixel, which works out to 4.17 cycles per symbol.


// Register allocation once inside the encode loop:
//   r0     character buffer pointer (only updated once per 8 characters)
//   r1     eight 2-colour 2bpp palettes, enough for 8 characters
//   r2     output buffer pointer
//   r3     mask for the colour lookup bits
//   r4-r7  scratch, then the pixels about to be stored
//   r8     pointer to the font bitmap for this scanline
//   r9     TMDS LUT base
//
// do_char: encode one 8-pixel-wide character into 8 TMDS symbols.
//   char_offs  byte offset of the character relative to r0
//   shift_op   shift mnemonic (lsls or lsrs) that lines the palette field of
//              this character up with the mask in r3
//   shift_amt  immediate shift distance used with shift_op
// Overwrites r4-r7 and the flags, and advances r2 by 16 bytes.
// The trailing numbers are per-instruction cycle counts.
.macro do_char char_offs shift_op shift_amt
    // Fetch the 8 font bits of the next character. The 4 LSBs end up in bits
    // 6:3 of r4 (i.e. scaled to 8-byte LUT entries) and the 4 MSBs in bits
    // 6:3 of r6.
    ldrb    r4, [r0, #\char_offs]           // 2
    add     r4, r8                          // 1
    ldrb    r4, [r4]                        // 2
    lsrs    r6, r4, #4                      // 1
    lsls    r6, r6, #3                      // 1
    lsls    r4, r4, #28                     // 1
    lsrs    r4, r4, #25                     // 1

    // Isolate the colour bits, then add the TMDS LUT base and the font bits
    // to form the two entry addresses.
    \shift_op r5, r1, #\shift_amt           // 1
    ands    r5, r5, r3                      // 1
    add     r5, r9                          // 1
    add     r4, r5                          // 1
    add     r6, r5                          // 1

    // Read both LUT entries and write out the 8 TMDS symbols.
    ldmia   r4, {r4, r5}                    // 3
    ldmia   r6, {r6, r7}                    // 3
    stmia   r2!, {r4-r7}                    // 5
.endm


// Arguments of the function that follows:
//   r0 is the character buffer
//   r1 is the colour buffer
//   r2 is the output TMDS buffer
//   r3 is the pixel count
//   The first stack argument is the font bitmap for this scanline.

// tmds_encode_font_2bpp: encode one scanline of 2bpp-coloured text to TMDS.
//
// In:  r0 = character buffer, r1 = colour buffer, r2 = output TMDS buffer,
//      r3 = pixel count, [sp] on entry = font bitmap for this scanline.
// Preserves r4-r10 by saving them on the stack (8 words including lr).
//
// Each pass of the loop consumes 8 characters and one 32-bit word of the
// colour buffer, and stores 8 * 16 = 128 bytes of output. The loop runs while
// the output pointer is below r2 + 2 * r3.
// NOTE(review): the end test only runs between passes, so a pixel count that
// is not a multiple of 64 would store past r2 + 2 * r3 - confirm callers
// always pass a multiple of 64.
.section .scratch_x.tmds_encode_font_2bpp, "ax"
.global tmds_encode_font_2bpp
.type tmds_encode_font_2bpp,%function
.thumb_func
tmds_encode_font_2bpp:
    // push can only encode low registers and lr, so r8-r10 go via r4-r6.
    push    {r4-r7, lr}
    mov     r4, r8
    mov     r5, r9
    mov     r6, r10
    push    {r4-r6}

    // r12 = output pointer + 2 * pixel count: the address the loop stops at.
    lsls    r3, r3, #1
    add     r3, r2
    mov     r12, r3
    // r3 is now free: reuse it as the palette mask, 4 index bits scaled by
    // the 8-byte LUT entry size.
    ldr     r3, =(0xf0 << 3)

    // 8 words were pushed above, so the first stack argument is at sp + 32.
    ldr     r7, [sp, #32]
    mov     r8, r7                          // r8 = font bitmap row
    ldr     r7, =palettised_1bpp_tables
    mov     r9, r7                          // r9 = TMDS LUT base

    mov     r10, r1                         // r10 = colour buffer cursor

    b       .Ltmds_font_2bpp_check
.Ltmds_font_2bpp_group:
    // Load the next word of 8 palettes into r1 and step the colour cursor.
    mov     r4, r10
    ldmia   r4!, {r1}
    mov     r10, r4
    // Palette field n sits at bits 4n+3:4n of r1 and must land on bits 10:7,
    // hence a left shift for the first two characters and right shifts after.
    do_char 0 lsls 7
    do_char 1 lsls 3
    do_char 2 lsrs 1
    do_char 3 lsrs 5
    do_char 4 lsrs 9
    do_char 5 lsrs 13
    do_char 6 lsrs 17
    do_char 7 lsrs 21
    adds    r0, #8
.Ltmds_font_2bpp_check:
    cmp     r2, r12
    blo     .Ltmds_font_2bpp_group

    // Restore the high registers, then return by popping into pc.
    pop     {r4-r6}
    mov     r8, r4
    mov     r9, r5
    mov     r10, r6
    pop     {r4-r7, pc}


// The table below was generated with:
//
// levels_2bpp_even = [0x05, 0x50, 0xaf, 0xfa]
// levels_2bpp_odd  = [0x04, 0x51, 0xae, 0xfb]
//
// def level(bg, fg, x, pix):
//     index = fg if pix & 1 << x else bg
//     return (levels_2bpp_odd if x & 1 else levels_2bpp_even)[index]
//
// for background in range(4):
//     for foreground in range(4):
//         print(f"// background, foreground = {background:02b}, {foreground:02b}")
//         for pixrun in range(16):
//             sym = list(enc.encode(level(background, foreground, x, pixrun), 0, 1) for x in range(4))
//             assert(enc.imbalance == 0)
//             print(f".word 0x{sym[1] << 10 | sym[0]:05x}, 0x{sym[3] << 10 | sym[2]:05x} // {pixrun:04b}")
//
// Layout: 16 palettes x 16 pixel runs x 2 words. Each word packs two 10-bit
// symbols, the earlier one in bits 9:0 and the later one in bits 19:10.

.section .scratch_x.palettised_1bpp_tables, "a"
.p2align 2
palettised_1bpp_tables:
    // background, foreground = 00, 00
    .word 0x7f103, 0x7f103 // 0000
    .word 0x7f103, 0x7f103 // 0001
    .word 0x7f103, 0x7f103 // 0010
    .word 0x7f103, 0x7f103 // 0011
    .word 0x7f103, 0x7f103 // 0100
    .word 0x7f103, 0x7f103 // 0101
    .word 0x7f103, 0x7f103 // 0110
    .word 0x7f103, 0x7f103 // 0111
    .word 0x7f103, 0x7f103 // 1000
    .word 0x7f103, 0x7f103 // 1001
    .word 0x7f103, 0x7f103 // 1010
    .word 0x7f103, 0x7f103 // 1011
    .word 0x7f103, 0x7f103 // 1100
    .word 0x7f103, 0x7f103 // 1101
    .word 0x7f103, 0x7f103 // 1110
    .word 0x7f103, 0x7f103 // 1111
    // background, foreground = 00, 01
    .word 0x7f103, 0x7f103 // 0000
    .word 0x7f130, 0x7f103 // 0001
    .word 0x73d03, 0x7f103 // 0010
    .word 0x73d30, 0x7f103 // 0011
    .word 0x7f103, 0x7f130 // 0100
    .word 0x7f130, 0x7f130 // 0101
    .word 0x73d03, 0x7f130 // 0110
    .word 0x73d30, 0x7f130 // 0111
    .word 0x7f103, 0x73d03 // 1000
    .word 0x7f130, 0x73d03 // 1001
    .word 0x73d03, 0x73d03 // 1010
    .word 0x73d30, 0x73d03 // 1011
    .word 0x7f103, 0x73d30 // 1100
    .word 0x7f130, 0x73d30 // 1101
    .word 0x73d03, 0x73d30 // 1110
    .word 0x73d30, 0x73d30 // 1111
    // background, foreground = 00, 10
    .word 0x7f103, 0x7f103 // 0000
    .word 0x7f230, 0x7f103 // 0001
    .word 0xb3d03, 0x7f103 // 0010
    .word 0xb3e30, 0x7f103 // 0011
    .word 0x7f103, 0x7f230 // 0100
    .word 0x7f230, 0x7f230 // 0101
    .word 0xb3d03, 0x7f230 // 0110
    .word 0xb3e30, 0x7f230 // 0111
    .word 0x7f103, 0xb3d03 // 1000
    .word 0x7f230, 0xb3d03 // 1001
    .word 0xb3d03, 0xb3d03 // 1010
    .word 0xb3e30, 0xb3d03 // 1011
    .word 0x7f103, 0xb3e30 // 1100
    .word 0x7f230, 0xb3e30 // 1101
    .word 0xb3d03, 0xb3e30 // 1110
    .word 0xb3e30, 0xb3e30 // 1111
    // background, foreground = 00, 11
    .word 0x7f103, 0x7f103 // 0000
    .word 0x7f203, 0x7f103 // 0001
    .word 0xbf103, 0x7f103 // 0010
    .word 0xbf203, 0x7f103 // 0011
    .word 0x7f103, 0x7f203 // 0100
    .word 0x7f203, 0x7f203 // 0101
    .word 0xbf103, 0x7f203 // 0110
    .word 0xbf203, 0x7f203 // 0111
    .word 0x7f103, 0xbf103 // 1000
    .word 0x7f203, 0xbf103 // 1001
    .word 0xbf103, 0xbf103 // 1010
    .word 0xbf203, 0xbf103 // 1011
    .word 0x7f103, 0xbf203 // 1100
    .word 0x7f203, 0xbf203 // 1101
    .word 0xbf103, 0xbf203 // 1110
    .word 0xbf203, 0xbf203 // 1111
    // background, foreground = 01, 00
    .word 0x73d30, 0x73d30 // 0000
    .word 0x73d03, 0x73d30 // 0001
    .word 0x7f130, 0x73d30 // 0010
    .word 0x7f103, 0x73d30 // 0011
    .word 0x73d30, 0x73d03 // 0100
    .word 0x73d03, 0x73d03 // 0101
    .word 0x7f130, 0x73d03 // 0110
    .word 0x7f103, 0x73d03 // 0111
    .word 0x73d30, 0x7f130 // 1000
    .word 0x73d03, 0x7f130 // 1001
    .word 0x7f130, 0x7f130 // 1010
    .word 0x7f103, 0x7f130 // 1011
    .word 0x73d30, 0x7f103 // 1100
    .word 0x73d03, 0x7f103 // 1101
    .word 0x7f130, 0x7f103 // 1110
    .word 0x7f103, 0x7f103 // 1111
    // background, foreground = 01, 01
    .word 0x73d30, 0x73d30 // 0000
    .word 0x73d30, 0x73d30 // 0001
    .word 0x73d30, 0x73d30 // 0010
    .word 0x73d30, 0x73d30 // 0011
    .word 0x73d30, 0x73d30 // 0100
    .word 0x73d30, 0x73d30 // 0101
    .word 0x73d30, 0x73d30 // 0110
    .word 0x73d30, 0x73d30 // 0111
    .word 0x73d30, 0x73d30 // 1000
    .word 0x73d30, 0x73d30 // 1001
    .word 0x73d30, 0x73d30 // 1010
    .word 0x73d30, 0x73d30 // 1011
    .word 0x73d30, 0x73d30 // 1100
    .word 0x73d30, 0x73d30 // 1101
    .word 0x73d30, 0x73d30 // 1110
    .word 0x73d30, 0x73d30 // 1111
    // background, foreground = 01, 10
    .word 0x73d30, 0x73d30 // 0000
    .word 0x73e30, 0x73d30 // 0001
    .word 0xb3d30, 0x73d30 // 0010
    .word 0xb3e30, 0x73d30 // 0011
    .word 0x73d30, 0x73e30 // 0100
    .word 0x73e30, 0x73e30 // 0101
    .word 0xb3d30, 0x73e30 // 0110
    .word 0xb3e30, 0x73e30 // 0111
    .word 0x73d30, 0xb3d30 // 1000
    .word 0x73e30, 0xb3d30 // 1001
    .word 0xb3d30, 0xb3d30 // 1010
    .word 0xb3e30, 0xb3d30 // 1011
    .word 0x73d30, 0xb3e30 // 1100
    .word 0x73e30, 0xb3e30 // 1101
    .word 0xb3d30, 0xb3e30 // 1110
    .word 0xb3e30, 0xb3e30 // 1111
    // background, foreground = 01, 11
    .word 0x73d30, 0x73d30 // 0000
    .word 0x73e03, 0x73d30 // 0001
    .word 0xbf130, 0x73d30 // 0010
    .word 0xbf203, 0x73d30 // 0011
    .word 0x73d30, 0x73e03 // 0100
    .word 0x73e03, 0x73e03 // 0101
    .word 0xbf130, 0x73e03 // 0110
    .word 0xbf203, 0x73e03 // 0111
    .word 0x73d30, 0xbf130 // 1000
    .word 0x73e03, 0xbf130 // 1001
    .word 0xbf130, 0xbf130 // 1010
    .word 0xbf203, 0xbf130 // 1011
    .word 0x73d30, 0xbf203 // 1100
    .word 0x73e03, 0xbf203 // 1101
    .word 0xbf130, 0xbf203 // 1110
    .word 0xbf203, 0xbf203 // 1111
    // background, foreground = 10, 00
    .word 0xb3e30, 0xb3e30 // 0000
    .word 0xb3d03, 0xb3e30 // 0001
    .word 0x7f230, 0xb3e30 // 0010
    .word 0x7f103, 0xb3e30 // 0011
    .word 0xb3e30, 0xb3d03 // 0100
    .word 0xb3d03, 0xb3d03 // 0101
    .word 0x7f230, 0xb3d03 // 0110
    .word 0x7f103, 0xb3d03 // 0111
    .word 0xb3e30, 0x7f230 // 1000
    .word 0xb3d03, 0x7f230 // 1001
    .word 0x7f230, 0x7f230 // 1010
    .word 0x7f103, 0x7f230 // 1011
    .word 0xb3e30, 0x7f103 // 1100
    .word 0xb3d03, 0x7f103 // 1101
    .word 0x7f230, 0x7f103 // 1110
    .word 0x7f103, 0x7f103 // 1111
    // background, foreground = 10, 01
    .word 0xb3e30, 0xb3e30 // 0000
    .word 0xb3d30, 0xb3e30 // 0001
    .word 0x73e30, 0xb3e30 // 0010
    .word 0x73d30, 0xb3e30 // 0011
    .word 0xb3e30, 0xb3d30 // 0100
    .word 0xb3d30, 0xb3d30 // 0101
    .word 0x73e30, 0xb3d30 // 0110
    .word 0x73d30, 0xb3d30 // 0111
    .word 0xb3e30, 0x73e30 // 1000
    .word 0xb3d30, 0x73e30 // 1001
    .word 0x73e30, 0x73e30 // 1010
    .word 0x73d30, 0x73e30 // 1011
    .word 0xb3e30, 0x73d30 // 1100
    .word 0xb3d30, 0x73d30 // 1101
    .word 0x73e30, 0x73d30 // 1110
    .word 0x73d30, 0x73d30 // 1111
    // background, foreground = 10, 10
    .word 0xb3e30, 0xb3e30 // 0000
    .word 0xb3e30, 0xb3e30 // 0001
    .word 0xb3e30, 0xb3e30 // 0010
    .word 0xb3e30, 0xb3e30 // 0011
    .word 0xb3e30, 0xb3e30 // 0100
    .word 0xb3e30, 0xb3e30 // 0101
    .word 0xb3e30, 0xb3e30 // 0110
    .word 0xb3e30, 0xb3e30 // 0111
    .word 0xb3e30, 0xb3e30 // 1000
    .word 0xb3e30, 0xb3e30 // 1001
    .word 0xb3e30, 0xb3e30 // 1010
    .word 0xb3e30, 0xb3e30 // 1011
    .word 0xb3e30, 0xb3e30 // 1100
    .word 0xb3e30, 0xb3e30 // 1101
    .word 0xb3e30, 0xb3e30 // 1110
    .word 0xb3e30, 0xb3e30 // 1111
    // background, foreground = 10, 11
    .word 0xb3e30, 0xb3e30 // 0000
    .word 0xb3e03, 0xb3e30 // 0001
    .word 0xbf230, 0xb3e30 // 0010
    .word 0xbf203, 0xb3e30 // 0011
    .word 0xb3e30, 0xb3e03 // 0100
    .word 0xb3e03, 0xb3e03 // 0101
    .word 0xbf230, 0xb3e03 // 0110
    .word 0xbf203, 0xb3e03 // 0111
    .word 0xb3e30, 0xbf230 // 1000
    .word 0xb3e03, 0xbf230 // 1001
    .word 0xbf230, 0xbf230 // 1010
    .word 0xbf203, 0xbf230 // 1011
    .word 0xb3e30, 0xbf203 // 1100
    .word 0xb3e03, 0xbf203 // 1101
    .word 0xbf230, 0xbf203 // 1110
    .word 0xbf203, 0xbf203 // 1111
    // background, foreground = 11, 00
    .word 0xbf203, 0xbf203 // 0000
    .word 0xbf103, 0xbf203 // 0001
    .word 0x7f203, 0xbf203 // 0010
    .word 0x7f103, 0xbf203 // 0011
    .word 0xbf203, 0xbf103 // 0100
    .word 0xbf103, 0xbf103 // 0101
    .word 0x7f203, 0xbf103 // 0110
    .word 0x7f103, 0xbf103 // 0111
    .word 0xbf203, 0x7f203 // 1000
    .word 0xbf103, 0x7f203 // 1001
    .word 0x7f203, 0x7f203 // 1010
    .word 0x7f103, 0x7f203 // 1011
    .word 0xbf203, 0x7f103 // 1100
    .word 0xbf103, 0x7f103 // 1101
    .word 0x7f203, 0x7f103 // 1110
    .word 0x7f103, 0x7f103 // 1111
    // background, foreground = 11, 01
    .word 0xbf203, 0xbf203 // 0000
    .word 0xbf130, 0xbf203 // 0001
    .word 0x73e03, 0xbf203 // 0010
    .word 0x73d30, 0xbf203 // 0011
    .word 0xbf203, 0xbf130 // 0100
    .word 0xbf130, 0xbf130 // 0101
    .word 0x73e03, 0xbf130 // 0110
    .word 0x73d30, 0xbf130 // 0111
    .word 0xbf203, 0x73e03 // 1000
    .word 0xbf130, 0x73e03 // 1001
    .word 0x73e03, 0x73e03 // 1010
    .word 0x73d30, 0x73e03 // 1011
    .word 0xbf203, 0x73d30 // 1100
    .word 0xbf130, 0x73d30 // 1101
    .word 0x73e03, 0x73d30 // 1110
    .word 0x73d30, 0x73d30 // 1111
    // background, foreground = 11, 10
    .word 0xbf203, 0xbf203 // 0000
    .word 0xbf230, 0xbf203 // 0001
    .word 0xb3e03, 0xbf203 // 0010
    .word 0xb3e30, 0xbf203 // 0011
    .word 0xbf203, 0xbf230 // 0100
    .word 0xbf230, 0xbf230 // 0101
    .word 0xb3e03, 0xbf230 // 0110
    .word 0xb3e30, 0xbf230 // 0111
    .word 0xbf203, 0xb3e03 // 1000
    .word 0xbf230, 0xb3e03 // 1001
    .word 0xb3e03, 0xb3e03 // 1010
    .word 0xb3e30, 0xb3e03 // 1011
    .word 0xbf203, 0xb3e30 // 1100
    .word 0xbf230, 0xb3e30 // 1101
    .word 0xb3e03, 0xb3e30 // 1110
    .word 0xb3e30, 0xb3e30 // 1111
    // background, foreground = 11, 11
    .word 0xbf203, 0xbf203 // 0000
    .word 0xbf203, 0xbf203 // 0001
    .word 0xbf203, 0xbf203 // 0010
    .word 0xbf203, 0xbf203 // 0011
    .word 0xbf203, 0xbf203 // 0100
    .word 0xbf203, 0xbf203 // 0101
    .word 0xbf203, 0xbf203 // 0110
    .word 0xbf203, 0xbf203 // 0111
    .word 0xbf203, 0xbf203 // 1000
    .word 0xbf203, 0xbf203 // 1001
    .word 0xbf203, 0xbf203 // 1010
    .word 0xbf203, 0xbf203 // 1011
    .word 0xbf203, 0xbf203 // 1100
    .word 0xbf203, 0xbf203 // 1101
    .word 0xbf203, 0xbf203 // 1110
    .word 0xbf203, 0xbf203 // 1111