/* driver-SPI-dragonmint-t1.c */
1 /* 2 * cgminer SPI driver for Dragonmint T1 devices 3 * 4 * Copyright 2013, 2014 Zefir Kurtisi <zefir.kurtisi@gmail.com> 5 * Copyright 2018 Con Kolivas <kernel@kolivas.org> 6 * 7 * This program is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License as published by the Free 9 * Software Foundation; either version 3 of the License, or (at your option) 10 * any later version. See COPYING for more details. 11 */ 12 13 #include <stdlib.h> 14 #include <assert.h> 15 #include <fcntl.h> 16 #include <limits.h> 17 #include <unistd.h> 18 #include <stdbool.h> 19 #include <pthread.h> 20 21 #include "logging.h" 22 #include "miner.h" 23 #include "util.h" 24 25 #include "dragonmint_t1.h" 26 #include "dm_temp_ctrl.h" 27 #include "dm_fan_ctrl.h" 28 29 #include "sys/time.h" 30 31 #define T1_FANSPEED_INIT (opt_T1_target) 32 #define T1_TEMP_TARGET_INIT (60) 33 #define T1_TEMP_TARGET_RUN (75) 34 35 struct T1_chain *chain[MAX_CHAIN_NUM]; 36 uint8_t chain_mask; 37 38 uint16_t T1Pll[MCOMPAT_CONFIG_MAX_CHAIN_NUM]; 39 40 /* FAN CTRL */ 41 //dragonmint_fan_temp_s g_fan_ctrl; 42 static volatile uint8_t g_debug_stats[MAX_CHAIN_NUM]; 43 44 static int total_chains; 45 static int chains_tuned; 46 47 static dragonmint_reg_ctrl_t s_reg_ctrl; 48 49 hardware_version_e g_hwver; 50 //dragonmint_type_e g_type; 51 int g_reset_delay = 0xffff; 52 char volShowLog[MAX_CHAIN_NUM][256]; 53 /* one global board_selector and spi context is enough */ 54 //static struct board_selector *board_selector; 55 56 static int spi_status_err_cnt = 0; 57 58 static pthread_t fan_tid; 59 60 /* 61 * for now, we have one global config, defaulting values: 62 * - ref_clk 16MHz / sys_clk 800MHz 63 * - 2000 kHz SPI clock 64 */ 65 struct T1_config_options T1_config_options = { 66 .ref_clk_khz = 16000, .sys_clk_khz = 800000, .spi_clk_khz = 2000, 67 }; 68 69 /* override values with --bitmine-t1-options ref:sys:spi: - use 0 for default */ 70 static struct T1_config_options 
*parsed_config_options; 71 72 int chain_plug[MAX_CHAIN_NUM]; 73 int chain_flag[MAX_CHAIN_NUM]; 74 75 #define LOG_VOL_PREFIX "/tmp/log/volAnalys" 76 void dragonmint_log_record(int cid, void* log, int len) 77 { 78 FILE* fd; 79 char fileName[128] = {0}; 80 81 sprintf(fileName, "%s%d.log", LOG_VOL_PREFIX, cid); 82 fd = fopen(fileName, "w+"); 83 if (fd == NULL){ 84 //applog(LOG_ERR, "Open log File%d Failed!%d", cid, errno); 85 applog(LOG_ERR, "Open log File%d Failed!%s", cid, strerror(errno)); 86 return; 87 } 88 89 fwrite(log, len, 1, fd); 90 fflush(fd); 91 fclose(fd); 92 } 93 94 static void wq_enqueue(struct thr_info *thr, struct T1_chain *t1) 95 { 96 struct work *work = get_work(thr, thr->id); 97 struct work_queue *wq; 98 struct work_ent *we; 99 int rolls = 0; 100 101 wq = &t1->active_wq; 102 103 while (42) { 104 we = cgmalloc(sizeof(*we)); 105 106 we->work = work; 107 INIT_LIST_HEAD(&we->head); 108 109 mutex_lock(&t1->lock); 110 list_add_tail(&we->head, &wq->head); 111 wq->num_elems++; 112 mutex_unlock(&t1->lock); 113 114 if (wq->num_elems >= t1->num_active_chips * 2) { 115 break; 116 } 117 if (rolls > work->drv_rolllimit) { 118 work = get_work(thr, thr->id); 119 continue; 120 } 121 work = make_clone(work); 122 roll_work(work); 123 } 124 } 125 126 static struct work *wq_dequeue(struct T1_chain *t1, bool sig) 127 { 128 struct work_ent *we; 129 struct work *work = NULL; 130 struct work_queue *wq = &t1->active_wq; 131 132 if (wq == NULL) 133 return NULL; 134 135 /* Sleep only a small duration if there is no work queued in case it's 136 * still refilling rather than we have no upstream work. 
*/ 137 if (unlikely(!wq->num_elems && sig)) 138 cgsleep_ms(10); 139 140 mutex_lock(&t1->lock); 141 if (likely(wq->num_elems > 0)) { 142 we = list_entry(wq->head.next, struct work_ent, head); 143 work = we->work; 144 145 list_del(&we->head); 146 free(we); 147 wq->num_elems--; 148 } 149 if (sig) 150 pthread_cond_signal(&t1->cond); 151 mutex_unlock(&t1->lock); 152 153 return work; 154 } 155 156 /********** driver interface */ 157 void exit_T1_chain(struct T1_chain *t1) 158 { 159 if (t1 == NULL) 160 return; 161 free(t1->chips); 162 t1->chips = NULL; 163 chain[t1->chain_id] = NULL; 164 chain_flag[t1->chain_id] = 0; 165 166 mcompat_set_led(t1->chain_id, LED_OFF); 167 mcompat_set_power_en(t1->chain_id, 0); 168 169 free(t1); 170 } 171 172 static void get_temperatures(struct T1_chain *t1) 173 { 174 int i; 175 int temp[MAX_CHIP_NUM] = {0}; 176 177 mcompat_get_chip_temp(t1->chain_id, temp); 178 179 for (i = 0; i < t1->num_active_chips; i++) 180 t1->chips[i].temp = temp[i]; 181 } 182 183 static void get_voltages(struct T1_chain *t1) 184 { 185 int i; 186 187 //configure for vsensor 188 mcompat_configure_tvsensor(t1->chain_id, CMD_ADDR_BROADCAST, 0); 189 for (i = 0; i < t1->num_active_chips; i++) 190 dragonmint_check_voltage(t1, i + 1, &s_reg_ctrl); 191 192 //configure for tsensor 193 mcompat_configure_tvsensor(t1->chain_id, CMD_ADDR_BROADCAST, 1); 194 195 dragonmint_get_voltage_stats(t1, &s_reg_ctrl); 196 } 197 198 static bool prechain_detect(struct T1_chain *t1, int idxpll) 199 { 200 int pll_lv_to_setspi; 201 int pll_lv_to_setvid; 202 int chain_id = t1->chain_id; 203 204 assert(pll_lv_to_setvid < idxpll); 205 206 cgsleep_us(1000); 207 208 t1->pll = 0; 209 t1->base_pll = idxpll; 210 211 if (opt_T1auto) { 212 /* Start tuning at a different voltage depending on tuning 213 * strategy. 
*/ 214 if (opt_T1_performance) 215 opt_T1Vol[chain_id] = TUNE_VOLT_START_PER; 216 else if (opt_T1_efficient) 217 opt_T1Vol[chain_id] = TUNE_VOLT_START_EFF; 218 else 219 opt_T1Vol[chain_id] = TUNE_VOLT_START_BAL; 220 } 221 222 pll_lv_to_setspi = T1_ConfigT1PLLClock(T1_PLL_SETSPI); 223 if (!t1_set_pll(t1, CMD_ADDR_BROADCAST, pll_lv_to_setspi)) 224 return false; 225 226 /* Using 390K spi speed at first and raising to 1.5M at 310M PLL 227 * to avoid spi failure on 150M PLL */ 228 applog(LOG_NOTICE, "chain%d: spi speed set to 1.5M", chain_id); 229 mcompat_set_spi_speed(chain_id, SPI_SPEED_1562K); 230 cgsleep_ms(10); 231 232 pll_lv_to_setvid = T1_ConfigT1PLLClock(T1_PLL_SETVID); 233 if (!t1_set_pll(t1, CMD_ADDR_BROADCAST, pll_lv_to_setvid)) 234 return false; 235 236 /* Set voltage down at this point to avoid massive power draw as we 237 * increase frequency */ 238 if (!opt_T1auto && opt_T1VID[chain_id]) { 239 /* If opt_T1VID values are set in non-auto mode, we use those 240 * from the config. */ 241 mcompat_set_vid_by_step(chain_id, t1->iVid, opt_T1VID[chain_id]); 242 t1->iVid = opt_T1VID[chain_id]; 243 } else { 244 t1->iVid = mcompat_find_chain_vid(chain_id, t1->num_active_chips, 245 STARTUP_VID, opt_T1Vol[chain_id]); 246 } 247 248 if (!t1_set_pll(t1, CMD_ADDR_BROADCAST, idxpll)) 249 return false; 250 251 /* Now fine tune voltage to the target level as voltage will have 252 * changed due to changing frequency */ 253 if (opt_T1auto || !opt_T1VID[chain_id]) { 254 t1->iVid = mcompat_find_chain_vid(chain_id, t1->num_active_chips, 255 t1->iVid, opt_T1Vol[chain_id]); 256 } 257 258 /* Read chip voltages */ 259 get_voltages(t1); 260 applog(LOG_NOTICE, "chain%d: volt = %.1f, vid = %d after calibration", chain_id, 261 s_reg_ctrl.average_vol[chain_id], t1->iVid); 262 263 return true; 264 } 265 266 /* 267 * BIST_START works only once after HW reset, on subsequent calls it 268 * returns 0 as number of chips. 
269 */ 270 static int chain_detect(struct T1_chain *t1) 271 { 272 int cid = t1->chain_id; 273 uint8_t n_chips = mcompat_cmd_bist_start(cid, CMD_ADDR_BROADCAST); 274 275 if (unlikely(n_chips == 0 || n_chips > MAX_CHIP_NUM)){ 276 write_miner_ageing_status(AGEING_BIST_START_FAILED); 277 return 0; 278 } 279 280 applog(LOG_WARNING, "%d: detected %d chips", cid, n_chips); 281 282 cgsleep_ms(10); 283 /* 284 if (!mcompat_cmd_bist_collect(cid, CMD_ADDR_BROADCAST)) 285 { 286 applog(LOG_WARNING, "bist collect fail"); 287 return 0; 288 } 289 */ 290 291 applog(LOG_WARNING, "collect core success"); 292 293 return n_chips; 294 } 295 296 static bool prepare_T1(struct T1_chain *t1, int chain_id) 297 { 298 uint8_t buffer[4] = {}; 299 bool ret = false; 300 301 //spi speed init 302 applog(LOG_NOTICE, "chain%d: spi speed set to 390K", chain_id); 303 mcompat_set_spi_speed(chain_id, T1_SPI_SPEED_DEF); 304 cgsleep_ms(10); 305 306 if (!dm_cmd_resetall(chain_id, CMD_ADDR_BROADCAST, buffer)) { 307 applog(LOG_ERR, "failed to reset chain %d!", chain_id); 308 goto out; 309 } 310 if (CMD_TYPE_T1 != (buffer[0] & 0xf0)) { 311 applog(LOG_ERR, "incompatible chip type %02X for chain %d!", buffer[0] & 0xf0, chain_id); 312 goto out; 313 } 314 315 t1->num_chips = chain_detect(t1); 316 cgsleep_ms(10); 317 318 if ((t1->num_chips <= 0) || (t1->num_chips > MAX_CHIP_NUM)){ 319 spi_status_err_cnt++; 320 321 if (chain_id == (MAX_CHAIN_NUM - 1)){ 322 if (spi_status_err_cnt >= MAX_CHAIN_NUM){ 323 write_miner_ageing_status(AGEING_ALL_SPI_STATUS_ERROR); 324 } 325 if ((spi_status_err_cnt >= 1) && (spi_status_err_cnt < MAX_CHAIN_NUM)){ 326 write_miner_ageing_status(AGEING_SPI_STATUS_ERROR); 327 } 328 } 329 goto out; 330 } 331 332 if (chain_id == (MAX_CHAIN_NUM - 1)){ 333 if (spi_status_err_cnt >= MAX_CHAIN_NUM){ 334 write_miner_ageing_status(AGEING_ALL_SPI_STATUS_ERROR); 335 } 336 if ((spi_status_err_cnt >= 1) && (spi_status_err_cnt < MAX_CHAIN_NUM)){ 337 write_miner_ageing_status(AGEING_SPI_STATUS_ERROR); 338 } 339 
} 340 341 /* override max number of active chips if requested */ 342 t1->num_active_chips = t1->num_chips; 343 if (T1_config_options.override_chip_num > 0 && 344 t1->num_chips > T1_config_options.override_chip_num) { 345 t1->num_active_chips = T1_config_options.override_chip_num; 346 applog(LOG_WARNING, "%d: limiting chain to %d chips", 347 chain_id, t1->num_active_chips); 348 } 349 350 /* Free this in case we are re-initialising a chain */ 351 free(t1->chips); 352 t1->chips = cgcalloc(t1->num_active_chips, sizeof(struct T1_chip)); 353 ret = true; 354 out: 355 return ret; 356 } 357 358 static struct T1_chain *pre_init_T1_chain(int chain_id) 359 { 360 struct T1_chain *t1 = cgcalloc(sizeof(*t1), 1); 361 362 applog(LOG_INFO, "pre %d: T1 init chain", chain_id); 363 364 t1->chain_id = chain_id; 365 if (!prepare_T1(t1, chain_id)) { 366 exit_T1_chain(t1); 367 t1 = NULL; 368 } 369 return t1; 370 } 371 372 static bool init_T1_chain(struct T1_chain *t1) 373 { 374 int i; 375 uint8_t src_reg[REG_LENGTH] = {0}; 376 uint8_t reg[REG_LENGTH] = {0}; 377 int chain_id = t1->chain_id; 378 int num_chips; 379 bool ret = false; 380 381 applog(LOG_INFO, "%d: T1 init chain", chain_id); 382 383 applog(LOG_NOTICE, "chain%d: spi speed set to 6.25M", chain_id); 384 mcompat_set_spi_speed(chain_id, SPI_SPEED_6250K); 385 cgsleep_ms(1); 386 387 #ifdef USE_BISTMASK 388 dm_cmd_resetbist(chain_id, CMD_ADDR_BROADCAST, reg); 389 //cgsleep_ms(120); 390 sleep(1); 391 392 //bist mask 393 mcompat_cmd_read_register(chain_id, 0x01, reg, REG_LENGTH); 394 memcpy(src_reg, reg, REG_LENGTH); 395 src_reg[7] = src_reg[7] | 0x10; 396 mcompat_cmd_write_register(chain_id, CMD_ADDR_BROADCAST, src_reg, REG_LENGTH); 397 cgsleep_us(200); 398 #endif 399 400 applog(LOG_DEBUG, "%d: T1 init chain", chain_id); 401 402 num_chips = chain_detect(t1); 403 cgsleep_ms(10); 404 405 if (num_chips != 0 && num_chips != t1->num_chips) { 406 applog(LOG_WARNING, "T1 %d: Num chips failure", chain_id); 407 goto out; 408 } 409 410 if 
(!mcompat_cmd_bist_fix(chain_id, CMD_ADDR_BROADCAST)) { 411 write_miner_ageing_status(AGEING_BIST_FIX_FAILED); 412 goto out; 413 } 414 415 cgsleep_us(200); 416 417 #if 0 418 sprintf(volShowLog[chain_id], "+ %2d | %8f | %8f | %8f |",chain_id, \ 419 s_reg_ctrl.highest_vol[chain_id],s_reg_ctrl.average_vol[chain_id],s_reg_ctrl.lowest_vol[chain_id]); 420 dragonmint_log_record(chain_id, volShowLog[chain_id], strlen(volShowLog[0])); 421 #endif 422 applog(LOG_WARNING, 423 "Chain %d Voltage information. Highest Vol:%.0f, Average Vol:%.0f, Lowest Vol:%.0f", 424 chain_id, s_reg_ctrl.highest_vol[chain_id], s_reg_ctrl.average_vol[chain_id], 425 s_reg_ctrl.lowest_vol[chain_id]); 426 427 /* Reset value in case we are re-initialising */ 428 t1->num_cores = 0; 429 for (i = 0; i < t1->num_active_chips; i++) 430 check_chip(t1, i); 431 432 applog(LOG_WARNING, "%d: found %d chips with total %d active cores", 433 chain_id, t1->num_active_chips, t1->num_cores); 434 435 if (!opt_T1auto) 436 t1->VidOptimal = t1->pllOptimal = true; 437 438 ret = true; 439 out: 440 return ret; 441 } 442 443 /* Asynchronous work generation since get_work is a blocking function */ 444 static void *T1_work_thread(void *arg) 445 { 446 struct cgpu_info *cgpu = arg; 447 struct T1_chain *t1 = cgpu->device_data; 448 char tname[16]; 449 450 sprintf(tname, "T1_%dwork", t1->chain_id); 451 RenameThread(tname); 452 453 mutex_lock(&t1->lock); 454 455 while (!pthread_cond_wait(&t1->cond, &t1->lock)) { 456 mutex_unlock(&t1->lock); 457 458 /* Only start filling the queue once we're 1/3 empty */ 459 if (t1->active_wq.num_elems < t1->num_active_chips * 4 / 3) 460 wq_enqueue(cgpu->thr[0], t1); 461 462 mutex_lock(&t1->lock); 463 } 464 465 return NULL; 466 } 467 468 static void start_T1_chain(int cid, int retries) 469 { 470 mcompat_set_reset(cid, 1); 471 sleep(retries); 472 mcompat_set_power_en(cid, 1); 473 sleep(retries); 474 mcompat_set_reset(cid, 0); 475 sleep(retries); 476 mcompat_set_start_en(cid, 1); 477 sleep(retries); 478 
mcompat_set_reset(cid, 1); 479 sleep(retries); 480 } 481 482 static bool detect_T1_chain(void) 483 { 484 int i, retries, chain_num = 0, chip_num = 0, iPll; 485 c_temp_cfg tmp_cfg; 486 487 applog(LOG_NOTICE, "T1: checking T1 chain"); 488 489 for(i = 0; i < MAX_CHAIN_NUM; i++) { 490 if (chain_plug[i] != 1) 491 continue; 492 chain_num++; 493 } 494 495 496 /* Go back and try chains that have failed after cycling through all of 497 * them. */ 498 for (retries = 0; retries < 3; retries++) { 499 for (i = 0; i < MAX_CHAIN_NUM; i++) { 500 if (chain_plug[i] != 1) 501 continue; 502 if (chain[i]) 503 continue; 504 start_T1_chain(i, retries); 505 506 /* pre-init chain */ 507 if ((chain[i] = pre_init_T1_chain(i))) { 508 chain_flag[i] = 1; 509 if (chain[i]->num_chips > chip_num) 510 chip_num = chain[i]->num_chips; 511 } 512 } 513 } 514 515 // reinit platform with real chain number and chip number 516 applog(LOG_NOTICE, "platform re-init: chain_num(%d), chip_num(%d)", chain_num, chip_num); 517 sys_platform_exit(); 518 sys_platform_init(PLATFORM_ZYNQ_HUB_G19, MCOMPAT_LIB_MINER_TYPE_T1, chain_num, chip_num); 519 520 for (i = 0; i < MAX_CHAIN_NUM; i++) { 521 if (chain_plug[i] != 1) 522 continue; 523 if (chain[i] == NULL){ 524 applog(LOG_ERR, "init %d T1 chain fail", i); 525 continue; 526 } 527 528 // re-config spi speed after platform init 529 mcompat_set_spi_speed(i, T1_SPI_SPEED_DEF); 530 cgsleep_ms(10); 531 532 mcompat_cfg_tsadc_divider(i, PLL_Clk_12Mhz[0].speedMHz); 533 } 534 535 // init temp ctrl 536 dm_tempctrl_get_defcfg(&tmp_cfg); 537 /* Set initial target temperature lower for more reliable startup */ 538 tmp_cfg.tmp_target = T1_TEMP_TARGET_INIT; // target temperature 539 dm_tempctrl_init(&tmp_cfg); 540 541 // start fan ctrl thread 542 c_fan_cfg fan_cfg; 543 dm_fanctrl_get_defcfg(&fan_cfg); 544 fan_cfg.preheat = false; // disable preheat 545 fan_cfg.fan_speed = T1_FANSPEED_INIT; 546 dm_fanctrl_init(&fan_cfg); 547 // dm_fanctrl_init(NULL); // using default cfg 548 
pthread_create(&fan_tid, NULL, dm_fanctrl_thread, NULL); 549 550 for(i = 0; i < MAX_CHAIN_NUM; i++) { 551 if (chain_flag[i] != 1) 552 continue; 553 if (!prechain_detect(chain[i], T1Pll[i])) { 554 chain_flag[i] = 0; 555 exit_T1_chain(chain[i]); 556 } 557 } 558 559 for(i = 0; i < MAX_CHAIN_NUM; i++) { 560 if (chain_flag[i] != 1) 561 continue; 562 563 if (!init_T1_chain(chain[i])) { 564 exit_T1_chain(chain[i]); 565 applog(LOG_ERR, "init %d T1 chain fail", i); 566 chain_flag[i] = 0; 567 continue; 568 } 569 } 570 571 for(i = 0; i < MAX_CHAIN_NUM; i++) { 572 struct cgpu_info *cgpu; 573 struct T1_chain *t1; 574 pthread_t pth; 575 576 if (chain_flag[i] != 1) 577 continue; 578 579 total_chains++; 580 cgpu = cgcalloc(sizeof(*cgpu), 1); 581 cgpu->drv = &dragonmintT1_drv; 582 cgpu->name = "DragonmintT1.SingleChain"; 583 cgpu->threads = 1; 584 cgpu->chainNum = i; 585 cgpu->device_data = t1 = chain[i]; 586 cgtime(&cgpu->dev_start_tv); 587 t1->lastshare = cgpu->dev_start_tv.tv_sec; 588 589 iPll = T1Pll[i]; 590 591 if ((chain[i]->num_chips <= MAX_CHIP_NUM) && (chain[i]->num_cores <= MAX_CORES)) { 592 cgpu->mhs_av = (double)PLL_Clk_12Mhz[iPll].speedMHz * 2ull * (chain[i]->num_cores); 593 } else { 594 cgpu->mhs_av = 0; 595 chain_flag[i] = 0; 596 } 597 598 chain[i]->cgpu = cgpu; 599 cgpu->device_id = i; 600 add_cgpu(cgpu); 601 602 mcompat_set_led(i, LED_ON); 603 applog(LOG_WARNING, "Detected the %d T1 chain with %d chips / %d cores", 604 i, chain[i]->num_active_chips, chain[i]->num_cores); 605 606 INIT_LIST_HEAD(&t1->active_wq.head); 607 608 mutex_init(&t1->lock); 609 pthread_cond_init(&t1->cond, NULL); 610 pthread_create(&pth, NULL, T1_work_thread, cgpu); 611 } 612 613 if (!total_chains) 614 return false; 615 616 /* Now adjust target temperature for runtime setting */ 617 tmp_cfg.tmp_target = T1_TEMP_TARGET_RUN; 618 dm_tempctrl_set(&tmp_cfg); 619 620 return true; 621 } 622 623 /* Probe SPI channel and register chip chain */ 624 void T1_detect(bool hotplug) 625 { 626 int i; 627 628 
if (hotplug) 629 return; 630 631 /* parse bimine-t1-options */ 632 if (opt_dragonmint_t1_options != NULL && parsed_config_options == NULL) { 633 int ref_clk = 0; 634 int sys_clk = 0; 635 int spi_clk = 0; 636 int override_chip_num = 0; 637 int wiper = 0; 638 639 sscanf(opt_dragonmint_t1_options, "%d:%d:%d:%d:%d", 640 &ref_clk, &sys_clk, &spi_clk, &override_chip_num, 641 &wiper); 642 if (ref_clk != 0) 643 T1_config_options.ref_clk_khz = ref_clk; 644 if (sys_clk != 0) { 645 if (sys_clk < 100000) 646 quit(1, "system clock must be above 100MHz"); 647 T1_config_options.sys_clk_khz = sys_clk; 648 } 649 if (spi_clk != 0) 650 T1_config_options.spi_clk_khz = spi_clk; 651 if (override_chip_num != 0) 652 T1_config_options.override_chip_num = override_chip_num; 653 if (wiper != 0) 654 T1_config_options.wiper = wiper; 655 656 /* config options are global, scan them once */ 657 parsed_config_options = &T1_config_options; 658 } 659 660 applog(LOG_DEBUG, "T1 detect"); 661 memset(&s_reg_ctrl,0,sizeof(s_reg_ctrl)); 662 663 g_hwver = dragonmint_get_hwver(); 664 // g_type = dragonmint_get_miner_type(); 665 666 // FIXME: get correct hwver and chain num to init platform 667 sys_platform_init(PLATFORM_ZYNQ_HUB_G19, MCOMPAT_LIB_MINER_TYPE_T1, MAX_CHAIN_NUM, MAX_CHIP_NUM); 668 669 applog(LOG_NOTICE, "vid type detected: %d", misc_get_vid_type()); 670 671 // set fan speed high to get to a lower startup temperature 672 dm_fanctrl_set_fan_speed(T1_FANSPEED_INIT); 673 674 //dragonmint_miner_init_voltage_flag(); 675 676 for (i = MAX_CHAIN_NUM - 1; i >= 0; i--) { 677 if (mcompat_get_plug(i) == 0) { 678 chain_plug[i] = 1; 679 applog(LOG_INFO, "chain:%d the plat is inserted", i); 680 } else { 681 applog(LOG_INFO, "chain:%d the plat is not inserted", i); 682 write_miner_ageing_status(AGEING_PLUG_STATUS_ERROR); 683 } 684 } 685 686 /* If hardware version is g19, continue init cgminer. 
Else power off*/ 687 if (HARDWARE_VERSION_G19 == g_hwver) { 688 applog(LOG_INFO, "The hardware version is G19"); 689 for (i = 0; i < MAX_CHAIN_NUM; i++) { 690 if (chain_plug[i] != 1) 691 continue; 692 693 /* Sets initial voltage to very high to get chips 694 * initialised. */ 695 mcompat_set_vid(i, STARTUP_VID); 696 } 697 } else if (HARDWARE_VERSION_G9 == g_hwver) { 698 applog(LOG_INFO, "The hardware version is G9"); 699 mcompat_set_vid(0, STARTUP_VID); 700 } else { 701 for(i = 0; i < MAX_CHAIN_NUM; i++) { 702 applog(LOG_ERR, "Unknown hwver, chain%d power down", i); 703 mcompat_chain_power_down(i); 704 } 705 write_miner_ageing_status(AGEING_HW_VERSION_ERROR); 706 return; 707 } 708 709 for(i = 0; i < MAX_CHAIN_NUM; ++i) { 710 int pll = DEFAULT_PLL; 711 712 /* Tune voltage to highest frequency in Performance mode, and 713 * lowest frequency in efficient mode. */ 714 if (opt_T1auto) { 715 if (opt_T1_performance) 716 pll = MAX_PLL; 717 else if (opt_T1_efficient) 718 pll = MIN_PLL; 719 } else 720 pll = opt_T1Pll[i]; 721 T1Pll[i] = T1_ConfigT1PLLClock(pll); 722 } 723 724 if (detect_T1_chain()) { 725 if (misc_get_vid_type() == MCOMPAT_LIB_VID_I2C_TYPE) 726 set_timeout_on_i2c(30); 727 applog(LOG_WARNING, "T1 detect finish"); 728 } 729 } 730 731 /* Exit cgminer on failure, allowing systemd watchdog to restart */ 732 static void reinit_T1_chain(struct T1_chain *t1, int cid) 733 { 734 bool success = false; 735 struct timeval now; 736 int i; 737 738 applog(LOG_WARNING, "T1: %d attempting to re-initialise!", cid); 739 for (i = 0; i < 3; i++) { 740 start_T1_chain(cid, i); 741 if (prepare_T1(t1, cid)) { 742 success = true; 743 break; 744 } 745 } 746 if (!success) { 747 applog(LOG_EMERG, "T1: %d FAILED TO PREPARE, SHUTTING DOWN", cid); 748 raise_cgminer(); 749 } 750 if (!prechain_detect(t1, T1Pll[cid])) { 751 applog(LOG_EMERG, "T1: %d FAILED TO PRECHAIN DETECT, SHUTTING DOWN", cid); 752 raise_cgminer(); 753 } 754 if (!init_T1_chain(t1)) { 755 applog(LOG_EMERG, "T1: %d FAILED TO 
INIT, SHUTTING DOWN", cid); 756 raise_cgminer(); 757 } 758 cgtime(&now); 759 t1->lastshare = now.tv_sec; 760 } 761 762 #define VOLTAGE_UPDATE_INT 121 763 #define WRITE_CONFIG_TIME 60 764 #define CHECK_DISABLE_TIME 59 765 766 #if 0 767 char szShowLog[MAX_CHAIN_NUM][MAX_CHIP_NUM][256] = {0}; 768 #define LOG_FILE_PREFIX "/tmp/log/analys" 769 770 char cLevelError1[3] = "!"; 771 char cLevelError2[3] = "#"; 772 char cLevelError3[3] = "$"; 773 char cLevelError4[3] = "%"; 774 char cLevelError5[3] = "*"; 775 char cLevelNormal[3] = "+"; 776 777 void Dragonmint_Log_Save(struct T1_chip *chip,int nChip,int nChain) 778 { 779 char szInNormal[8] = {}; 780 781 if (chip->hw_errors > 0){ 782 strcat(szInNormal,cLevelError1); 783 } 784 if (chip->stales > 0){ 785 strcat(szInNormal,cLevelError2); 786 } 787 if (chip->num_cores < 32){ 788 strcat(szInNormal,cLevelError4); 789 } 790 if ((chip->nVol > 440) || (chip->nVol < 360)){ 791 strcat(szInNormal,cLevelError5); 792 } 793 794 if ((chip->hw_errors == 0) && (chip->stales == 0) && ((chip->nVol < 440) && (chip->nVol > 360)) && (chip->num_cores == 32)){ 795 strcat(szInNormal,cLevelNormal); 796 } 797 798 sprintf(szShowLog[nChain][nChip], "\n%-8s|%32d|%8d|%8d|%8d|%8d|%8d|%8d|%8d",szInNormal,chip->nonces_found, 799 chip->hw_errors, chip->stales,chip->temp,chip->nVol,chip->num_cores,nChip,nChain); 800 } 801 802 void dragonmint_log_print(int cid, void* log, int len) 803 { 804 FILE* fd; 805 char fileName[128] = {0}; 806 807 sprintf(fileName, "%s%d.log", LOG_FILE_PREFIX, cid); 808 fd = fopen(fileName, "w+"); 809 if (fd == NULL){ 810 applog(LOG_ERR, "Open log File%d Failed!", cid); 811 return; 812 } 813 814 fwrite(log, len, 1, fd); 815 fflush(fd); 816 fclose(fd); 817 } 818 #endif 819 820 /* Invalidate all statistics during buffer underruns and while hardware isn't 821 * running at its optimal temperature. 
*/ 822 static void reset_tune(struct T1_chain *t1) 823 { 824 struct timeval now; 825 826 cgtime(&now); 827 copy_time(&t1->cycle_start, &now); 828 t1->cycles = 0; 829 /* Reset last share time since the hardware could genuinely not be 830 * able to generate shares during extended underruns. */ 831 t1->lastshare = now.tv_sec; 832 } 833 834 static void T1_set_optimal_vid(struct T1_chain *t1, int cid) 835 { 836 double best = 0, product[T1_VID_TUNE_RANGE] = {}; 837 int i, vid = t1->iVid; 838 839 for (i = 0; i < T1_VID_TUNE_RANGE; i++) { 840 product[i] = t1->vidproduct[i]; 841 /* In efficient mode divide by the square of the voltage */ 842 if (opt_T1_efficient && t1->vidvol[i]) 843 product[i] /= (double)(t1->vidvol[i] * t1->vidvol[i]); 844 } 845 846 for (i = 0; i < T1_VID_TUNE_RANGE; i++) { 847 if (!t1->vidproduct[i]) 848 continue; 849 applog(LOG_ERR, "vid%d: product=%.5f, hwerr=%.5f", 850 i, t1->vidproduct[i], t1->vidhwerr[i]); 851 /* Allow up to 1% drop for the sake of lower voltage */ 852 if (!best || (product[i] > best * 0.99 && t1->vidhwerr[i] < 0.2)) { 853 best = product[i]; 854 vid = i; 855 } 856 /* Reset values for clean reuse */ 857 t1->vidproduct[i] = 0; 858 } 859 860 t1->optimalVid = vid; 861 t1->VidOptimal = true; 862 863 mcompat_set_vid_by_step(cid, t1->iVid, vid); 864 t1->iVid = vid; 865 866 get_voltages(t1); 867 /* Store the optimal voltage for readjusting after PLL changes */ 868 t1->optimal_vol = s_reg_ctrl.average_vol[cid]; 869 870 /* Set opt_T1VID for saving to config file */ 871 opt_T1VID[cid] = t1->iVid; 872 873 for (i = 0; i < T1_PLL_TUNE_RANGE; i++) 874 t1->pllvid[i] = t1->iVid; 875 } 876 877 /* This returns the best absolute product */ 878 static double T1_best_vid_product(struct T1_chain *t1) 879 { 880 double best = 0; 881 int i; 882 883 for (i = 0; i < T1_VID_TUNE_RANGE; i++) { 884 if (t1->vidproduct[i] > best) 885 best = t1->vidproduct[i]; 886 } 887 return best; 888 } 889 890 static void T1_set_optimal_pll(struct T1_chain *t1, int cid) 891 { 892 
double best = 0, product[T1_PLL_TUNE_RANGE] = {}; 893 int i, pll = t1->pll, best_offset = 0; 894 895 for (i = 0; i < T1_PLL_TUNE_RANGE; i++) { 896 product[i] = t1->pllproduct[i]; 897 /* In efficient mode divide by the frequency */ 898 if (opt_T1_efficient) 899 product[i] /= (double)PLL_Clk_12Mhz[i + T1_PLL_TUNE_MIN].speedMHz; 900 } 901 902 for (i = 0; i < T1_PLL_TUNE_RANGE; i++) { 903 if (!t1->pllproduct[i]) 904 continue; 905 applog(LOG_ERR, "pll%d: product=%.5f, hwerr=%.5f", 906 i + T1_PLL_TUNE_MIN, t1->pllproduct[i], t1->pllhwerr[i]); 907 if (!best || (product[i] > best && t1->pllhwerr[i] < 0.2)) { 908 best = product[i]; 909 pll = i + T1_PLL_TUNE_MIN; 910 best_offset = i; 911 } 912 t1->pllproduct[i] = 0; 913 } 914 915 t1->pllOptimal = true; 916 917 t1_set_pll(t1, CMD_ADDR_BROADCAST, pll); 918 t1->base_pll = t1->pll; 919 920 /* Set opt_T1Pll for saving to config file */ 921 opt_T1Pll[cid] = PLL_Clk_12Mhz[t1->pll].speedMHz; 922 923 /* Readjust iVid if we changed it during tuning */ 924 if (t1->iVid != t1->pllvid[best_offset]) { 925 mcompat_set_vid_by_step(cid, t1->iVid, t1->pllvid[best_offset]); 926 t1->iVid = t1->pllvid[best_offset]; 927 opt_T1VID[cid] = t1->iVid; 928 } 929 } 930 931 static double T1_best_pll_product(struct T1_chain *t1) 932 { 933 double best = 0; 934 int i; 935 936 for (i = 0; i < T1_PLL_TUNE_RANGE; i++) { 937 if (t1->pllproduct[i] > best) 938 best = t1->pllproduct[i]; 939 } 940 return best; 941 } 942 943 static void T1_save_config(void) 944 { 945 FILE *fcfg; 946 947 fcfg = fopen("/config/cgminer.conf", "w"); 948 if (unlikely(fcfg == NULL)) { 949 applog(LOG_ERR, "Failed to open /config/cgminer.conf for writing!"); 950 return; 951 } 952 953 write_config(fcfg); 954 fflush(fcfg); 955 fclose(fcfg); 956 } 957 958 static void T1_tune_complete(struct T1_chain *t1, int cid) 959 { 960 int i; 961 962 applog(LOG_WARNING, "T1 %d tuning complete, optimal VID %d PLL %d", cid, 963 t1->iVid, t1->pll); 964 t1->sampling = false; 965 /* Reset hw error count to 
ignore noise during tuning. */ 966 for(i = 0; i < t1->num_active_chips; ++i) 967 t1->chips[i].hw_errors = 0; 968 if (++chains_tuned < total_chains) 969 return; 970 971 applog(LOG_WARNING, "Tuning complete, saving results to config file"); 972 /* Change t1auto to false to save to config to disable tuning 973 * on next run. */ 974 opt_T1auto = false; 975 T1_save_config(); 976 /* Reset all stats after tuning */ 977 zero_stats(); 978 } 979 980 static void T1_tune(struct T1_chain *t1, int cid) 981 { 982 double product, tdiff, best, hw_rate; 983 int offset, i, hwerr, hw_diff; 984 struct timeval now; 985 986 cgtime(&now); 987 988 if (t1->pllOptimal) 989 return; 990 991 if (unlikely(!t1->cycle_start.tv_sec)) { 992 copy_time(&t1->cycle_start, &now); 993 t1->cycles = 0; 994 return; 995 } 996 997 if (t1->cycles < T1_CYCLES_CHAIN) 998 return; 999 1000 tdiff = ms_tdiff(&now, &t1->cycle_start); 1001 product = (double)t1->cycles / tdiff; 1002 1003 // hwerr stat. 1004 hwerr = 0; 1005 for(i = 0; i < t1->num_active_chips; ++i) 1006 hwerr += t1->chips[i].hw_errors; 1007 hw_diff = hwerr - t1->hw_errors; 1008 t1->hw_errors = hwerr; 1009 hw_rate = (double) hw_diff / tdiff; 1010 1011 applog(LOG_NOTICE, "Chain %d cycles %d, hw %d, vid %d, pll %d, %.1fms product %f, hwrate %f", 1012 cid, t1->cycles, hw_diff, t1->iVid, t1->pll, tdiff, product, hw_rate); 1013 reset_tune(t1); 1014 1015 if (!t1->sampling) { 1016 /* Discard the first lot of samples due to changing diff on 1017 * startup and possible init times invalidating data. 
 */
		t1->sampling = true;
		return;
	}

	/* Voltage (VID) tuning already finished: go straight to PLL tuning */
	if (t1->VidOptimal)
		goto tune_freq;

	/* --- VID (voltage) tuning pass --- */
	best = T1_best_vid_product(t1);
	t1->vidproduct[t1->iVid] = product;
	t1->vidhwerr[t1->iVid] = hw_rate;
	/* Plenty of time has passed since we set this VID so reading the
	 * voltage will be accurate here */
	get_voltages(t1);
	t1->vidvol[t1->iVid] = s_reg_ctrl.average_vol[cid];

	/* Don't keep going lower voltage in Performance mode if there's been
	 * a large drop in product as any further may be unstable */
	if (t1->iVid < T1_VID_MAX && (!opt_T1_performance || product > best * 0.9)) {
		/* We don't need great accuracy here so no need to delay after
		 * setting VID */
		mcompat_set_vid(cid, ++t1->iVid);
		get_voltages(t1);
		if (s_reg_ctrl.average_vol[cid] > TUNE_VOLT_STOP) {
			/* NOTE(review): "Chain% d" looks like a typo for
			 * "Chain %d" — runtime string deliberately untouched */
			applog(LOG_NOTICE, "Chain% d testing iVid %d avg voltage %.0f",
			       cid, t1->iVid, s_reg_ctrl.average_vol[cid]);
			return;
		}
	}

	/* Now find the iVid that corresponds with highest product */
	T1_set_optimal_vid(t1, cid);
	applog(LOG_WARNING, "T1 %d optimal iVid set to %d, beginning freq tune",
	       cid, t1->iVid);
	return;

tune_freq:
	/* --- PLL (frequency) tuning pass --- */
	best = T1_best_pll_product(t1);
	offset = t1->pll - T1_PLL_TUNE_MIN;
	t1->pllproduct[offset] = product;
	t1->pllhwerr[offset] = hw_rate;
	if (t1->pll < T1_PLL_TUNE_MAX && product > best) {
		/* Only keep increasing frequency if product has been
		 * increasing. */
		t1->base_pll = ++t1->pll;
		applog(LOG_NOTICE, "Chain %d testing pll %d", cid, t1->pll);
		T1_SetT1PLLClock(t1, t1->pll, 0);
		if (t1->iVid > T1_VID_MIN) {
			/* Vid was set a long time ago so it should be
			 * accurate to read voltages. */
			get_voltages(t1);
			if (s_reg_ctrl.average_vol[cid] < t1->optimal_vol - 2) {
				applog(LOG_WARNING, "Chain %d dropping VID to %d due to PLL %d lowering voltage to %.0f",
				       cid, --t1->iVid, t1->pll, s_reg_ctrl.average_vol[cid]);
				mcompat_set_vid(cid, t1->iVid);
			}
		}
		/* Store the iVid associated with this pll */
		t1->pllvid[offset] = t1->iVid;
		return;
	}

	T1_set_optimal_pll(t1, cid);
	applog(LOG_WARNING, "Chain %d optimal pll set to %d Mhz",
	       cid, PLL_Clk_12Mhz[t1->pll].speedMHz);

	T1_tune_complete(t1, cid);
}

/* Longest single sleep (ms) while polling for nonces in T1_scanwork() */
#define MAX_NONCE_SLEEP		(100)
/* Minimum seconds between successive downclock steps when overheating */
#define T1_THROTTLE_INTERVAL	(5)
/* Minimum seconds of cooldown before stepping the clock back up */
#define T1_RAISE_INTERVAL	(15)

/* Drop the chain's PLL one step in response to overheating (or fans running
 * above target speed).  Rate limited to one step per T1_THROTTLE_INTERVAL
 * seconds; t1->throttled doubles as the timestamp of the last step. */
static void t1_throttle(struct T1_chain *t1, int cid)
{
	time_t now;

	/* Chain will have been shut down by the time we get to zero but it's
	 * possible with complete fan failures. */
	if (t1->pll <= T1_PLL_MIN)
		return;

	/* Only throttle further after 5 second intervals */
	now = time(NULL);
	if (now - t1->throttled < T1_THROTTLE_INTERVAL)
		return;
	t1->throttled = now;

	applog(LOG_WARNING, "T1 %d Chain throttling to %d MHz for overheat!", cid,
	       PLL_Clk_12Mhz[--t1->pll].speedMHz);
	T1_SetT1PLLClock(t1, t1->pll, 0);
}

/* Step the PLL back up after the chain has cooled, one step per
 * T1_RAISE_INTERVAL seconds, until the pre-throttle base pll is restored. */
static void t1_raise(struct T1_chain *t1, int cid)
{
	time_t now = time(NULL);

	/* Same as throttling, but wait 15s before increasing frequency */
	if (now - t1->throttled < T1_RAISE_INTERVAL)
		return;
	t1->throttled = now;

	applog(LOG_WARNING, "T1 %d Chain increasing frequency to %d MHz from throttle due to cooldown",
	       cid, PLL_Clk_12Mhz[++t1->pll].speedMHz);
	T1_SetT1PLLClock(t1, t1->pll, 0);

	/* If we're back to base pll then throttling has ceased */
	if (t1->pll >= t1->base_pll) {
		t1->throttled = 0;
		/* Reset all the values in case we started throttling in the
		 * middle of tuning, rendering all the values invalid. */
		reset_tune(t1);
	}
}

/* Consecutive command failures tolerated before resetting the SPI hub */
#define MAX_CMD_FAILS	(0)
/* Hub resets tolerated before reinitialising the whole chain */
#define MAX_CMD_RESETS	(50)

static int g_cmd_fails[MAX_CHAIN_NUM];
static int g_cmd_resets[MAX_CHAIN_NUM];

/* Never returns: parks the calling thread blinking the chain LED forever,
 * used after an emergency thermal power-down. */
static void T1_overheated_blinking(int cid)
{
	// block thread and blink led
	while (42) {
		mcompat_set_led(cid, LED_OFF);
		cgsleep_ms(500);
		mcompat_set_led(cid, LED_ON);
		cgsleep_ms(500);
	}
}

/* Main per-chain work loop: harvest queued nonces, refill the chip work
 * queues, run temperature control and (optionally) auto-tuning.  Returns
 * hashes completed (scaled by 2^32 at the return site) or -1 to disable
 * the device when no cores are alive. */
static int64_t T1_scanwork(struct thr_info *thr)
{
	struct cgpu_info *cgpu = thr->cgpu;
	struct T1_chain *t1 = cgpu->device_data;
	int cid = t1->chain_id, i;
	struct T1_chip *chip;
	int64_t hashes = 0;
	uint32_t nonce;
	uint8_t chip_id;
	uint8_t job_id;
	uint16_t micro_job_id;
	uint8_t reg[REG_LENGTH] = {0};
	int timer_sleep = 10;
	int slept = 0;
	bool nononce = false;
	int chain_temp_status;
#ifdef USE_AUTONONCE
	bool autononce = true;
#endif
	struct timeval now;

	/* A chain with zero (or an implausible number of) live cores cannot
	 * hash; disable the device rather than spin */
	if (unlikely((t1->num_cores == 0) || (t1->num_cores > MAX_CORES))) {
		cgpu->deven = DEV_DISABLED;
		return -1;
	}

	/* Spurious wakeups are harmless */
	pthread_cond_signal(&t1->cond);

	cgtime(&now);

	/* Poll queued results. A full nonce range takes about 200ms to scan so
	 * we're unlikely to need more work until then. Poll every 10ms for up
	 * to 100ms using a reentrant function to avoid having any latency from
	 * processing received nonces and avoid sleeping too long.
*/ 1184 while (true) { 1185 struct work *work; 1186 1187 if (!get_nonce(t1, (uint8_t*)&nonce, &chip_id, &job_id, (uint8_t*)µ_job_id)) { 1188 if (timer_sleep < MAX_NONCE_SLEEP && !thr->work_restart && !nononce) { 1189 /* Do not sleep more than twice if no results */ 1190 nononce = true; 1191 slept += cgsleep_ms_r(&t1->cgt, timer_sleep); 1192 timer_sleep += 10; 1193 continue; 1194 } 1195 /* Disable any further sleeps */ 1196 timer_sleep = MAX_NONCE_SLEEP; 1197 #ifdef USE_AUTONONCE 1198 if (autononce) { 1199 /* Check once more after autononce is disabled 1200 * in case we just missed the last one */ 1201 mcompat_cmd_auto_nonce(cid, 0, REG_LENGTH); // disable auto get nonce 1202 autononce = false; 1203 continue; 1204 } else 1205 #endif 1206 break; 1207 } 1208 nononce = false; 1209 nonce = bswap_32(nonce); 1210 chip = &t1->chips[chip_id - 1]; 1211 if (nonce == chip->last_nonce) { 1212 applog(LOG_INFO, "%d: chip %d: duplicate nonce.", cid, chip_id); 1213 chip->dupes++; 1214 continue; 1215 } 1216 1217 if (chip_id < 1 || chip_id > t1->num_active_chips) { 1218 applog(LOG_WARNING, "%d: wrong chip_id %d", cid, chip_id); 1219 continue; 1220 } 1221 if (job_id < 1 || job_id > 4) { 1222 applog(LOG_WARNING, "%d: chip %d: result has wrong ""job_id %d", cid, chip_id, job_id); 1223 continue; 1224 } 1225 chip->last_nonce = nonce; 1226 work = chip->work[job_id - 1]; 1227 if (work == NULL) { 1228 /* already been flushed => stale */ 1229 applog(LOG_INFO, "%d: chip %d: stale nonce 0x%08x", cid, chip_id, nonce); 1230 chip->stales++; 1231 continue; 1232 } 1233 work->micro_job_id = micro_job_id; 1234 memcpy(work->data, &(work->pool->vmask_001[micro_job_id]), 4); 1235 1236 if (!submit_nonce(thr, work, nonce)) { 1237 applog(LOG_INFO, "%d: chip %d: invalid nonce 0x%08x", cid, chip_id, nonce); 1238 applog(LOG_INFO, "micro_job_id %d", micro_job_id); 1239 chip->hw_errors++; 1240 continue; 1241 } 1242 applog(LOG_INFO, "YEAH: %d: chip %d / job_id %d: nonce 0x%08x", cid, chip_id, job_id, nonce); 1243 
chip->nonces_found++; 1244 hashes += work->device_diff; 1245 t1->cycles++; 1246 t1->lastshare = now.tv_sec; 1247 } 1248 1249 if (unlikely(now.tv_sec - t1->lastshare > 300)) { 1250 applog(LOG_EMERG, "T1 chain %d not producing shares for more than 5 mins.", 1251 cid); 1252 reinit_T1_chain(t1, cid); 1253 } 1254 1255 cgsleep_prepare_r(&t1->cgt); 1256 1257 if (thr->work_restart) { 1258 if (!dm_cmd_resetjob(cid, CMD_ADDR_BROADCAST, reg)) 1259 applog(LOG_WARNING, "T1 %d clear work failed", cid); 1260 1261 /* Flush the work chips were currently hashing */ 1262 for (i = 0; i < t1->num_active_chips; i++) { 1263 int j; 1264 1265 struct T1_chip *chip = &t1->chips[i]; 1266 for (j = 0; j < 2; j++) { 1267 if (!chip->work[j]) 1268 continue; 1269 //applog(LOG_DEBUG, "%d: flushing chip %d, work %d: 0x%p", 1270 // cid, i, j + 1, work); 1271 free_work(chip->work[j]); 1272 } 1273 chip->last_queued_id = 0; 1274 } 1275 1276 /* Flush any work in the driver queue */ 1277 while (t1->active_wq.num_elems > 0) { 1278 /* We don't want to signal wq_dequeue to get more 1279 * work until we've emptied the queue */ 1280 struct work *work = wq_dequeue(t1, false); 1281 1282 free_work(work); 1283 } 1284 pthread_cond_signal(&t1->cond); 1285 /* Reset tuning parameters since dropping work on block change 1286 * can adversely affect hashrate */ 1287 reset_tune(t1); 1288 } 1289 1290 /* Clean spi buffer before read 0a reg */ 1291 hub_spi_clean_chain(cid); 1292 1293 if (thr->work_restart || mcompat_cmd_read_register(cid, MAX_CHIP_NUM >> 1, reg, REG_LENGTH)) { 1294 uint8_t qstate = reg[9] & 0x03; 1295 1296 /* Clear counter */ 1297 g_cmd_fails[cid] = 0; 1298 g_cmd_resets[cid] = 0; 1299 1300 /* qstate will always be 0x0 when work_restart is set */ 1301 if (qstate != 0x03) { 1302 if (qstate == 0x0) { 1303 for (i = t1->num_active_chips; i > 0; i--) { 1304 struct T1_chip *chip = &t1->chips[i - 1]; 1305 struct work *work = wq_dequeue(t1, true); 1306 uint8_t c = i; 1307 1308 if (unlikely(!work)) { 1309 
reset_tune(t1); 1310 applog(LOG_WARNING, "T1 %d main work underrun", cid); 1311 cgsleep_ms(10); 1312 break; 1313 } 1314 if (set_work(t1, c, work, 0)) 1315 chip->nonce_ranges_done++; 1316 } 1317 } 1318 1319 //applog(LOG_NOTICE, "qstate is not 0x0,the number of work is %d. \t", t1->active_wq.num_elems); 1320 for (i = t1->num_active_chips; i > 0; i--) { 1321 struct T1_chip *chip = &t1->chips[i - 1]; 1322 struct work *work = wq_dequeue(t1, true); 1323 uint8_t c = i; 1324 1325 if (unlikely(!work)) { 1326 /* Demote this message since it's the 1327 * backup queue and not critical */ 1328 applog(LOG_INFO, "T1 %d backup work underrun", cid); 1329 break; 1330 } 1331 if (set_work(t1, c, work, 0)) 1332 chip->nonce_ranges_done++; 1333 } 1334 } 1335 } else { 1336 g_cmd_fails[cid]++; 1337 if (g_cmd_fails[cid] > MAX_CMD_FAILS) { 1338 // TODO: replaced with mcompat_spi_reset() 1339 applog(LOG_ERR, "Chain %d reset spihub", cid); 1340 hub_spi_clean_chain(cid); 1341 g_cmd_resets[cid]++; 1342 if (g_cmd_resets[cid] > MAX_CMD_RESETS) { 1343 applog(LOG_ERR, "Chain %d is not working due to multiple resets.", 1344 cid); 1345 reinit_T1_chain(t1, cid); 1346 } 1347 } 1348 } 1349 1350 /* Temperature control */ 1351 chain_temp_status = dm_tempctrl_update_chain_temp(cid); 1352 1353 cgpu->temp_min = (double)g_chain_tmp[cid].tmp_lo; 1354 cgpu->temp_max = (double)g_chain_tmp[cid].tmp_hi; 1355 cgpu->temp = (double)g_chain_tmp[cid].tmp_avg; 1356 1357 if (chain_temp_status == TEMP_SHUTDOWN) { 1358 // shut down chain 1359 applog(LOG_ERR, "DANGEROUS TEMPERATURE(%.0f): power down chain %d", 1360 cgpu->temp_max, cid); 1361 /* Only time we power down is for dangerous overheat */ 1362 mcompat_chain_power_down(cid); 1363 cgpu->status = LIFE_DEAD; 1364 cgtime(&thr->sick); 1365 1366 /* Function doesn't currently return */ 1367 T1_overheated_blinking(cid); 1368 } else if (chain_temp_status == TEMP_WARNING || 1369 (g_chain_tmp[cid].optimal && g_fan_cfg.fan_speed > opt_T1_target)) 1370 t1_throttle(t1, cid); 1371 
else if (t1->throttled && chain_temp_status == TEMP_NORMAL && 1372 g_fan_cfg.fan_speed < opt_T1_target) 1373 t1_raise(t1, cid); 1374 1375 /* Tuning */ 1376 if (!thr->work_restart && !t1->throttled && opt_T1auto) 1377 T1_tune(t1, cid); 1378 1379 /* read chip temperatures and voltages */ 1380 if (g_debug_stats[cid]) { 1381 cgsleep_ms(1); 1382 get_temperatures(t1); 1383 get_voltages(t1); 1384 g_debug_stats[cid] = 0; 1385 } 1386 1387 #ifdef USE_AUTONONCE 1388 mcompat_cmd_auto_nonce(cid, 1, REG_LENGTH); // enable auto get nonce 1389 #endif 1390 1391 /* in case of no progress, prevent busy looping */ 1392 pthread_cond_signal(&t1->cond); 1393 1394 /* If we haven't slept at least 10ms we are at risk of polling too 1395 * often and being CPU bound. Either the chain is too busy or internal 1396 * clock based sleep behaviour may be off so revert to simple usleep. */ 1397 if (slept < 10) 1398 usleep((10 - slept) * 1000); 1399 1400 return hashes * 0x100000000ull; 1401 } 1402 1403 static int chains_shutdown; 1404 1405 /* Shut down the chains gracefully. We do not want to power them down as it 1406 * makes the next start unreliable, so we decrease power usage to a minimum. */ 1407 static void T1_shutdown(struct thr_info *thr) 1408 { 1409 struct cgpu_info *cgpu = thr->cgpu; 1410 struct T1_chain *t1 = cgpu->device_data; 1411 int cid = t1->chain_id; 1412 1413 mcompat_set_spi_speed(cid, T1_SPI_SPEED_DEF); 1414 1415 /* Set a very low frequency. 
*/ 1416 t1_set_pll(t1, CMD_ADDR_BROADCAST, 0); 1417 1418 /* Set a very low voltage */ 1419 mcompat_set_vid_by_step(cid, t1->iVid, T1_VID_MAX); 1420 1421 /* Confirm we have actually reset the chains */ 1422 if (!mcompat_set_reset(cid, 0)) { 1423 applog(LOG_ERR, "Failed to reset chain %d on shutdown", cid); 1424 return; 1425 } 1426 /* Only once all chains are successfully shut down is it safe to turn 1427 * down fan speed */ 1428 if (++chains_shutdown < total_chains) 1429 return; 1430 1431 pthread_cancel(fan_tid); 1432 dm_fanctrl_set_fan_speed(FAN_SPEED_PREHEAT); 1433 } 1434 1435 static void T1_get_statline_before(char *buf, size_t len, struct cgpu_info *cgpu) 1436 { 1437 struct T1_chain *t1 = cgpu->device_data; 1438 char temp[10]; 1439 if (cgpu->temp != 0) 1440 snprintf(temp, 9, "%2.0fC", cgpu->temp); 1441 tailsprintf(buf, len, " %2d:%2d/%3d %s", 1442 t1->chain_id, t1->num_active_chips, t1->num_cores, 1443 cgpu->temp == 0 ? " " : temp); 1444 } 1445 1446 static struct api_data *T1_api_stats(struct cgpu_info *cgpu) 1447 { 1448 struct T1_chain *t1 = cgpu->device_data; 1449 int fan_speed = g_fan_cfg.fan_speed; 1450 unsigned long long int chipmap = 0; 1451 struct api_data *root = NULL; 1452 bool fake; 1453 int i; 1454 char s[32]; 1455 1456 ROOT_ADD_API(int, "Chain ID", t1->chain_id, false); 1457 ROOT_ADD_API(int, "Num chips", t1->num_chips, false); 1458 ROOT_ADD_API(int, "Num cores", t1->num_cores, false); 1459 ROOT_ADD_API(int, "Num active chips", t1->num_active_chips, false); 1460 ROOT_ADD_API(int, "Chain skew", t1->chain_skew, false); 1461 ROOT_ADD_API(double, "Temp max", cgpu->temp_max, false); 1462 ROOT_ADD_API(double, "Temp min", cgpu->temp_min, false); 1463 ROOT_ADD_API(int, "Fan duty", fan_speed, true); 1464 ROOT_ADD_API(int, "iVid", t1->iVid, false); 1465 ROOT_ADD_API(int, "PLL", t1->pll, false); 1466 ROOT_ADD_API(double, "Voltage Max", s_reg_ctrl.highest_vol[t1->chain_id], false); 1467 ROOT_ADD_API(double, "Voltage Min", s_reg_ctrl.lowest_vol[t1->chain_id], 
false); 1468 ROOT_ADD_API(double, "Voltage Avg", s_reg_ctrl.average_vol[t1->chain_id], false); 1469 ROOT_ADD_API(bool, "VidOptimal", t1->VidOptimal, false); 1470 ROOT_ADD_API(bool, "pllOptimal", t1->pllOptimal, false); 1471 ROOT_ADD_API(int, "Chain num", cgpu->chainNum, false); 1472 ROOT_ADD_API(double, "MHS av", cgpu->mhs_av, false); 1473 ROOT_ADD_API(bool, "Disabled", t1->disabled, false); 1474 fake = !!t1->throttled; 1475 ROOT_ADD_API(bool, "Throttled", fake, true); 1476 for (i = 0; i < t1->num_chips; i++) { 1477 if (!t1->chips[i].disabled) 1478 chipmap |= 1 << i; 1479 } 1480 sprintf(s, "%Lx", chipmap); 1481 ROOT_ADD_API(string, "Enabled chips", s[0], true); 1482 ROOT_ADD_API(double, "Temp", cgpu->temp, false); 1483 1484 for (i = 0; i < t1->num_chips; i++) { 1485 sprintf(s, "%02d HW errors", i); 1486 ROOT_ADD_API(int, s, t1->chips[i].hw_errors, true); 1487 sprintf(s, "%02d Stales", i); 1488 ROOT_ADD_API(int, s, t1->chips[i].stales, true); 1489 sprintf(s, "%02d Duplicates", i); 1490 ROOT_ADD_API(int, s, t1->chips[i].dupes, true); 1491 sprintf(s, "%02d Nonces found", i); 1492 ROOT_ADD_API(int, s, t1->chips[i].nonces_found, true); 1493 sprintf(s, "%02d Nonce ranges", i); 1494 ROOT_ADD_API(int, s, t1->chips[i].nonce_ranges_done, true); 1495 sprintf(s, "%02d Cooldown", i); 1496 ROOT_ADD_API(int, s, t1->chips[i].cooldown_begin, true); 1497 sprintf(s, "%02d Fail count", i); 1498 ROOT_ADD_API(int, s, t1->chips[i].fail_count, true); 1499 sprintf(s, "%02d Fail reset", i); 1500 ROOT_ADD_API(int, s, t1->chips[i].fail_reset, true); 1501 sprintf(s, "%02d Temp", i); 1502 ROOT_ADD_API(int, s, t1->chips[i].temp, true); 1503 sprintf(s, "%02d nVol", i); 1504 ROOT_ADD_API(int, s, t1->chips[i].nVol, true); 1505 } 1506 return root; 1507 } 1508 1509 static struct api_data *T1_api_debug(struct cgpu_info *cgpu) 1510 { 1511 struct T1_chain *t1 = cgpu->device_data; 1512 int timeout = 1000; 1513 1514 g_debug_stats[t1->chain_id] = 1; 1515 1516 // Wait for g_debug_stats cleared or timeout 
1517 while (g_debug_stats[t1->chain_id] && timeout) { 1518 timeout -= 10; 1519 cgsleep_ms(10); 1520 } 1521 1522 return T1_api_stats(cgpu); 1523 } 1524 1525 struct device_drv dragonmintT1_drv = { 1526 .drv_id = DRIVER_dragonmintT1, 1527 .dname = "DragonmintT1", 1528 .name = "DT1", 1529 .drv_detect = T1_detect, 1530 /* Set to lowest diff we can reliably use to get accurate hashrates. */ 1531 .max_diff = 129, 1532 1533 .hash_work = hash_driver_work, 1534 .scanwork = T1_scanwork, 1535 .thread_shutdown = T1_shutdown, 1536 .get_api_stats = T1_api_stats, 1537 .get_api_debug = T1_api_debug, 1538 .get_statline_before = T1_get_statline_before, 1539 };