/ driver-modminer.c
driver-modminer.c
   1  /*
   2   * Copyright 2012-2013 Andrew Smith
   3   * Copyright 2012 Luke Dashjr
   4   *
   5   * This program is free software; you can redistribute it and/or modify it
   6   * under the terms of the GNU General Public License as published by the Free
   7   * Software Foundation; either version 3 of the License, or (at your option)
   8   * any later version.  See COPYING for more details.
   9   */
  10  
  11  #include "config.h"
  12  
  13  #include <stdarg.h>
  14  #include <stdio.h>
  15  #include <unistd.h>
  16  #include <math.h>
  17  
  18  #include "logging.h"
  19  #include "miner.h"
  20  #include "usbutils.h"
  21  #include "fpgautils.h"
  22  #include "util.h"
  23  
  24  #define BITSTREAM_FILENAME "fpgaminer_top_fixed7_197MHz.ncd"
  25  #define BISTREAM_USER_ID "\2\4$B"
  26  
  27  #define BITSTREAM_MAGIC_0 0
  28  #define BITSTREAM_MAGIC_1 9
  29  
  30  #define MODMINER_CUTOFF_TEMP 60.0
  31  #define MODMINER_OVERHEAT_TEMP 50.0
  32  #define MODMINER_RECOVER_TEMP 46.5
  33  #define MODMINER_TEMP_UP_LIMIT 47.0
  34  
  35  #define MODMINER_HW_ERROR_PERCENT 0.75
  36  
  37  // How many seconds of no nonces means there's something wrong
  38  // First time - drop the clock and see if it revives
  39  // Second time - (and it didn't revive) disable it
  40  #define ITS_DEAD_JIM 300
  41  
  42  // N.B. in the latest firmware the limit is 250
  43  // however the voltage/temperature risks preclude that
  44  #define MODMINER_MAX_CLOCK 230
  45  #define MODMINER_DEF_CLOCK 200
  46  #define MODMINER_MIN_CLOCK 160
  47  
  48  #define MODMINER_CLOCK_UP 2
  49  #define MODMINER_CLOCK_SET 0
  50  #define MODMINER_CLOCK_DOWN -2
  51  // = 0 means OVERHEAT doesn't affect the clock
  52  #define MODMINER_CLOCK_OVERHEAT 0
  53  #define MODMINER_CLOCK_DEAD -6
  54  #define MODMINER_CLOCK_CUTOFF -10
  55  
  56  // Commands
  57  #define MODMINER_PING "\x00"
  58  #define MODMINER_GET_VERSION "\x01"
  59  #define MODMINER_FPGA_COUNT "\x02"
  60  // Commands + require FPGAid
  61  #define MODMINER_GET_IDCODE '\x03'
  62  #define MODMINER_GET_USERCODE '\x04'
  63  #define MODMINER_PROGRAM '\x05'
  64  #define MODMINER_SET_CLOCK '\x06'
  65  #define MODMINER_READ_CLOCK '\x07'
  66  #define MODMINER_SEND_WORK '\x08'
  67  #define MODMINER_CHECK_WORK '\x09'
  68  // One byte temperature reply
  69  #define MODMINER_TEMP1 '\x0a'
  70  // Two byte temperature reply
  71  #define MODMINER_TEMP2 '\x0d'
  72  
  73  // +6 bytes
  74  #define MODMINER_SET_REG '\x0b'
  75  // +2 bytes
  76  #define MODMINER_GET_REG '\x0c'
  77  
  78  #define FPGAID_ALL 4
  79  
  80  // Maximum how many good shares in a row means clock up
  81  // 96 is ~34m22s at 200MH/s
  82  #define MODMINER_TRY_UP 96
  83  // Initially how many good shares in a row means clock up
  84  // This is doubled each down clock until it reaches MODMINER_TRY_UP
  85  // 6 is ~2m9s at 200MH/s
  86  #define MODMINER_EARLY_UP 6
  87  // Limit when reducing shares_to_good
  88  #define MODMINER_MIN_BACK 12
  89  
  90  // 45 noops sent when detecting, in case the device was left in "start job" reading
  91  static const char NOOP[] = MODMINER_PING "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff";
  92  
  93  static void do_ping(struct cgpu_info *modminer)
  94  {
  95  	char buf[0x100+1];
  96  	int err, amount;
  97  
  98  	// Don't care if it fails
  99  	err = usb_write(modminer, (char *)NOOP, sizeof(NOOP)-1, &amount, C_PING);
 100  	applog(LOG_DEBUG, "%s%u: flush noop got %d err %d",
 101  		modminer->drv->name, modminer->fpgaid, amount, err);
 102  
 103  	// Clear any outstanding data
 104  	while ((err = usb_read_once(modminer, buf, sizeof(buf)-1, &amount, C_CLEAR)) == 0 && amount > 0)
 105  		applog(LOG_DEBUG, "%s%u: clear got %d",
 106  			modminer->drv->name, modminer->fpgaid, amount);
 107  
 108  	applog(LOG_DEBUG, "%s%u: final clear got %d err %d",
 109  		modminer->drv->name, modminer->fpgaid, amount, err);
 110  }
 111  
 112  static struct cgpu_info *modminer_detect_one(struct libusb_device *dev, struct usb_find_devices *found)
 113  {
 114  	char buf[0x100+1];
 115  	char *devname = NULL;
 116  	char devpath[32];
 117  	int err, i, amount;
 118  	bool added = false;
 119  
 120  	struct cgpu_info *modminer = usb_alloc_cgpu(&modminer_drv, 1);
 121  
 122  	modminer->modminer_mutex = calloc(1, sizeof(*(modminer->modminer_mutex)));
 123  	mutex_init(modminer->modminer_mutex);
 124  	modminer->fpgaid = (char)0;
 125  
 126  	if (!usb_init(modminer, dev, found))
 127  		goto shin;
 128  
 129  	do_ping(modminer);
 130  
 131  	if ((err = usb_write(modminer, MODMINER_GET_VERSION, 1, &amount, C_REQUESTVERSION)) < 0 || amount != 1) {
 132  		applog(LOG_ERR, "%s detect (%s) send version request failed (%d:%d)",
 133  			modminer->drv->dname, modminer->device_path, amount, err);
 134  		goto unshin;
 135  	}
 136  
 137  	if ((err = usb_read_once(modminer, buf, sizeof(buf)-1, &amount, C_GETVERSION)) < 0 || amount < 1) {
 138  		if (err < 0)
 139  			applog(LOG_ERR, "%s detect (%s) no version reply (%d)",
 140  				modminer->drv->dname, modminer->device_path, err);
 141  		else
 142  			applog(LOG_ERR, "%s detect (%s) empty version reply (%d)",
 143  				modminer->drv->dname, modminer->device_path, amount);
 144  
 145  		applog(LOG_DEBUG, "%s detect (%s) check the firmware",
 146  				modminer->drv->dname, modminer->device_path);
 147  
 148  		goto unshin;
 149  	}
 150  	buf[amount] = '\0';
 151  	devname = strdup(buf);
 152  	applog(LOG_DEBUG, "%s (%s) identified as: %s", modminer->drv->dname, modminer->device_path, devname);
 153  
 154  	if ((err = usb_write(modminer, MODMINER_FPGA_COUNT, 1, &amount, C_REQUESTFPGACOUNT) < 0 || amount != 1)) {
 155  		applog(LOG_ERR, "%s detect (%s) FPGA count request failed (%d:%d)",
 156  			modminer->drv->dname, modminer->device_path, amount, err);
 157  		goto unshin;
 158  	}
 159  
 160  	if ((err = usb_read(modminer, buf, 1, &amount, C_GETFPGACOUNT)) < 0 || amount != 1) {
 161  		applog(LOG_ERR, "%s detect (%s) no FPGA count reply (%d:%d)",
 162  			modminer->drv->dname, modminer->device_path, amount, err);
 163  		goto unshin;
 164  	}
 165  
 166  	// TODO: flag it use 1 byte temp if it is an old firmware
 167  	// can detect with modminer->cgusb->serial ?
 168  
 169  	if (buf[0] == 0) {
 170  		applog(LOG_ERR, "%s detect (%s) zero FPGA count from %s",
 171  			modminer->drv->dname, modminer->device_path, devname);
 172  		goto unshin;
 173  	}
 174  
 175  	if (buf[0] < 1 || buf[0] > 4) {
 176  		applog(LOG_ERR, "%s detect (%s) invalid FPGA count (%u) from %s",
 177  			modminer->drv->dname, modminer->device_path, buf[0], devname);
 178  		goto unshin;
 179  	}
 180  
 181  	applog(LOG_DEBUG, "%s (%s) %s has %u FPGAs",
 182  		modminer->drv->dname, modminer->device_path, devname, buf[0]);
 183  
 184  	modminer->name = devname;
 185  
 186  	// TODO: test with 1 board missing in the middle and each end
 187  	// to see how that affects the sequence numbers
 188  	for (i = 0; i < buf[0]; i++) {
 189  		struct cgpu_info *tmp = usb_copy_cgpu(modminer);
 190  
 191  		sprintf(devpath, "%d:%d:%d",
 192  			(int)(modminer->usbinfo.bus_number),
 193  			(int)(modminer->usbinfo.device_address),
 194  			i);
 195  
 196  		tmp->device_path = strdup(devpath);
 197  
 198  		// Only the first copy gets the already used stats
 199  		if (added)
 200  			tmp->usbinfo.usbstat = USB_NOSTAT;
 201  
 202  		tmp->fpgaid = (char)i;
 203  		tmp->modminer_mutex = modminer->modminer_mutex;
 204  
 205  		if (!add_cgpu(tmp)) {
 206  			tmp = usb_free_cgpu(tmp);
 207  			goto unshin;
 208  		}
 209  
 210  		update_usb_stats(tmp);
 211  
 212  		added = true;
 213  	}
 214  
 215  	modminer = usb_free_cgpu(modminer);
 216  
 217  	return modminer;
 218  
 219  unshin:
 220  	if (!added)
 221  		usb_uninit(modminer);
 222  
 223  shin:
 224  	if (!added) {
 225  		free(modminer->modminer_mutex);
 226  		modminer->modminer_mutex = NULL;
 227  	}
 228  
 229  	modminer = usb_free_cgpu(modminer);
 230  
 231  	if (added)
 232  		return modminer;
 233  	else
 234  		return NULL;
 235  }
 236  
 237  static void modminer_detect(bool __maybe_unused hotplug)
 238  {
 239  	usb_detect(&modminer_drv, modminer_detect_one);
 240  }
 241  
 242  static bool get_expect(struct cgpu_info *modminer, FILE *f, char c)
 243  {
 244  	char buf;
 245  
 246  	if (fread(&buf, 1, 1, f) != 1) {
 247  		applog(LOG_ERR, "%s%u: Error (%d) reading bitstream (%c)",
 248  				modminer->drv->name, modminer->device_id, errno, c);
 249  		return false;
 250  	}
 251  
 252  	if (buf != c) {
 253  		applog(LOG_ERR, "%s%u: bitstream code mismatch (%c)",
 254  				modminer->drv->name, modminer->device_id, c);
 255  		return false;
 256  	}
 257  
 258  	return true;
 259  }
 260  
 261  static bool get_info(struct cgpu_info *modminer, FILE *f, char *buf, int bufsiz, const char *name)
 262  {
 263  	unsigned char siz[2];
 264  	int len;
 265  
 266  	if (fread(siz, 2, 1, f) != 1) {
 267  		applog(LOG_ERR, "%s%u: Error (%d) reading bitstream '%s' len",
 268  			modminer->drv->name, modminer->device_id, errno, name);
 269  		return false;
 270  	}
 271  
 272  	len = siz[0] * 256 + siz[1];
 273  
 274  	if (len >= bufsiz) {
 275  		applog(LOG_ERR, "%s%u: Bitstream '%s' len too large (%d)",
 276  			modminer->drv->name, modminer->device_id, name, len);
 277  		return false;
 278  	}
 279  
 280  	if (fread(buf, len, 1, f) != 1) {
 281  		applog(LOG_ERR, "%s%u: Error (%d) reading bitstream '%s'",
 282  			modminer->drv->name, modminer->device_id, errno, name);
 283  		return false;
 284  	}
 285  
 286  	buf[len] = '\0';
 287  
 288  	return true;
 289  }
 290  
 291  #define USE_DEFAULT_TIMEOUT 0
 292  
 293  // mutex must always be locked before calling
 294  static bool get_status_timeout(struct cgpu_info *modminer, char *msg, unsigned int timeout, enum usb_cmds cmd)
 295  {
 296  	int err, amount;
 297  	char buf[1];
 298  
 299  	if (timeout == USE_DEFAULT_TIMEOUT)
 300  		err = usb_read(modminer, buf, 1, &amount, cmd);
 301  	else
 302  		err = usb_read_timeout(modminer, buf, 1, &amount, timeout, cmd);
 303  
 304  	if (err < 0 || amount != 1) {
 305  		mutex_unlock(modminer->modminer_mutex);
 306  
 307  		applog(LOG_ERR, "%s%u: Error (%d:%d) getting %s reply",
 308  			modminer->drv->name, modminer->device_id, amount, err, msg);
 309  
 310  		return false;
 311  	}
 312  
 313  	if (buf[0] != 1) {
 314  		mutex_unlock(modminer->modminer_mutex);
 315  
 316  		applog(LOG_ERR, "%s%u: Error, invalid %s reply (was %d should be 1)",
 317  			modminer->drv->name, modminer->device_id, msg, buf[0]);
 318  
 319  		return false;
 320  	}
 321  
 322  	return true;
 323  }
 324  
 325  // mutex must always be locked before calling
 326  static bool get_status(struct cgpu_info *modminer, char *msg, enum usb_cmds cmd)
 327  {
 328  	return get_status_timeout(modminer, msg, USE_DEFAULT_TIMEOUT, cmd);
 329  }
 330  
 331  static bool modminer_fpga_upload_bitstream(struct cgpu_info *modminer)
 332  {
 333  	const char *bsfile = BITSTREAM_FILENAME;
 334  	char buf[0x100], *p;
 335  	char devmsg[64];
 336  	unsigned char *ubuf = (unsigned char *)buf;
 337  	unsigned long totlen, len;
 338  	size_t buflen, remaining;
 339  	float nextmsg, upto;
 340  	char fpgaid = FPGAID_ALL;
 341  	int err, amount, tries;
 342  	char *ptr;
 343  
 344  	FILE *f = open_bitstream("modminer", bsfile);
 345  	if (!f) {
 346  		mutex_unlock(modminer->modminer_mutex);
 347  
 348  		applog(LOG_ERR, "%s%u: Error (%d) opening bitstream file %s",
 349  			modminer->drv->name, modminer->device_id, errno, bsfile);
 350  
 351  		return false;
 352  	}
 353  
 354  	if (fread(buf, 2, 1, f) != 1) {
 355  		mutex_unlock(modminer->modminer_mutex);
 356  
 357  		applog(LOG_ERR, "%s%u: Error (%d) reading bitstream magic",
 358  			modminer->drv->name, modminer->device_id, errno);
 359  
 360  		goto dame;
 361  	}
 362  
 363  	if (buf[0] != BITSTREAM_MAGIC_0 || buf[1] != BITSTREAM_MAGIC_1) {
 364  		mutex_unlock(modminer->modminer_mutex);
 365  
 366  		applog(LOG_ERR, "%s%u: bitstream has incorrect magic (%u,%u) instead of (%u,%u)",
 367  			modminer->drv->name, modminer->device_id,
 368  			buf[0], buf[1],
 369  			BITSTREAM_MAGIC_0, BITSTREAM_MAGIC_1);
 370  
 371  		goto dame;
 372  	}
 373  
 374  	if (fseek(f, 11L, SEEK_CUR)) {
 375  		mutex_unlock(modminer->modminer_mutex);
 376  
 377  		applog(LOG_ERR, "%s%u: Error (%d) bitstream seek failed",
 378  			modminer->drv->name, modminer->device_id, errno);
 379  
 380  		goto dame;
 381  	}
 382  
 383  	if (!get_expect(modminer, f, 'a'))
 384  		goto undame;
 385  
 386  	if (!get_info(modminer, f, buf, sizeof(buf), "Design name"))
 387  		goto undame;
 388  
 389  	applog(LOG_DEBUG, "%s%u: bitstream file '%s' info:",
 390  		modminer->drv->name, modminer->device_id, bsfile);
 391  
 392  	applog(LOG_DEBUG, " Design name: '%s'", buf);
 393  
 394  	p = strrchr(buf, ';') ? : buf;
 395  	p = strrchr(buf, '=') ? : p;
 396  	if (p[0] == '=')
 397  		p++;
 398  
 399  	unsigned long fwusercode = (unsigned long)strtoll(p, &p, 16);
 400  
 401  	if (p[0] != '\0') {
 402  		mutex_unlock(modminer->modminer_mutex);
 403  
 404  		applog(LOG_ERR, "%s%u: Bad usercode in bitstream file",
 405  			modminer->drv->name, modminer->device_id);
 406  
 407  		goto dame;
 408  	}
 409  
 410  	if (fwusercode == 0xffffffff) {
 411  		mutex_unlock(modminer->modminer_mutex);
 412  
 413  		applog(LOG_ERR, "%s%u: bitstream doesn't support user code",
 414  			modminer->drv->name, modminer->device_id);
 415  
 416  		goto dame;
 417  	}
 418  
 419  	applog(LOG_DEBUG, " Version: %lu, build %lu", (fwusercode >> 8) & 0xff, fwusercode & 0xff);
 420  
 421  	if (!get_expect(modminer, f, 'b'))
 422  		goto undame;
 423  
 424  	if (!get_info(modminer, f, buf, sizeof(buf), "Part number"))
 425  		goto undame;
 426  
 427  	applog(LOG_DEBUG, " Part number: '%s'", buf);
 428  
 429  	if (!get_expect(modminer, f, 'c'))
 430  		goto undame;
 431  
 432  	if (!get_info(modminer, f, buf, sizeof(buf), "Build date"))
 433  		goto undame;
 434  
 435  	applog(LOG_DEBUG, " Build date: '%s'", buf);
 436  
 437  	if (!get_expect(modminer, f, 'd'))
 438  		goto undame;
 439  
 440  	if (!get_info(modminer, f, buf, sizeof(buf), "Build time"))
 441  		goto undame;
 442  
 443  	applog(LOG_DEBUG, " Build time: '%s'", buf);
 444  
 445  	if (!get_expect(modminer, f, 'e'))
 446  		goto undame;
 447  
 448  	if (fread(buf, 4, 1, f) != 1) {
 449  		mutex_unlock(modminer->modminer_mutex);
 450  
 451  		applog(LOG_ERR, "%s%u: Error (%d) reading bitstream data len",
 452  			modminer->drv->name, modminer->device_id, errno);
 453  
 454  		goto dame;
 455  	}
 456  
 457  	len = ((unsigned long)ubuf[0] << 24) | ((unsigned long)ubuf[1] << 16) | (ubuf[2] << 8) | ubuf[3];
 458  	applog(LOG_DEBUG, " Bitstream size: %lu", len);
 459  
 460  	strcpy(devmsg, modminer->device_path);
 461  	ptr = strrchr(devmsg, ':');
 462  	if (ptr)
 463  		*ptr = '\0';
 464  
 465  	applog(LOG_WARNING, "%s%u: Programming all FPGA on %s ... Mining will not start until complete",
 466  		modminer->drv->name, modminer->device_id, devmsg);
 467  
 468  	buf[0] = MODMINER_PROGRAM;
 469  	buf[1] = fpgaid;
 470  	buf[2] = (len >>  0) & 0xff;
 471  	buf[3] = (len >>  8) & 0xff;
 472  	buf[4] = (len >> 16) & 0xff;
 473  	buf[5] = (len >> 24) & 0xff;
 474  
 475  	if ((err = usb_write(modminer, buf, 6, &amount, C_STARTPROGRAM)) < 0 || amount != 6) {
 476  		mutex_unlock(modminer->modminer_mutex);
 477  
 478  		applog(LOG_ERR, "%s%u: Program init failed (%d:%d)",
 479  			modminer->drv->name, modminer->device_id, amount, err);
 480  
 481  		goto dame;
 482  	}
 483  
 484  	if (!get_status(modminer, "initialise", C_STARTPROGRAMSTATUS))
 485  		goto undame;
 486  
 487  // It must be 32 bytes according to MCU legacy.c
 488  #define WRITE_SIZE 32
 489  
 490  	totlen = len;
 491  	nextmsg = 0.1;
 492  	while (len > 0) {
 493  		buflen = len < WRITE_SIZE ? len : WRITE_SIZE;
 494  		if (fread(buf, buflen, 1, f) != 1) {
 495  			mutex_unlock(modminer->modminer_mutex);
 496  
 497  			applog(LOG_ERR, "%s%u: bitstream file read error %d (%lu bytes left)",
 498  				modminer->drv->name, modminer->device_id, errno, len);
 499  
 500  			goto dame;
 501  		}
 502  
 503  		tries = 0;
 504  		ptr = buf;
 505  		remaining = buflen;
 506  		while ((err = usb_write(modminer, ptr, remaining, &amount, C_PROGRAM)) < 0 || amount != (int)remaining) {
 507  			if (err == LIBUSB_ERROR_TIMEOUT && amount > 0 && ++tries < 4) {
 508  				remaining -= amount;
 509  				ptr += amount;
 510  
 511  				if (opt_debug)
 512  					applog(LOG_DEBUG, "%s%u: Program timeout (%d:%d) sent %d tries %d",
 513  						modminer->drv->name, modminer->device_id,
 514  						amount, err, (int)remaining, tries);
 515  
 516  				if (!get_status(modminer, "write status", C_PROGRAMSTATUS2))
 517  					goto dame;
 518  
 519  			} else {
 520  				mutex_unlock(modminer->modminer_mutex);
 521  
 522  				applog(LOG_ERR, "%s%u: Program failed (%d:%d) sent %d",
 523  					modminer->drv->name, modminer->device_id, amount, err, (int)remaining);
 524  
 525  				goto dame;
 526  			}
 527  		}
 528  
 529  		if (!get_status(modminer, "write status", C_PROGRAMSTATUS))
 530  			goto dame;
 531  
 532  		len -= buflen;
 533  
 534  		upto = (float)(totlen - len) / (float)(totlen);
 535  		if (upto >= nextmsg) {
 536  			applog(LOG_WARNING,
 537  				"%s%u: Programming %.1f%% (%lu out of %lu)",
 538  				modminer->drv->name, modminer->device_id, upto*100, (totlen - len), totlen);
 539  
 540  			nextmsg += 0.1;
 541  		}
 542  	}
 543  
 544  	if (!get_status(modminer, "final status", C_FINALPROGRAMSTATUS))
 545  		goto undame;
 546  
 547  	applog(LOG_WARNING, "%s%u: Programming completed for all FPGA on %s",
 548  		modminer->drv->name, modminer->device_id, devmsg);
 549  
 550  	// Give it a 2/3s delay after programming
 551  	cgsleep_ms(666);
 552  
 553  	usb_set_dev_start(modminer);
 554  
 555  	return true;
 556  undame:
 557  	;
 558  	mutex_unlock(modminer->modminer_mutex);
 559  	;
 560  dame:
 561  	fclose(f);
 562  	return false;
 563  }
 564  
 565  static bool modminer_fpga_prepare(struct thr_info *thr)
 566  {
 567  //	struct cgpu_info *modminer = thr->cgpu;
 568  	struct modminer_fpga_state *state;
 569  
 570  	state = thr->cgpu_data = calloc(1, sizeof(struct modminer_fpga_state));
 571  	state->shares_to_good = MODMINER_EARLY_UP;
 572  	state->overheated = false;
 573  
 574  	return true;
 575  }
 576  
 577  /*
 578   * Clocking rules:
 579   *	If device exceeds cutoff or overheat temp - stop sending work until it cools
 580   *		decrease the clock by MODMINER_CLOCK_CUTOFF/MODMINER_CLOCK_OVERHEAT
 581   *		for when it restarts
 582   *		with MODMINER_CLOCK_OVERHEAT=0 basically says that temp shouldn't
 583   *		affect the clock unless we reach CUTOFF
 584   *
 585   *	If device overheats
 586   *		set shares_to_good back to MODMINER_MIN_BACK
 *		to speed up clock recovery if temp drop doesn't help
 588   *
 589   * When to clock down:
 590   *	If device gets MODMINER_HW_ERROR_PERCENT errors since last clock up or down
 591   *		if clock is <= default it requires 2 HW to do this test
 592   *		if clock is > default it only requires 1 HW to do this test
 593   *			also double shares_to_good
 594   *
 595   * When to clock up:
 596   *	If device gets shares_to_good good shares in a row
 597   *		and temp < MODMINER_TEMP_UP_LIMIT
 598   *
 599   * N.B. clock must always be a multiple of 2
 600   */
 601  static const char *clocknodev = "clock failed - no device";
 602  static const char *clockoldwork = "clock already changed for this work";
 603  static const char *clocktoolow = "clock too low";
 604  static const char *clocktoohi = "clock too high";
 605  static const char *clocksetfail = "clock set command failed";
 606  static const char *clockreplyfail = "clock reply failed";
 607  
 608  static const char *modminer_delta_clock(struct thr_info *thr, int delta, bool temp, bool force)
 609  {
 610  	struct cgpu_info *modminer = thr->cgpu;
 611  	struct modminer_fpga_state *state = thr->cgpu_data;
 612  	unsigned char cmd[6], buf[1];
 613  	int err, amount;
 614  
 615  	// Device is gone
 616  	if (modminer->usbinfo.nodev)
 617  		return clocknodev;
 618  
 619  	// Only do once if multiple shares per work or multiple reasons
 620  	if (!state->new_work && !force)
 621  		return clockoldwork;
 622  
 623  	state->new_work = false;
 624  
 625  	state->shares = 0;
 626  	state->shares_last_hw = 0;
 627  	state->hw_errors = 0;
 628  
 629  	// FYI clock drop has little effect on temp
 630  	if (delta < 0 && (modminer->clock + delta) < MODMINER_MIN_CLOCK)
 631  		return clocktoolow;
 632  
 633  	if (delta > 0 && (modminer->clock + delta) > MODMINER_MAX_CLOCK)
 634  		return clocktoohi;
 635  
 636  	if (delta < 0) {
 637  		if (temp)
 638  			state->shares_to_good = MODMINER_MIN_BACK;
 639  		else {
 640  			if ((state->shares_to_good * 2) < MODMINER_TRY_UP)
 641  				state->shares_to_good *= 2;
 642  			else
 643  				state->shares_to_good = MODMINER_TRY_UP;
 644  		}
 645  	}
 646  
 647  	modminer->clock += delta;
 648  
 649  	cmd[0] = MODMINER_SET_CLOCK;
 650  	cmd[1] = modminer->fpgaid;
 651  	cmd[2] = modminer->clock;
 652  	cmd[3] = cmd[4] = cmd[5] = '\0';
 653  
 654  	mutex_lock(modminer->modminer_mutex);
 655  
 656  	if ((err = usb_write(modminer, (char *)cmd, 6, &amount, C_SETCLOCK)) < 0 || amount != 6) {
 657  		mutex_unlock(modminer->modminer_mutex);
 658  
 659  		applog(LOG_ERR, "%s%u: Error writing set clock speed (%d:%d)",
 660  			modminer->drv->name, modminer->device_id, amount, err);
 661  
 662  		return clocksetfail;
 663  	}
 664  
 665  	if ((err = usb_read(modminer, (char *)(&buf), 1, &amount, C_REPLYSETCLOCK)) < 0 || amount != 1) {
 666  		mutex_unlock(modminer->modminer_mutex);
 667  
 668  		applog(LOG_ERR, "%s%u: Error reading set clock speed (%d:%d)",
 669  			modminer->drv->name, modminer->device_id, amount, err);
 670  
 671  		return clockreplyfail;
 672  	}
 673  
 674  	mutex_unlock(modminer->modminer_mutex);
 675  
 676  	applog(LOG_WARNING, "%s%u: Set clock speed %sto %u",
 677  			modminer->drv->name, modminer->device_id,
 678  			(delta < 0) ? "down " : (delta > 0 ? "up " : ""),
 679  			modminer->clock);
 680  
 681  	return NULL;
 682  }
 683  
 684  static bool modminer_fpga_init(struct thr_info *thr)
 685  {
 686  	struct cgpu_info *modminer = thr->cgpu;
 687  	unsigned char cmd[2], buf[4];
 688  	int err, amount;
 689  
 690  	mutex_lock(modminer->modminer_mutex);
 691  
 692  	cmd[0] = MODMINER_GET_USERCODE;
 693  	cmd[1] = modminer->fpgaid;
 694  	if ((err = usb_write(modminer, (char *)cmd, 2, &amount, C_REQUESTUSERCODE)) < 0 || amount != 2) {
 695  		mutex_unlock(modminer->modminer_mutex);
 696  
 697  		applog(LOG_ERR, "%s%u: Error requesting USER code (%d:%d)",
 698  			modminer->drv->name, modminer->device_id, amount, err);
 699  
 700  		return false;
 701  	}
 702  
 703  	if ((err = usb_read(modminer, (char *)buf, 4, &amount, C_GETUSERCODE)) < 0 || amount != 4) {
 704  		mutex_unlock(modminer->modminer_mutex);
 705  
 706  		applog(LOG_ERR, "%s%u: Error reading USER code (%d:%d)",
 707  			modminer->drv->name, modminer->device_id, amount, err);
 708  
 709  		return false;
 710  	}
 711  
 712  	if (memcmp(buf, BISTREAM_USER_ID, 4)) {
 713  		applog(LOG_ERR, "%s%u: FPGA not programmed",
 714  			modminer->drv->name, modminer->device_id);
 715  
 716  		if (!modminer_fpga_upload_bitstream(modminer))
 717  			return false;
 718  
 719  		mutex_unlock(modminer->modminer_mutex);
 720  	} else {
 721  		mutex_unlock(modminer->modminer_mutex);
 722  
 723  		applog(LOG_DEBUG, "%s%u: FPGA is already programmed :)",
 724  			modminer->drv->name, modminer->device_id);
 725  	}
 726  
 727  	modminer->clock = MODMINER_DEF_CLOCK;
 728  	modminer_delta_clock(thr, MODMINER_CLOCK_SET, false, false);
 729  
 730  	thr->primary_thread = true;
 731  
 732  	return true;
 733  }
 734  
 735  static void get_modminer_statline_before(char *buf, size_t bufsiz, struct cgpu_info *modminer)
 736  {
 737  	tailsprintf(buf, bufsiz, "%s%.1fC %3uMHz",
 738  			(modminer->temp < 10) ? " " : "",
 739  			modminer->temp,
 740  			(unsigned int)(modminer->clock));
 741  }
 742  
 743  static bool modminer_start_work(struct thr_info *thr, struct work *work)
 744  {
 745  	struct cgpu_info *modminer = thr->cgpu;
 746  	struct modminer_fpga_state *state = thr->cgpu_data;
 747  	int err, amount;
 748  	char cmd[48];
 749  	bool sta;
 750  
 751  	cmd[0] = MODMINER_SEND_WORK;
 752  	cmd[1] = modminer->fpgaid;
 753  	memcpy(&cmd[2], work->midstate, 32);
 754  	memcpy(&cmd[34], work->data + 64, 12);
 755  
 756  	if (state->first_work.tv_sec == 0)
 757  		cgtime(&state->first_work);
 758  
 759  	if (state->last_nonce.tv_sec == 0)
 760  		cgtime(&state->last_nonce);
 761  
 762  	mutex_lock(modminer->modminer_mutex);
 763  
 764  	if ((err = usb_write(modminer, cmd, 46, &amount, C_SENDWORK)) < 0 || amount != 46) {
 765  		mutex_unlock(modminer->modminer_mutex);
 766  
 767  		applog(LOG_ERR, "%s%u: Start work failed (%d:%d)",
 768  			modminer->drv->name, modminer->device_id, amount, err);
 769  
 770  		return false;
 771  	}
 772  
 773  	cgtime(&state->tv_workstart);
 774  
 775  	sta = get_status(modminer, "start work", C_SENDWORKSTATUS);
 776  
 777  	if (sta) {
 778  		mutex_unlock(modminer->modminer_mutex);
 779  		state->new_work = true;
 780  	}
 781  
 782  	return sta;
 783  }
 784  
 785  static void check_temperature(struct thr_info *thr)
 786  {
 787  	struct cgpu_info *modminer = thr->cgpu;
 788  	struct modminer_fpga_state *state = thr->cgpu_data;
 789  	char cmd[2], temperature[2];
 790  	int tbytes, tamount;
 791  	int amount;
 792  
 793  	// Device is gone
 794  	if (modminer->usbinfo.nodev)
 795  		return;
 796  
 797  	if (state->one_byte_temp) {
 798  		cmd[0] = MODMINER_TEMP1;
 799  		tbytes = 1;
 800  	} else {
 801  		cmd[0] = MODMINER_TEMP2;
 802  		tbytes = 2;
 803  	}
 804  
 805  	cmd[1] = modminer->fpgaid;
 806  
 807  	mutex_lock(modminer->modminer_mutex);
 808  	if (usb_write(modminer, (char *)cmd, 2, &amount, C_REQUESTTEMPERATURE) == 0 && amount == 2 &&
 809  	    usb_read(modminer, (char *)(&temperature), tbytes, &tamount, C_GETTEMPERATURE) == 0 && tamount == tbytes) {
 810  		mutex_unlock(modminer->modminer_mutex);
 811  		if (state->one_byte_temp)
 812  			modminer->temp = temperature[0];
 813  		else {
 814  			// Only accurate to 2 and a bit places
 815  			modminer->temp = roundf((temperature[1] * 256.0 + temperature[0]) / 0.128) / 1000.0;
 816  
 817  			state->tried_two_byte_temp = true;
 818  		}
 819  
 820  		if (state->overheated) {
 821  			// Limit recovery to lower than OVERHEAT so it doesn't just go straight over again
 822  			if (modminer->temp < MODMINER_RECOVER_TEMP) {
 823  				state->overheated = false;
 824  				applog(LOG_WARNING, "%s%u: Recovered, temp less than (%.1f) now %.3f",
 825  					modminer->drv->name, modminer->device_id,
 826  					MODMINER_RECOVER_TEMP, modminer->temp);
 827  			}
 828  		}
 829  		else if (modminer->temp >= MODMINER_OVERHEAT_TEMP) {
 830  			if (modminer->temp >= MODMINER_CUTOFF_TEMP) {
 831  				applog(LOG_WARNING, "%s%u: Hit thermal cutoff limit! (%.1f) at %.3f",
 832  					modminer->drv->name, modminer->device_id,
 833  					MODMINER_CUTOFF_TEMP, modminer->temp);
 834  
 835  				modminer_delta_clock(thr, MODMINER_CLOCK_CUTOFF, true, false);
 836  				state->overheated = true;
 837  				dev_error(modminer, REASON_DEV_THERMAL_CUTOFF);
 838  			} else {
 839  				applog(LOG_WARNING, "%s%u: Overheat limit (%.1f) reached %.3f",
 840  					modminer->drv->name, modminer->device_id,
 841  					MODMINER_OVERHEAT_TEMP, modminer->temp);
 842  
 843  				// If it's defined to be 0 then don't call modminer_delta_clock()
 844  				if (MODMINER_CLOCK_OVERHEAT != 0)
 845  					modminer_delta_clock(thr, MODMINER_CLOCK_OVERHEAT, true, false);
 846  				state->overheated = true;
 847  				dev_error(modminer, REASON_DEV_OVER_HEAT);
 848  			}
 849  		}
 850  	} else {
 851  		mutex_unlock(modminer->modminer_mutex);
 852  
 853  		if (!state->tried_two_byte_temp) {
 854  			state->tried_two_byte_temp = true;
 855  			state->one_byte_temp = true;
 856  		}
 857  	}
 858  }
 859  
 860  #define work_restart(thr)  thr->work_restart
 861  
 862  // 250Mhz is 17.17s - ensure we don't go idle
 863  static const double processtime = 17.0;
 864  // 160Mhz is 26.84 - when overheated ensure we don't throw away shares
 865  static const double overheattime = 26.9;
 866  
 867  static uint64_t modminer_process_results(struct thr_info *thr, struct work *work)
 868  {
 869  	struct cgpu_info *modminer = thr->cgpu;
 870  	struct modminer_fpga_state *state = thr->cgpu_data;
 871  	struct timeval now;
 872  	char cmd[2];
 873  	uint32_t nonce;
 874  	uint32_t curr_hw_errors;
 875  	int err, amount, amount2;
 876  	int timeoutloop;
 877  	double timeout;
 878  	int temploop;
 879  
 880  	// Device is gone
 881  	if (modminer->usbinfo.nodev)
 882  		return -1;
 883  
 884  	// If we are overheated it will just keep checking for results
 885  	// since we can't stop the work
 886  	// The next work will not start until the temp drops
 887  	check_temperature(thr);
 888  
 889  	cmd[0] = MODMINER_CHECK_WORK;
 890  	cmd[1] = modminer->fpgaid;
 891  
 892  	timeoutloop = 0;
 893  	temploop = 0;
 894  	while (0x80085) {
 895  		mutex_lock(modminer->modminer_mutex);
 896  		if ((err = usb_write(modminer, cmd, 2, &amount, C_REQUESTWORKSTATUS)) < 0 || amount != 2) {
 897  			mutex_unlock(modminer->modminer_mutex);
 898  
 899  			// timeoutloop never resets so the timeouts can't
 900  			// accumulate much during a single item of work
 901  			if (err == LIBUSB_ERROR_TIMEOUT && ++timeoutloop < 5) {
 902  				state->timeout_fail++;
 903  				goto tryagain;
 904  			}
 905  
 906  			applog(LOG_ERR, "%s%u: Error sending (get nonce) (%d:%d)",
 907  				modminer->drv->name, modminer->device_id, amount, err);
 908  
 909  			return -1;
 910  		}
 911  
 912  		err = usb_read(modminer, (char *)(&nonce), 4, &amount, C_GETWORKSTATUS);
 913  		while (err == LIBUSB_SUCCESS && amount < 4) {
 914  			size_t remain = 4 - amount;
 915  			char *pos = ((char *)(&nonce)) + amount;
 916  
 917  			state->success_more++;
 918  
 919  			err = usb_read(modminer, pos, remain, &amount2, C_GETWORKSTATUS);
 920  
 921  			amount += amount2;
 922  		}
 923  		mutex_unlock(modminer->modminer_mutex);
 924  
 925  		if (err < 0 || amount < 4) {
 926  			// timeoutloop never resets so the timeouts can't
 927  			// accumulate much during a single item of work
 928  			if (err == LIBUSB_ERROR_TIMEOUT && ++timeoutloop < 10) {
 929  				state->timeout_fail++;
 930  				goto tryagain;
 931  			}
 932  
 933  			applog(LOG_ERR, "%s%u: Error reading (get nonce) (%d:%d)",
 934  				modminer->drv->name, modminer->device_id, amount+amount2, err);
 935  		}
 936  
 937  		if (memcmp(&nonce, "\xff\xff\xff\xff", 4)) {
 938  			// found 'something' ...
 939  			state->shares++;
 940  			curr_hw_errors = state->hw_errors;
 941  			submit_nonce(thr, work, nonce);
 942  			if (state->hw_errors > curr_hw_errors) {
 943  				cgtime(&now);
 944  				// Ignore initial errors that often happen
 945  				if (tdiff(&now, &state->first_work) < 2.0) {
 946  					state->shares = 0;
 947  					state->shares_last_hw = 0;
 948  					state->hw_errors = 0;
 949  				} else {
 950  					state->shares_last_hw = state->shares;
 951  					if (modminer->clock > MODMINER_DEF_CLOCK || state->hw_errors > 1) {
 952  						float pct = (state->hw_errors * 100.0 / (state->shares ? : 1.0));
 953  						if (pct >= MODMINER_HW_ERROR_PERCENT)
 954  							modminer_delta_clock(thr, MODMINER_CLOCK_DOWN, false, false);
 955  					}
 956  				}
 957  			} else {
 958  				cgtime(&state->last_nonce);
 959  				state->death_stage_one = false;
 960  				// If we've reached the required good shares in a row then clock up
 961  				if (((state->shares - state->shares_last_hw) >= state->shares_to_good) &&
 962  						modminer->temp < MODMINER_TEMP_UP_LIMIT)
 963  					modminer_delta_clock(thr, MODMINER_CLOCK_UP, false, false);
 964  			}
 965  		} else {
 966  			// on rare occasions - the MMQ can just stop returning valid nonces
 967  			double death = ITS_DEAD_JIM * (state->death_stage_one ? 2.0 : 1.0);
 968  			cgtime(&now);
 969  			if (tdiff(&now, &state->last_nonce) >= death) {
 970  				if (state->death_stage_one) {
 971  					modminer_delta_clock(thr, MODMINER_CLOCK_DEAD, false, true);
 972  					applog(LOG_ERR, "%s%u: DEATH clock down",
 973  						modminer->drv->name, modminer->device_id);
 974  
 975  					// reset the death info and DISABLE it
 976  					state->last_nonce.tv_sec = 0;
 977  					state->last_nonce.tv_usec = 0;
 978  					state->death_stage_one = false;
 979  					return -1;
 980  				} else {
 981  					modminer_delta_clock(thr, MODMINER_CLOCK_DEAD, false, true);
 982  					applog(LOG_ERR, "%s%u: death clock down",
 983  						modminer->drv->name, modminer->device_id);
 984  
 985  					state->death_stage_one = true;
 986  				}
 987  			}
 988  		}
 989  
 990  tryagain:
 991  
 992  		if (work_restart(thr))
 993  			break;
 994  
 995  		if (state->overheated == true) {
 996  			// don't check every time (every ~1/2 sec)
 997  			if (++temploop > 4) {
 998  				check_temperature(thr);
 999  				temploop = 0;
1000  			}
1001  
1002  		}
1003  
1004  		if (state->overheated == true)
1005  			timeout = overheattime;
1006  		else
1007  			timeout = processtime;
1008  
1009  		cgtime(&now);
1010  		if (tdiff(&now, &state->tv_workstart) > timeout)
1011  			break;
1012  
1013  		// 1/10th sec to lower CPU usage
1014  		cgsleep_ms(100);
1015  		if (work_restart(thr))
1016  			break;
1017  	}
1018  
1019  	struct timeval tv_workend, elapsed;
1020  	cgtime(&tv_workend);
1021  	timersub(&tv_workend, &state->tv_workstart, &elapsed);
1022  
1023  	// Not exact since the clock may have changed ... but close enough I guess
1024  	uint64_t hashes = (uint64_t)modminer->clock * (((uint64_t)elapsed.tv_sec * 1000000) + elapsed.tv_usec);
1025  	// Overheat will complete the nonce range
1026  	if (hashes > 0xffffffff)
1027  		hashes = 0xffffffff;
1028  
1029  	work->nonce = 0xffffffff;
1030  
1031  	return hashes;
1032  }
1033  
1034  static int64_t modminer_scanhash(struct thr_info *thr, struct work *work, int64_t __maybe_unused max_nonce)
1035  {
1036  	struct modminer_fpga_state *state = thr->cgpu_data;
1037  	struct timeval tv1, tv2;
1038  	int64_t hashes;
1039  
1040  	// Device is gone
1041  	if (thr->cgpu->usbinfo.nodev)
1042  		return -1;
1043  
1044  	// Don't start new work if overheated
1045  	if (state->overheated == true) {
1046  		cgtime(&tv1);
1047  
1048  		while (state->overheated == true) {
1049  			check_temperature(thr);
1050  
1051  			// Device is gone
1052  			if (thr->cgpu->usbinfo.nodev)
1053  				return -1;
1054  
1055  			if (state->overheated == true) {
1056  				cgtime(&tv2);
1057  
1058  				// give up on this work item after 30s
1059  				if (work_restart(thr) || tdiff(&tv2, &tv1) > 30)
1060  					return 0;
1061  
1062  				// Give it 1s rest then check again
1063  				cgsleep_ms(1000);
1064  			}
1065  		}
1066  	}
1067  
1068  	if (!modminer_start_work(thr, work))
1069  		return -1;
1070  
1071  	hashes = modminer_process_results(thr, work);
1072  	if (hashes == -1)
1073  		return hashes;
1074  
1075  	return hashes;
1076  }
1077  
1078  static void modminer_hw_error(struct thr_info *thr)
1079  {
1080  	struct modminer_fpga_state *state = thr->cgpu_data;
1081  
1082  	state->hw_errors++;
1083  }
1084  
1085  static void modminer_fpga_shutdown(struct thr_info *thr)
1086  {
1087  	free(thr->cgpu_data);
1088  	thr->cgpu_data = NULL;
1089  }
1090  
1091  static char *modminer_set_device(struct cgpu_info *modminer, char *option, char *setting, char *replybuf)
1092  {
1093  	const char *ret;
1094  	int val;
1095  
1096  	if (strcasecmp(option, "help") == 0) {
1097  		sprintf(replybuf, "clock: range %d-%d and a multiple of 2",
1098  					MODMINER_MIN_CLOCK, MODMINER_MAX_CLOCK);
1099  		return replybuf;
1100  	}
1101  
1102  	if (strcasecmp(option, "clock") == 0) {
1103  		if (!setting || !*setting) {
1104  			sprintf(replybuf, "missing clock setting");
1105  			return replybuf;
1106  		}
1107  
1108  		val = atoi(setting);
1109  		if (val < MODMINER_MIN_CLOCK || val > MODMINER_MAX_CLOCK || (val & 1) != 0) {
1110  			sprintf(replybuf, "invalid clock: '%s' valid range %d-%d and a multiple of 2",
1111  						setting, MODMINER_MIN_CLOCK, MODMINER_MAX_CLOCK);
1112  			return replybuf;
1113  		}
1114  
1115  		val -= (int)(modminer->clock);
1116  
1117  		ret = modminer_delta_clock(modminer->thr[0], val, false, true);
1118  		if (ret) {
1119  			sprintf(replybuf, "Set clock failed: %s", ret);
1120  			return replybuf;
1121  		} else
1122  			return NULL;
1123  	}
1124  
1125  	sprintf(replybuf, "Unknown option: %s", option);
1126  	return replybuf;
1127  }
1128  
1129  struct device_drv modminer_drv = {
1130  	.drv_id = DRIVER_modminer,
1131  	.dname = "ModMiner",
1132  	.name = "MMQ",
1133  	.drv_detect = modminer_detect,
1134  	.get_statline_before = get_modminer_statline_before,
1135  	.set_device = modminer_set_device,
1136  	.thread_prepare = modminer_fpga_prepare,
1137  	.thread_init = modminer_fpga_init,
1138  	.scanhash = modminer_scanhash,
1139  	.hw_error = modminer_hw_error,
1140  	.thread_shutdown = modminer_fpga_shutdown,
1141  };