diff --git a/README.md b/README.md index 7302c89..355fb9d 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ hopefully all Supermicro X8 / X9 / X10 / X11 boards with IPMI. I have personally #### Hardware * Supermicro X9DRi-LN4+ * Supermicro X8SIL-F (IPMI equipped variant) +* Supermicro X9SRW-F (IPMI equipped variant, using external ipmitool from OS repos) #### Operating Systems * VMWare ESXi 6.7 @@ -25,6 +26,8 @@ hopefully all Supermicro X8 / X9 / X10 / X11 boards with IPMI. I have personally * Ubuntu 20.04 * Ubuntu 18.04 * Ubuntu 16.04 +* Proxmox VE 8.4 / Debian 12 "Bookworm" +* Proxmox VE 9 / Debian 13 "Trixie" For this script to work, you **MUST** have an IPMI module **AND** set your fan speeds to FULL SPEED in the BIOS otherwise this tool fights for control with the fans and they will spin up and down repeatedly (yo-yo'ing). @@ -57,12 +60,48 @@ apt-get install multiarch-support Next, find a suitable place for the scripts to live such as `/opt/FanControl/` (you may need to create that dir). -Finally, run `crontab -e` and add the following line; +Modern Ubuntu uses systemd for services, so I suggest installing the included systemd service file as described in the `Systemd Service Installation` section below. If you prefer to use cron, you can also load it via the system cron, as described below: + +Run `crontab -e` and add the following line; ~~~ @reboot /opt/FanControl/daemon.sh ~~~ +#### Proxmox / Debian +On Proxmox systems, I would suggest installing the ipmitool package from the operating system's repository: + +~~~ +sudo apt-get install ipmitool +~~~ + +Then, similar to Ubuntu instructions above, find a suitable place for the scripts to live such as `/opt/FanControl/`. 
Personally, I preferred `/opt/Supermicro-Fan-Control` because then I could just: +* log in as root +* install git: `apt-get install git` +* change directory to `/opt/`: `cd /opt` +* clone the repository: `git clone 'https://github.com/jasongaunt/Supermicro-Fan-Control.git'` +* change directory to `/opt/Supermicro-Fan-Control`: `cd Supermicro-Fan-Control` +* edit the configuration file as needed: `nano config.ini`, ensuring the `IPMITOOL` option names your ipmitool binary (the default is `ipmitool`) + +Finally, choose how to start the process: +* run `crontab -e` as in the Ubuntu example above, adjusting the path to `daemon.sh` to match your install directory +* install the included systemd service file as described in the `Systemd Service Installation` section below +* use another method to load and run it on startup + +#### Systemd Service Installation + +You can install the included systemd unit to manage the fan controller through systemd (the unit file assumes the repository lives in `/opt/Supermicro-Fan-Control`; edit its paths if you installed elsewhere): + +~~~bash +ln -s /opt/Supermicro-Fan-Control/fan-control.service /etc/systemd/system/fan-control.service +systemctl daemon-reload +systemctl enable --now fan-control +~~~ + +To check the status or logs: +* `systemctl status fan-control` +* `journalctl -u fan-control -f` + Hardware fan assignments ------------------------ @@ -84,6 +123,8 @@ Supermicro assume Zone A is used for cooling the main system and Zone B for cool * Zone A = Anything that cools the CPU, memory and motherboard * Zone B = Anything that cools PCIe cards and / or drive bays +On at least one Supermicro X9SRW-F board in a 2U case, the two zones appeared to be reversed, so your mileage may vary. + #### Desktop cases If, like myself, you're using a Supermicro board in a desktop case, chances are you've already using dedicated CPU coolers with their @@ -166,3 +207,7 @@ PWM% The values included in `config.ini` by default are sane values to start with. Good luck! ~ JG + +#### Hushing Loud Server Fans + +The fans on my X9SRW-F 2U server were extremely loud by default, even with no load and the CPU at 30C. 
With the fan speed set to 1% and an ambient room temperature of ~25C/77F, my low-TDP CPU idled at 31C with the fans almost silent. In this same configuration with 1% fan speed, my CPU only got up to around 45C at full load. To be safe, I made sure to set my minimum temperature to 33C, and kept my 100% speed threshold far below the CPU thermal limits, but YMMV. -BG diff --git a/config.ini b/config.ini index d11a79f..9ce650a 100644 --- a/config.ini +++ b/config.ini @@ -33,3 +33,4 @@ Temp Averaging Window = 5 Ignore Temp Change Amount = 0 Exit On IPMI Failure = False Debug Mode = False +IPMITOOL = ipmitool diff --git a/fan-control.py b/fan-control.py index e4c7e43..c49d3b6 100755 --- a/fan-control.py +++ b/fan-control.py @@ -14,6 +14,7 @@ # Import required modules import os, sys, re, time, configparser, statistics from subprocess import Popen, PIPE +import shutil # Set up our default variables with safe values ZONE_A_SENSOR_NAME_SEARCH = r'^.*CPU.*$' @@ -32,10 +33,14 @@ IGNORE_TEMP_CHANGE_AMOUNT = 1 EXIT_ON_FAILURE = False DEBUG = False +IPMITOOL = False +CONFIG_TEST = False # Wrapper for (re)reading config.ini def reload_config(): global DEBUG + global IPMITOOL + global CONFIG_TEST if DEBUG: sys.stdout.write('Reloading config... 
'); sys.stdout.flush() config = configparser.ConfigParser() config.read(os.path.join(os.path.dirname(__file__), './config.ini')) @@ -59,12 +64,78 @@ def reload_config(): global EXIT_ON_FAILURE; EXIT_ON_FAILURE = config.get('General Configuration', 'Exit On IPMI Failure').lower() in ["yes", "true", "1"] DEBUG = config.get('General Configuration', 'Debug Mode').lower() in ["yes", "true", "1"] + global IPMITOOL; + ipmitool_bin = None + ipmitool_desc = None + try: + IPMITOOL = config.get('General Configuration', 'IPMITOOL') + if IPMITOOL.lower() in [ "", "0", "false", "none" ]: + IPMITOOL = False + else: + # validate the external command exists, or replace with False + ipmitool_bin = shutil.which(IPMITOOL) + if ipmitool_bin is None: + err = "Unable to find ipmitool in system path: " + IPMITOOL + if CONFIG_TEST: + raise FileNotFoundError(err) + if DEBUG: + sys.stdout.write("\n\nError: " + err + "\n") + IPMITOOL = False + except configparser.NoOptionError as e: + if DEBUG: sys.stdout.write("\n" + str(e) + "\n") + IPMITOOL = False + # if false, use the builtin IPMICFG tool + if IPMITOOL == False: + ipmitool_bin = get_bundled_ipmicfg_binary() + ipmitool_desc = "bundled IPMICFG tool" + else: + ipmitool_desc = "external IPMITOOL" + if not ipmitool_bin: + err = "Unable to find any IPMI helper while trying to find " + ipmitool_desc + if CONFIG_TEST: + raise FileNotFoundError(err) + if DEBUG: + sys.stdout.write("\n\nError: " + err + "\n") + elif not os.path.isfile(ipmitool_bin): + err = "Unable to find " + ipmitool_desc + " at: " + ipmitool_bin + if CONFIG_TEST: + raise FileNotFoundError(err) + if DEBUG: + sys.stdout.write("\n\nError: " + err + "\n") + elif not os.access(ipmitool_bin, os.X_OK): + err = "Unable to execute " + ipmitool_desc + " at: " + ipmitool_bin + if CONFIG_TEST: + raise PermissionError(err) + if DEBUG: + sys.stdout.write("\n\nError: " + err + "\n") + elif DEBUG and IPMITOOL: + sys.stdout.write("\nUsing ipmitool: " + IPMITOOL + "\n") + if DEBUG: 
sys.stdout.write("done\n") +def get_bundled_ipmicfg_binary(): + return os.path.join(os.path.dirname(__file__), "./ipmitool/", "IPMICFG-Linux.x86") + # Wrapper for making IPMI calls def call_ipmi(params): - IPMICMD = "./IPMICFG-Linux.x86" - IPMICWD = os.path.join(os.path.dirname(__file__), "./ipmitool/") + global IPMITOOL + if IPMITOOL: + # External ipmitool prefers commands without the leading dash + IPMICWD = os.path.dirname(__file__) + IPMICMD = IPMITOOL + if params[0] in ["-raw", "-sdr"]: + params[0] = params[0][1:] + else: + if DEBUG: + sys.stdout.write('Unknown params[0]: ' + params[0] + '\n') + sys.stdout.flush() + err = "Error: Unknown argument in call to external ipmitool: " + params[0] + return [-1, '', err] + else: + # Bundled ipmicfg tool logic + IPMICMD = get_bundled_ipmicfg_binary() + IPMICWD = os.path.dirname(IPMICMD) + IPMICMD = [IPMICMD] + params if DEBUG: sys.stdout.write(' ' + ' '.join(IPMICMD) + '\n') process = Popen(IPMICMD, stdout=PIPE, cwd=IPMICWD) @@ -104,6 +175,127 @@ def calculate_pwm(PEAK_TEMP, MIN_TEMP, MAX_TEMP, MIN_FAN_PWM, MAX_FAN_PWM): elif PWMVAL > MAX_FAN_PWM: PWMVAL = MAX_FAN_PWM # Sanitise output return int(PWMVAL) +def parse_sdr_fields(line): + """parse SDR fields from an IPMI response. + - If necessary, parses string line parameter into a list + - External IPMITOOL has a different 'sdr' output format than IPMICFG, so if necessary, swap SDR line field order + """ + global IPMITOOL + if type(line) is list: + l = line + elif type(line) is str: + if "|" not in line: return None + l = [p.strip() for p in line.rstrip().split("|")] + else: + return None + if len(l) < 3: return None + if IPMITOOL: + # ipmitool format: Name | Value | Status + return [ l[2], l[0], l[1] ] + # ipmicfg format: Status | Name | Value + return l + +def get_celsius_from_field(line): + """Extracts Celsius temperature from an SDR field list. + Returns integer Celsius value or None if not a valid temperature. 
+ """ + if not line or len(line) < 3: return None + val_str = line[2] + + # Standard format: 40C/104F + match = re.search(r'(\d+)C\/', val_str) + if match: + return int(match.group(1)) + + # Alternate format: 40 degrees C or 104 degrees F + match = re.match(r'^(\d+) degrees (C|F)$', val_str) + if match: + val = int(match.group(1)) + if match.group(2) == 'F': + return (val - 32) * 5 // 9 + return val + + return None + +def config_test(): + """Test configuration file for validity. + Return 0 if valid, 1 if invalid. + """ + global DEBUG + global IPMITOOL + global EXIT_ON_FAILURE + global CONFIG_TEST + CONFIG_TEST = True + try: + reload_config() + EXIT_ON_FAILURE = True + # Perform basic logical validation + if ZONE_A_MIN_TEMP >= ZONE_A_MAX_TEMP: + raise ValueError("Zone A: 'Minimum Temperature Degrees' (%d) must be less than 'Maximum Temperature Degrees' (%d)" + % (ZONE_A_MIN_TEMP, ZONE_A_MAX_TEMP)) + if ZONE_B_MIN_TEMP >= ZONE_B_MAX_TEMP: + raise ValueError("Zone B: 'Minimum Temperature Degrees' (%d) must be less than 'Maximum Temperature Degrees' (%d)" + % (ZONE_B_MIN_TEMP, ZONE_B_MAX_TEMP)) + if ZONE_A_MIN_FAN_PWM > ZONE_A_MAX_FAN_PWM: + raise ValueError("Zone A: 'Minimum Temperature Fan PWM' (%d) cannot be greater than 'Maximum Temperature Fan PWM' (%d)" + % (ZONE_A_MIN_FAN_PWM, ZONE_A_MAX_FAN_PWM)) + if ZONE_B_MIN_FAN_PWM > ZONE_B_MAX_FAN_PWM: + raise ValueError("Zone B: 'Minimum Temperature Fan PWM' (%d) cannot be greater than 'Maximum Temperature Fan PWM' (%d)" + % (ZONE_B_MIN_FAN_PWM, ZONE_B_MAX_FAN_PWM)) + + for name, val in [ + ("Zone A Min Fan PWM", ZONE_A_MIN_FAN_PWM), + ("Zone A Max Fan PWM", ZONE_A_MAX_FAN_PWM), + ("Zone B Min Fan PWM", ZONE_B_MIN_FAN_PWM), + ("Zone B Max Fan PWM", ZONE_B_MAX_FAN_PWM), + ]: + if not (0 <= val <= 100): + raise ValueError("%s (%d) is invalid; must be between 0 and 100" % (name, val)) + + # ensure that we can get and parse output from the -sdr call + sensorinfo = call_ipmi(["-sdr"]) + if sensorinfo[0] != 0: + raise 
Exception("IPMI Communication Failure (Exit Code %d) using %s: %s" % + (sensorinfo[0], IPMITOOL if IPMITOOL else "IPMICFG", str(sensorinfo[1]).strip())) + found_a, found_b, found_valid_temp = False, False, False + example_temp_field = False + for line in sensorinfo[1].split("\n"): + l = parse_sdr_fields(line) + if not l: continue + + temp = get_celsius_from_field(l) + if temp is not None: + found_valid_temp = True + if (ZONE_A_SENSOR_NAME_SEARCH.lower() in l[1].lower()) == ZONE_A_SENSOR_TEST_MATCH: + found_a = True + if (ZONE_B_SENSOR_NAME_SEARCH.lower() in l[1].lower()) == ZONE_B_SENSOR_TEST_MATCH: + found_b = True + elif not example_temp_field: + # If parsing failed, check if it looks like a temp field to provide a helpful hint + if re.match(r'\d+.*(C|F)|(C|F).*\d+', l[2]): + example_temp_field = l[2] + + if not found_valid_temp: + if example_temp_field: + example_temp_field = "Possible temperature field example: " + example_temp_field + else: + example_temp_field = "No potential temperature fields found." + raise ValueError("No temperature data found. This system may use an unexpected data format. 
" + example_temp_field) + if not found_a: + raise ValueError("No valid temperature sensors found for Zone A matching '%s'" % ZONE_A_SENSOR_NAME_SEARCH) + if not found_b: + raise ValueError("No valid temperature sensors found for Zone B matching '%s'" % ZONE_B_SENSOR_NAME_SEARCH) + + sys.stdout.write("Configuration is valid and sensors detected.\n") + return 0 + except Exception as e: + sys.stderr.write("Configuration error: " + str(e) + "\n") + return 1 + +# Validate configuration if requested +if "--configtest" in sys.argv: + sys.exit(config_test()) + # Main program loop starts here reload_config(); check_if_already_running(); ZONE_A_TEMP_SAMPLES = [ZONE_A_MAX_TEMP, ZONE_A_MAX_TEMP, ZONE_A_MAX_TEMP, ZONE_A_MAX_TEMP, ZONE_A_MAX_TEMP] @@ -136,30 +328,25 @@ def calculate_pwm(PEAK_TEMP, MIN_TEMP, MAX_TEMP, MIN_FAN_PWM, MAX_FAN_PWM): # Process our sensor values and grab the highest for each zone for line in sensorinfo[1].split("\n"): # Parse returned data if we can, otherwise ignore it - if "|" not in line: continue - line = line.rstrip().split("|") - line[0] = line[0].strip() - line[1] = line[1].strip() - line[2] = line[2].strip() + line = parse_sdr_fields(line) + if not line: continue if DEBUG: sys.stdout.write(line[1] + ": " + line[2] + "\n"); sys.stdout.flush() # Check to see if we have a failed fan if ((line[0].lower() == "fail") and ("fan" in line[1].lower())): FAILED_FAN = True - # Only continue past this point of the for-loop if we have a temperature value - if not re.match(r'\d+C\/\d+F', line[2]): continue + temp = get_celsius_from_field(line) + if temp is None: continue # Check to see if this sensor matches Zone A if (ZONE_A_SENSOR_NAME_SEARCH.lower() in line[1].lower()) == ZONE_A_SENSOR_TEST_MATCH: - temp = line[2].split('C/') - if DEBUG: sys.stdout.write("ZONE A SENSOR MATCH: " + line[1] + " " + temp[0] + "'C\n"); sys.stdout.flush() - if int(temp[0]) > PEAK_ZONE_A_TEMP: PEAK_ZONE_A_TEMP = int(temp[0]) + if DEBUG: sys.stdout.write("ZONE A SENSOR MATCH: " + 
line[1] + " " + str(temp) + "'C\n"); sys.stdout.flush() + if temp > PEAK_ZONE_A_TEMP: PEAK_ZONE_A_TEMP = temp # Check to see if this sensor matches Zone B if (ZONE_B_SENSOR_NAME_SEARCH.lower() in line[1].lower()) == ZONE_B_SENSOR_TEST_MATCH: - temp = line[2].split('C/') - if DEBUG: sys.stdout.write("ZONE B SENSOR MATCH: " + line[1] + " "+ temp[0] + "'C\n"); sys.stdout.flush() - if int(temp[0]) > PEAK_ZONE_B_TEMP: PEAK_ZONE_B_TEMP = int(temp[0]) + if DEBUG: sys.stdout.write("ZONE B SENSOR MATCH: " + line[1] + " "+ str(temp) + "'C\n"); sys.stdout.flush() + if temp > PEAK_ZONE_B_TEMP: PEAK_ZONE_B_TEMP = temp # Average out temp values over the last 5 samples to smooth RPM changes and output our values ZONE_A_TEMP_SAMPLES.append(PEAK_ZONE_A_TEMP); ZONE_A_TEMP_SAMPLES.pop(0) diff --git a/fan-control.service b/fan-control.service new file mode 100644 index 0000000..efa5e83 --- /dev/null +++ b/fan-control.service @@ -0,0 +1,17 @@ +[Unit] +Description=Supermicro Fan Control Service +After=network.target + +[Service] +Type=simple +User=root +WorkingDirectory=/opt/Supermicro-Fan-Control +Environment=PYTHONUNBUFFERED=1 +# Validate configuration and dependencies before starting +ExecStartPre=/usr/bin/python3 /opt/Supermicro-Fan-Control/fan-control.py --configtest +ExecStart=/usr/bin/python3 /opt/Supermicro-Fan-Control/fan-control.py +Restart=always +RestartSec=10 + +[Install] +WantedBy=multi-user.target \ No newline at end of file