17 #ifndef sysMonitor_hpp
18 #define sysMonitor_hpp
20 #include "../../libMagAOX/libMagAOX.hpp"
21 #include "../../magaox_git_version.h"
130 const std::string& line
140 const std::string& line
150 const std::string& line
195 std::vector<float>& hdd_temps
207 const std::string& line
371 config.add(
"sysType",
"",
"sysType", argType::Required,
"",
"sysType",
false,
"string",
"The system type, Intel (default) or AMD");
372 config.add(
"diskNames",
"",
"diskNames", argType::Required,
"",
"diskNames",
false,
"vector<string>",
"The names (/dev/sdX) of the drives to monitor");
373 config.add(
"warningCoreTemp",
"",
"warningCoreTemp", argType::Required,
"",
"warningCoreTemp",
false,
"int",
"The warning temperature for CPU cores.");
374 config.add(
"criticalCoreTemp",
"",
"criticalCoreTemp", argType::Required,
"",
"criticalCoreTemp",
false,
"int",
"The critical temperature for CPU cores.");
375 config.add(
"warningDiskTemp",
"",
"warningDiskTemp", argType::Required,
"",
"warningDiskTemp",
false,
"int",
"The warning temperature for the disk.");
376 config.add(
"criticalDiskTemp",
"",
"criticalDiskTemp", argType::Required,
"",
"criticalDiskTemp",
false,
"int",
"The critical temperature for disk.");
393 config(st,
"sysType");
398 else if (st ==
"AMD")
404 log<software_critical>({ __FILE__, __LINE__,
"Invalid system type specified." });
432 for (
unsigned int i = 0; i <
m_diskTemps.size(); i++)
470 log<software_critical>({ __FILE__, __LINE__ });
494 else if (rvCPUTemp == 2)
503 log<software_error>({ __FILE__, __LINE__,
"Could not log values for CPU core temps." });
515 log<software_error>({ __FILE__, __LINE__,
"Could not log values for CPU core loads." });
534 else if (rvDiskTemp == 2)
543 log<software_error>({ __FILE__, __LINE__,
"Could not log values for drive temps." });
550 if (rvDiskUsage >= 0 && rvRamUsage >= 0)
556 log<software_error>({ __FILE__, __LINE__,
"Could not log values for usage." });
565 log<software_error>({ __FILE__, __LINE__,
"Could not get chronyd status." });
570 log<software_error>({ __FILE__, __LINE__ });
597 std::vector<std::string> commandList{
"sensors" };
599 std::vector<std::string> commandOutput, commandError;
603 if (commandOutput.size() < 1)
return log<
software_error, -1>({ __FILE__, __LINE__ });
607 if (commandError.size() > 0)
609 for (
size_t n = 0; n < commandError.size(); ++n)
611 log<software_error>({ __FILE__, __LINE__,
"sensors stderr: " + commandError[n] });
616 for (
size_t n = 0; n < commandOutput.size(); ++n)
621 temps.push_back(tempVal);
629 const std::string& line
642 log<software_error>({ __FILE__, __LINE__,
"invalid system type" });
650 const std::string& line
653 if (line.length() <= 1)
659 std::string str = line.substr(0, 5);
660 if (str.compare(
"Core ") == 0)
662 size_t st = line.find(
':', 0);
663 if (st == std::string::npos)
665 log<software_error>({ __FILE__, __LINE__,
"Invalid read occured when parsing CPU temperatures." });
672 size_t ed = line.find(
'C', st);
673 if (ed == std::string::npos)
675 log<software_error>({ __FILE__, __LINE__,
"Invalid read occured when parsing CPU temperatures." });
682 std::string temp_str = line.substr(st, ed - st);
686 temp = std::stof(temp_str);
688 catch (
const std::invalid_argument& e)
690 log<software_error>({ __FILE__, __LINE__,
"Invalid read occured when parsing CPU temperatures." });
697 std::istringstream iss(line);
698 std::vector<std::string> tokens{ std::istream_iterator<std::string>{iss},std::istream_iterator<std::string>{} };
701 tokens.at(5).pop_back();
702 tokens.at(5).pop_back();
703 tokens.at(5).pop_back();
704 tokens.at(5).pop_back();
705 tokens.at(5).erase(0, 1);
708 catch (
const std::invalid_argument& e)
710 log<software_error>({ __FILE__, __LINE__,
"Invalid read occured when parsing warning CPU temperatures." });
716 std::istringstream iss(line);
717 std::vector<std::string> tokens{ std::istream_iterator<std::string>{iss},std::istream_iterator<std::string>{} };
720 tokens.at(8).pop_back();
721 tokens.at(8).pop_back();
722 tokens.at(8).pop_back();
723 tokens.at(8).pop_back();
724 tokens.at(8).erase(0, 1);
727 catch (
const std::invalid_argument& e)
729 log<software_error>({ __FILE__, __LINE__,
"Invalid read occured when parsing critical CPU temperatures." });
745 const std::string& line
748 if (line.length() <= 1)
754 std::string str = line.substr(0, 6);
755 if (str.compare(
"Tctl: ") == 0)
757 size_t ed = line.find(
'C', 0);
758 if (ed == std::string::npos)
760 log<software_error>({ __FILE__, __LINE__,
"Invalid read occured when parsing CPU temperatures." });
765 str = line.substr(7, ((ed - 1) - 7));
769 temp = std::stof(str);
773 log<software_error>({ __FILE__, __LINE__,
"Invalid read occured when parsing CPU temperatures." });
788 int coreNum = 0, rv = 0;
794 std::cout <<
"Warning temperature for Core " << coreNum << std::endl;
802 std::cout <<
"Critical temperature for Core " << coreNum << std::endl;
812 std::vector<std::string> commandList{
"mpstat",
"-P",
"ALL",
"1",
"1" };
813 std::vector<std::string> commandOutput, commandError;
817 if (commandOutput.size() < 1)
return log<
software_error, -1>({ __FILE__, __LINE__ });
821 if (commandError.size() > 0)
823 for (
size_t n = 0; n < commandError.size(); ++n)
825 log<software_error>({ __FILE__, __LINE__,
"mpstat stderr: " + commandError[n] });
831 if (commandOutput.size() < 5)
833 return log<
software_error, -1>({ __FILE__, __LINE__,
"not enough lines returned by mpstat" });
836 for (
auto line = commandOutput.begin() + 4; line != commandOutput.end(); line++)
841 loads.push_back(loadVal);
849 const std::string & line
852 if (line.length() <= 1)
854 log<software_error>({ __FILE__, __LINE__,
"zero length line in parseCPULoads." });
857 std::istringstream iss(line);
859 std::vector<std::string> tokens(std::istream_iterator<std::string>{iss}, std::istream_iterator<std::string>{});
860 if (tokens.size() < 8)
return 1;
865 cpu_load = 100.0 - std::stof(tokens.at(tokens.size() - 1));
867 catch (
const std::invalid_argument& e)
869 log<software_error>({ __FILE__, __LINE__,
"Invalid read occured when parsing CPU core usage." });
872 catch (
const std::out_of_range& e)
874 log<software_error>({ __FILE__, __LINE__,
"Out of range exception in parseCPULoads." });
883 std::vector<float>& hdd_temps
886 std::vector<std::string> commandList{
"hddtemp" };
892 std::vector<std::string> commandOutput, commandError;
896 if (commandOutput.size() < 1)
return log<
software_error, -1>({ __FILE__, __LINE__ });
900 if (commandError.size() > 0)
902 for (
size_t n = 0; n < commandError.size(); ++n)
904 log<software_error>({ __FILE__, __LINE__,
"hddtemp stderr: " + commandError[n] });
909 for (
auto line : commandOutput)
911 std::string driveName;
915 hdd_names.push_back(driveName);
916 hdd_temps.push_back(tempVal);
927 const std::string& line
931 if (line.length() <= 6)
938 size_t sp = line.find(
':', 0);
939 driveName = line.substr(5, sp - 5);
941 std::istringstream iss(line);
942 std::vector<std::string> tokens{ std::istream_iterator<std::string>{iss},std::istream_iterator<std::string>{} };
944 for (
auto temp_s : tokens)
948 if (isdigit(temp_s.at(0)) && temp_s.substr(temp_s.length() - 1, 1) ==
"C")
954 tempValue = std::stof(temp_s);
956 catch (
const std::invalid_argument& e)
958 log<software_error>({ __FILE__, __LINE__,
"Invalid read occured when parsing drive temperatures." });
963 hdd_temp = tempValue;
975 catch (
const std::out_of_range& e)
996 std::cout <<
"Warning temperature for Disk" << std::endl;
1004 std::cout <<
"Critical temperature for Disk " << std::endl;
1013 std::vector<std::string> commandList{
"df" };
1015 std::vector<std::string> commandOutput, commandError;
1019 if (commandOutput.size() < 1)
return log<
software_error, -1>({ __FILE__, __LINE__ });
1023 if (commandError.size() > 0)
1025 for (
size_t n = 0; n < commandError.size(); ++n)
1027 log<software_error>({ __FILE__, __LINE__,
"df stderr: " + commandError[n] });
1032 for (
auto line : commandOutput)
1034 int rvDiskUsage =
parseDiskUsage(line, rootUsage, dataUsage, bootUsage);
1035 if (rvDiskUsage == 0)
1045 if (line.length() <= 1)
1050 std::istringstream iss(line);
1051 std::vector<std::string> tokens{ std::istream_iterator<std::string>{iss},std::istream_iterator<std::string>{} };
1054 if (tokens.at(5).compare(
"/") == 0)
1056 tokens.at(4).pop_back();
1059 rootUsage = std::stof(tokens.at(4)) / 100;
1062 catch (
const std::invalid_argument& e)
1064 log<software_error>({ __FILE__, __LINE__,
"Invalid read occured when parsing drive usage." });
1068 else if (tokens.at(5).compare(
"/data") == 0)
1070 tokens.at(4).pop_back();
1073 dataUsage = std::stof(tokens.at(4)) / 100;
1076 catch (
const std::invalid_argument& e)
1078 log<software_error>({ __FILE__, __LINE__,
"Invalid read occured when parsing drive usage." });
1082 else if (tokens.at(5).compare(
"/boot") == 0)
1084 tokens.at(4).pop_back();
1087 bootUsage = std::stof(tokens.at(4)) / 100;
1090 catch (
const std::invalid_argument& e)
1092 log<software_error>({ __FILE__, __LINE__,
"Invalid read occured when parsing drive usage." });
1097 catch (
const std::out_of_range& e) {
1105 std::vector<std::string> commandList{
"free",
"-m" };
1107 std::vector<std::string> commandOutput, commandError;
1111 if (commandOutput.size() < 1)
return log<
software_error, -1>({ __FILE__, __LINE__ });
1115 if (commandError.size() > 0)
1117 for (
size_t n = 0; n < commandError.size(); ++n)
1119 log<software_error>({ __FILE__, __LINE__,
"free stderr: " + commandError[n] });
1123 for (
auto line : commandOutput)
1135 if (line.length() <= 1)
1139 std::istringstream iss(line);
1140 std::vector<std::string> tokens{ std::istream_iterator<std::string>{iss},std::istream_iterator<std::string>{} };
1143 if (tokens.at(0).compare(
"Mem:") != 0)
1147 ramUsage = std::stof(tokens.at(2)) / std::stof(tokens.at(1));
1148 if (ramUsage > 1 || ramUsage == 0)
1155 catch (
const std::invalid_argument& e)
1157 log<software_error>({ __FILE__, __LINE__,
"Invalid read occured when parsing RAM usage." });
1160 catch (
const std::out_of_range& e) {
1167 std::vector<std::string> commandList{
"chronyc",
"-c",
"tracking" };
1169 std::vector<std::string> commandOutput, commandError;
1173 if (commandOutput.size() < 1)
return log<
software_error, -1>({ __FILE__, __LINE__ });
1177 if (commandError.size() > 0)
1179 for (
size_t n = 0; n < commandError.size(); ++n)
1181 log<software_error>({ __FILE__, __LINE__,
"chronyc stderr: " + commandError[n] });
1185 if (commandOutput.size() < 1)
1187 log<software_error>({ __FILE__,__LINE__,
"no response from chronyc -c" });
1191 std::vector<std::string> results;
1192 mx::ioutils::parseStringVector(results, commandOutput[0],
',');
1194 if (results.size() < 1)
1196 log<software_error>({ __FILE__,__LINE__,
"wrong number of fields from chronyc -c" });
1200 static std::string last_mac;
1201 static std::string last_ip;
1242 float min, max, mean;
1257 updateIfChanged<float>(
m_indiP_core_loads, {
"min",
"max",
"mean" }, { min,max,mean });
1274 updateIfChanged<float>(
m_indiP_core_temps, {
"min",
"max",
"mean" }, { min,max,mean });
1325 for (
size_t cpu = 0; cpu <
m_coreLoads.size(); ++cpu)
1327 std::string cpuFile =
"/sys/devices/system/cpu/cpu";
1328 cpuFile += std::to_string(cpu);
1329 cpuFile +=
"/cpufreq/scaling_governor";
1330 int wfd = open(cpuFile.c_str(), O_WRONLY);
1331 ssize_t perfsz =
sizeof(
"performance");
1332 ssize_t wrtsz = write(wfd,
"performance", perfsz);
1335 log<software_error>({ __FILE__,__LINE__,
"error setting performance governor for CPU " + std::to_string(cpu) });
1341 fd = open(
"/dev/cpu_dma_latency", O_WRONLY);
1343 if (fd <= 0) log<software_error>({ __FILE__,__LINE__,
"error opening cpu_dma_latency" });
1347 if (write(fd, &l,
sizeof(l)) !=
sizeof(l))
1349 log<software_error>({ __FILE__,__LINE__,
"error writing to cpu_dma_latency" });
1369 for (
size_t cpu = 0; cpu <
m_coreLoads.size(); ++cpu)
1371 std::string cpuFile =
"/sys/devices/system/cpu/cpu";
1372 cpuFile += std::to_string(cpu);
1373 cpuFile +=
"/cpufreq/scaling_governor";
1374 int wfd = open(cpuFile.c_str(), O_WRONLY);
1375 ssize_t pwrsz =
sizeof(
"powersave");
1376 ssize_t wrtsz = write(wfd,
"powersave", pwrsz);
1379 log<software_error>({ __FILE__,__LINE__,
"error setting powersave governor for CPU " + std::to_string(cpu) });
1399 if (
ipRecv.getName() != m_indiP_setlat.getName())
1401 log<software_error>({ __FILE__,__LINE__,
"wrong INDI property received." });
1405 if (!
ipRecv.find(
"toggle"))
return 0;
1407 if (
ipRecv[
"toggle"].getSwitchState() == pcf::IndiElement::Off)
1409 m_setLatency =
false;
1412 if (
ipRecv[
"toggle"].getSwitchState() == pcf::IndiElement::On)
1414 m_setLatency =
true;
1463 static std::vector<float> old_coreLoads;
1474 if (
m_coreLoads[n] != old_coreLoads[n]) write =
true;
1492 static std::vector<float> old_coreTemps;
1503 if (
m_coreTemps[n] != old_coreTemps[n]) write =
true;
1520 static std::vector<std::string> old_diskNames;
1521 static std::vector<float> old_diskTemps;
1551 static float old_ramUsage = 0;
1552 static float old_bootUsage = 0;
1553 static float old_rootUsage = 0;
1554 static float old_dataUsage = 0;
1571 static std::string old_chronySourceMac;
1572 static std::string old_chronySourceIP;
1573 static std::string old_chronySynch;
1574 static std::string old_chronyLeap;
1592 static double old_chronySystemTime = 1e50;
1593 static double old_chronyLastOffset = 0;
1594 static double old_chronyRMSOffset = 0;
1595 static double old_chronyFreq = 0;
1596 static double old_chronyResidFreq = 0;
1597 static double old_chronySkew = 0;
1598 static double old_chronyRootDelay = 0;
1599 static double old_chronyRootDispersion = 0;
1600 static double old_chronyUpdateInt = 0;
Internal class to manage setuid privilege escalation with RAII.
The base-class for MagAO-X applications.
void updateIfChanged(pcf::IndiProperty &p, const std::string &el, const T &newVal, pcf::IndiProperty::PropertyStateType ipState=pcf::IndiProperty::Ok)
Update an INDI property element value if it has changed.
stateCodes::stateCodeT state()
Get the current state code.
int registerIndiPropertyNew(pcf::IndiProperty &prop, int(*)(void *, const pcf::IndiProperty &))
Register an INDI property which is exposed for others to request a New Property for.
int createStandardIndiToggleSw(pcf::IndiProperty &prop, const std::string &name, const std::string &label="", const std::string &group="")
Create a standard R/W INDI switch with a single toggle element.
int m_shutdown
Flag to signal it's time to shutdown. When not 0, the main loop exits.
void updateSwitchIfChanged(pcf::IndiProperty &p, const std::string &el, const pcf::IndiElement::SwitchStateType &newVal, pcf::IndiProperty::PropertyStateType ipState=pcf::IndiProperty::Ok)
Update an INDI switch element value if it has changed.
static int log(const typename logT::messageT &msg, logPrioT level=logPrio::LOG_DEFAULT)
Make a log entry.
int threadStart(std::thread &thrd, bool &thrdInit, pid_t &tpid, pcf::IndiProperty &thProp, int thrdPrio, const std::string &cpuset, const std::string &thrdName, thisPtr *thrdThis, Function &&thrdStart)
Start a thread, using this class's privileges to set priority, etc.
int m_criticalDiskTemp
User defined critical temperature for drives.
std::string m_chronySourceMac
pcf::IndiProperty m_indiP_core_temps
Indi variable for reporting CPU core temperature(s)
int findRamUsage(float &)
Finds current RAM usage.
int findDiskUsage(float &, float &, float &)
Finds usages of space for following directory paths: /; /data; /boot.
pcf::IndiProperty m_indiP_chronyStatus
void setlatThreadExec()
Execute the set-latency thread main loop.
double m_chronyRootDispersion
float m_bootUsage
Disk usage in /boot path as a value out of 100.
int parseDiskTemperature(std::string &driveName, float &temp, const std::string &line)
Parses string from system call to find drive temperatures.
int recordChronyStats(bool force=false)
float m_rootUsage
Disk usage in root path as a value out of 100.
int recordDriveTemps(bool force=false)
int recordTelem(const telem_coreloads *)
int parseCPUTemperaturesAMD(float &temp, const std::string &line)
Parses string from system call to find CPU temperatures on an AMD system.
int recordChronyStatus(bool force=false)
std::vector< std::string > m_diskNames
vector of names of the hard disks returned by hddtemp
virtual void setupConfig()
Setup the user-defined warning and critical values for core and drive temperatures.
int parseCPUTemperatures(float &temp, const std::string &line)
Parses string from system call to find CPU temperatures.
~sysMonitor() noexcept
D'tor, declared and defined for noexcept.
virtual int appLogic()
Implementation of reading and logging each of the measured statistics.
int parseCPULoads(float &, const std::string &)
Parses string from system call to find CPU usage loads.
std::thread m_setlatThread
A separate thread for the actual setting of low latency.
pcf::IndiProperty m_setlatThreadProp
The property to hold the setlat thread details.
int m_warningDiskTemp
User defined warning temperature for drives.
pid_t m_setlatThreadID
Set latency thread ID.
pcf::IndiProperty m_indiP_setlat
std::vector< float > m_coreLoads
List of current core load(s)
int findCPUTemperatures(std::vector< float > &)
Finds all CPU core temperatures.
std::vector< float > m_coreTemps
List of current core temperature(s)
std::string m_chronySynch
int recordCoreLoads(bool force=false)
int updateVals()
Updates Indi property values of all system statistics.
double m_chronyLastOffset
virtual void loadConfig()
Load the warning and critical temperature values for core and drive temperatures.
int findDiskTemperature(std::vector< std::string > &hdd_names, std::vector< float > &hdd_temps)
Finds all drive temperatures.
INDI_NEWCALLBACK_DECL(sysMonitor, m_indiP_setlat)
int m_criticalCoreTemp
User defined critical temperature for CPU cores.
virtual int appStartup()
Registers all new Indi properties for each of the reported values to publish.
bool m_setlatThreadInit
Synchronizer to ensure set lat thread initializes before doing dangerous things.
pcf::IndiProperty m_indiP_drive_temps
Indi variable for reporting drive temperature(s)
int recordCoreTemps(bool force=false)
static void setlatThreadStart(sysMonitor *s)
Thread starter, called by threadStart on thread construction. Calls setlatThreadExec.
int parseCPUTemperaturesIntel(float &temp, const std::string &line)
Parses string from system call to find CPU temperatures on an Intel system.
int m_warningCoreTemp
User defined warning temperature for CPU cores.
float m_dataUsage
Disk usage in /data path as a value out of 100.
int criticalCoreTemperature(std::vector< float > &)
Checks if any core temperatures are at warning or critical levels.
virtual int appShutdown()
Do any needed shutdown tasks; currently nothing in this app.
std::vector< std::string > m_diskNameList
vector of names of the hard disks to monitor
std::vector< float > m_diskTemps
vector of current disk temperature(s)
pcf::IndiProperty m_indiP_usage
Indi variable for reporting drive usage of all paths.
int criticalDiskTemperature(std::vector< float > &)
Checks if any drive temperatures are at warning or critical levels.
float m_ramUsage
RAM usage as a decimal value between 0 and 1.
int m_setlatThreadPrio
Priority of the set latency thread, should normally be > 0.
std::string m_chronySourceIP
pcf::IndiProperty m_indiP_core_loads
Indi variable for reporting CPU core loads.
sysMonitor()
Default c'tor.
int findChronyStatus()
Finds current chronyd status.
int parseDiskUsage(std::string, float &, float &, float &)
Parses string from system call to find drive usage space.
pcf::IndiProperty m_indiP_chronyStats
double m_chronySystemTime
int findCPULoads(std::vector< float > &)
Finds all CPU core usage loads.
int parseRamUsage(std::string, float &)
Parses string from system call to find RAM usage.
int recordUsage(bool force=false)
#define REG_INDI_NEWPROP_NOCB(prop, propName, type)
Register a NEW INDI property with the class, with no callback.
#define INDI_NEWCALLBACK(prop)
Get the name of the static callback wrapper for a new property.
@ READY
The device is ready for operation, but is not operating.
int runCommand(std::vector< std::string > &commandOutput, std::vector< std::string > &commandStderr, std::vector< std::string > &commandList)
Runs a command (with parameters) passed in using fork/exec.
INDI_VALIDATE_CALLBACK_PROPS(function, ipRecv)
const pcf::IndiProperty & ipRecv
INDI_NEWCALLBACK_DEFN(acesxeCtrl, m_indiP_windspeed)(const pcf
constexpr static logPrioT LOG_ALERT
This should only be used if some action is required by operators to keep the system safe.
constexpr static logPrioT LOG_WARNING
A condition has occurred which may become an error, but the process continues.
constexpr static logPrioT LOG_NOTICE
A normal but significant condition.
A device base class which saves telemetry.
int appShutdown()
Perform telemeter application shutdown.
int loadConfig(appConfigurator &config)
Load the device section from an application configurator.
int appLogic()
Perform telemeter application logic.
int setupConfig(appConfigurator &config)
Setup an application configurator for the device section.
int checkRecordTimes(const telT &tel, telTs... tels)
Check the time of the last record for each telemetry type and make an entry if needed.
Log entry recording the statistics from chrony.
Log entry recording the status of chrony.
Log entry recording CPU loads.
Log entry recording CPU temperatures.
Log entry recording hdd temperatures.
Log entry recording hdd temperatures.