17 #ifndef sysMonitor_hpp
18 #define sysMonitor_hpp
20 #include "../../libMagAOX/libMagAOX.hpp"
21 #include "../../magaox_git_version.h"
165 std::vector<float> & hdd_temps
177 const std::string & line
341 config.add(
"diskNames",
"",
"diskNames", argType::Required,
"",
"diskNames",
false,
"vector<string>",
"The names (/dev/sdX) of the drives to monitor");
342 config.add(
"warningCoreTemp",
"",
"warningCoreTemp", argType::Required,
"",
"warningCoreTemp",
false,
"int",
"The warning temperature for CPU cores.");
343 config.add(
"criticalCoreTemp",
"",
"criticalCoreTemp", argType::Required,
"",
"criticalCoreTemp",
false,
"int",
"The critical temperature for CPU cores.");
344 config.add(
"warningDiskTemp",
"",
"warningDiskTemp", argType::Required,
"",
"warningDiskTemp",
false,
"int",
"The warning temperature for the disk.");
345 config.add(
"criticalDiskTemp",
"",
"criticalDiskTemp", argType::Required,
"",
"criticalDiskTemp",
false,
"int",
"The critical temperature for disk.");
380 for (
unsigned int i = 0; i <
m_diskTemps.size(); i++)
418 log<software_critical>({__FILE__, __LINE__});
442 else if (rvCPUTemp == 2)
451 log<software_error>({__FILE__, __LINE__,
"Could not log values for CPU core temps."});
463 log<software_error>({__FILE__, __LINE__,
"Could not log values for CPU core loads."});
482 else if (rvDiskTemp == 2)
493 log<software_error>({__FILE__, __LINE__,
"Could not log values for drive temps."});
500 if (rvDiskUsage >= 0 && rvRamUsage >= 0)
506 log<software_error>({__FILE__, __LINE__,
"Could not log values for usage."});
515 log<software_error>({__FILE__, __LINE__,
"Could not get chronyd status."});
520 log<software_error>({__FILE__, __LINE__});
547 std::vector<std::string> commandList{
"sensors"};
549 std::vector<std::string> commandOutput, commandError;
553 if(commandOutput.size() < 1)
return log<
software_error,-1>({__FILE__, __LINE__});
557 if(commandError.size() > 0)
559 for(
size_t n=0; n< commandError.size(); ++n)
561 log<software_error>({__FILE__, __LINE__,
"sensors stderr: " + commandError[n]});
566 for(
size_t n=0; n < commandOutput.size(); ++n)
571 temps.push_back(tempVal);
580 if (line.length() <= 1)
586 std::string str = line.substr(0, 5);
587 if (str.compare(
"Core ") == 0)
589 size_t st = line.find(
':',0);
590 if(st == std::string::npos)
592 log<software_error>({__FILE__, __LINE__,
"Invalid read occured when parsing CPU temperatures."});
599 size_t ed = line.find(
'C', st);
600 if(ed == std::string::npos)
602 log<software_error>({__FILE__, __LINE__,
"Invalid read occured when parsing CPU temperatures."});
609 std::string temp_str = line.substr(st, ed-st);
616 temp = std::stof (temp_str);
618 catch (
const std::invalid_argument& e)
620 log<software_error>({__FILE__, __LINE__,
"Invalid read occured when parsing CPU temperatures."});
629 std::istringstream iss(line);
630 std::vector<std::string> tokens{std::istream_iterator<std::string>{iss},std::istream_iterator<std::string>{}};
633 tokens.at(5).pop_back();
634 tokens.at(5).pop_back();
635 tokens.at(5).pop_back();
636 tokens.at(5).pop_back();
637 tokens.at(5).erase(0,1);
640 catch (
const std::invalid_argument& e)
642 log<software_error>({__FILE__, __LINE__,
"Invalid read occured when parsing warning CPU temperatures."});
648 std::istringstream iss(line);
649 std::vector<std::string> tokens{std::istream_iterator<std::string>{iss},std::istream_iterator<std::string>{}};
652 tokens.at(8).pop_back();
653 tokens.at(8).pop_back();
654 tokens.at(8).pop_back();
655 tokens.at(8).pop_back();
656 tokens.at(8).erase(0,1);
659 catch (
const std::invalid_argument& e)
661 log<software_error>({__FILE__, __LINE__,
"Invalid read occured when parsing critical CPU temperatures."});
678 int coreNum = 0, rv = 0;
684 std::cout <<
"Warning temperature for Core " << coreNum << std::endl;
692 std::cout <<
"Critical temperature for Core " << coreNum << std::endl;
702 std::vector<std::string> commandList{
"mpstat",
"-P",
"ALL",
"1",
"1"};
703 std::vector<std::string> commandOutput, commandError;
707 if(commandOutput.size() < 1)
return log<
software_error,-1>({__FILE__, __LINE__});
711 if(commandError.size() > 0)
713 for(
size_t n=0; n< commandError.size(); ++n)
715 log<software_error>({__FILE__, __LINE__,
"mpstat stderr: " + commandError[n]});
721 if (commandOutput.size() < 5)
723 return log<
software_error,-1>({__FILE__, __LINE__,
"not enough lines returned by mpstat"});
726 for (
auto line = commandOutput.begin()+4; line != commandOutput.end(); line++)
731 loads.push_back(loadVal);
740 if (line.length() <= 1)
742 log<software_error>({__FILE__, __LINE__,
"zero lenght line in parseCPULoads."});
745 std::istringstream iss(line);
746 std::vector<std::string> tokens{std::istream_iterator<std::string>{iss},std::istream_iterator<std::string>{}};
747 if(tokens.size() < 8)
return 1;
751 cpu_load = 100.0 - std::stof(tokens.at(tokens.size()-1));
753 catch (
const std::invalid_argument& e)
755 log<software_error>({__FILE__, __LINE__,
"Invalid read occured when parsing CPU core usage."});
758 catch (
const std::out_of_range& e)
760 log<software_error>({__FILE__, __LINE__,
"Out of range exception in parseCPULoads."});
769 std::vector<float>& hdd_temps
813 const std::string & line
817 if (line.length() <= 6)
824 size_t sp = line.find(
':',0);
825 driveName = line.substr(5, sp-5);
827 std::istringstream iss(line);
828 std::vector<std::string> tokens{std::istream_iterator<std::string>{iss},std::istream_iterator<std::string>{}};
830 for(
auto temp_s: tokens)
834 if (isdigit(temp_s.at(0)) && temp_s.substr(temp_s.length() - 1, 1) ==
"C")
840 tempValue = std::stof (temp_s);
842 catch (
const std::invalid_argument& e)
844 log<software_error>({__FILE__, __LINE__,
"Invalid read occured when parsing drive temperatures."});
849 hdd_temp = tempValue;
861 catch (
const std::out_of_range& e)
882 std::cout <<
"Warning temperature for Disk" << std::endl;
890 std::cout <<
"Critical temperature for Disk " << std::endl;
899 std::vector<std::string> commandList{
"df"};
901 std::vector<std::string> commandOutput, commandError;
905 if(commandOutput.size() < 1)
return log<
software_error,-1>({__FILE__, __LINE__});
909 if(commandError.size() > 0)
911 for(
size_t n=0; n< commandError.size(); ++n)
913 log<software_error>({__FILE__, __LINE__,
"df stderr: " + commandError[n]});
918 for (
auto line: commandOutput)
920 int rvDiskUsage =
parseDiskUsage(line, rootUsage, dataUsage, bootUsage);
921 if (rvDiskUsage == 0)
931 if (line.length() <= 1)
936 std::istringstream iss(line);
937 std::vector<std::string> tokens{std::istream_iterator<std::string>{iss},std::istream_iterator<std::string>{}};
940 if (tokens.at(5).compare(
"/") == 0)
942 tokens.at(4).pop_back();
945 rootUsage = std::stof (tokens.at(4))/100;
948 catch (
const std::invalid_argument& e)
950 log<software_error>({__FILE__, __LINE__,
"Invalid read occured when parsing drive usage."});
954 else if (tokens.at(5).compare(
"/data") == 0)
956 tokens.at(4).pop_back();
959 dataUsage = std::stof (tokens.at(4))/100;
962 catch (
const std::invalid_argument& e)
964 log<software_error>({__FILE__, __LINE__,
"Invalid read occured when parsing drive usage."});
968 else if (tokens.at(5).compare(
"/boot") == 0)
970 tokens.at(4).pop_back();
973 bootUsage = std::stof (tokens.at(4))/100;
976 catch (
const std::invalid_argument& e)
978 log<software_error>({__FILE__, __LINE__,
"Invalid read occured when parsing drive usage."});
983 catch (
const std::out_of_range& e) {
991 std::vector<std::string> commandList{
"free",
"-m"};
993 std::vector<std::string> commandOutput, commandError;
997 if(commandOutput.size() < 1)
return log<
software_error,-1>({__FILE__, __LINE__});
1001 if(commandError.size() > 0)
1003 for(
size_t n=0; n< commandError.size(); ++n)
1005 log<software_error>({__FILE__, __LINE__,
"free stderr: " + commandError[n]});
1009 for (
auto line: commandOutput)
1021 if (line.length() <= 1)
1025 std::istringstream iss(line);
1026 std::vector<std::string> tokens{std::istream_iterator<std::string>{iss},std::istream_iterator<std::string>{}};
1029 if (tokens.at(0).compare(
"Mem:") != 0)
1033 ramUsage = std::stof(tokens.at(2))/std::stof(tokens.at(1));
1034 if (ramUsage > 1 || ramUsage == 0)
1041 catch (
const std::invalid_argument& e)
1043 log<software_error>({__FILE__, __LINE__,
"Invalid read occured when parsing RAM usage."});
1046 catch (
const std::out_of_range& e) {
1053 std::vector<std::string> commandList{
"chronyc",
"-c",
"tracking"};
1055 std::vector<std::string> commandOutput, commandError;
1059 if(commandOutput.size() < 1)
return log<
software_error,-1>({__FILE__, __LINE__});
1063 if(commandError.size() > 0)
1065 for(
size_t n=0; n< commandError.size(); ++n)
1067 log<software_error>({__FILE__, __LINE__,
"chronyc stderr: " + commandError[n]});
1071 if(commandOutput.size() < 1)
1073 log<software_error>({__FILE__,__LINE__,
"no response from chronyc -c"});
1077 std::vector<std::string> results;
1078 mx::ioutils::parseStringVector(results, commandOutput[0],
',');
1080 if(results.size() < 1)
1082 log<software_error>({__FILE__,__LINE__,
"wrong number of fields from chronyc -c"});
1086 static std::string last_mac;
1087 static std::string last_ip;
1128 float min, max, mean;
1211 for(
size_t cpu =0; cpu <
m_coreLoads.size(); ++cpu)
1213 std::string cpuFile =
"/sys/devices/system/cpu/cpu";
1214 cpuFile += std::to_string(cpu);
1215 cpuFile +=
"/cpufreq/scaling_governor";
1216 int wfd = open( cpuFile.c_str(), O_WRONLY);
1217 write(wfd,
"performance",
sizeof(
"performance"));
1222 fd = open(
"/dev/cpu_dma_latency", O_WRONLY);
1224 if(fd <=0) log<software_error>({__FILE__,__LINE__,
"error opening cpu_dma_latency"});
1228 if (write(fd, &l,
sizeof(l)) !=
sizeof(l))
1230 log<software_error>({__FILE__,__LINE__,
"error writing to cpu_dma_latency"});
1250 for(
size_t cpu =0; cpu <
m_coreLoads.size(); ++cpu)
1252 std::string cpuFile =
"/sys/devices/system/cpu/cpu";
1253 cpuFile += std::to_string(cpu);
1254 cpuFile +=
"/cpufreq/scaling_governor";
1255 int wfd = open( cpuFile.c_str(), O_WRONLY);
1256 write(wfd,
"powersave",
sizeof(
"powersave"));
1272 if(
ipRecv.getName() != m_indiP_setlat.getName())
1274 log<software_error>({__FILE__,__LINE__,
"wrong INDI property received."});
1278 if(!
ipRecv.find(
"toggle"))
return 0;
1280 if(
ipRecv[
"toggle"].getSwitchState() == pcf::IndiElement::Off)
1282 m_setLatency =
false;
1285 if(
ipRecv[
"toggle"].getSwitchState() == pcf::IndiElement::On)
1287 m_setLatency =
true;
1330 static std::vector<float> old_coreLoads;
1341 if(
m_coreLoads[n] != old_coreLoads[n]) write =
true;
1359 static std::vector<float> old_coreTemps;
1370 if(
m_coreTemps[n] != old_coreTemps[n]) write =
true;
1387 static std::vector<std::string> old_diskNames;
1388 static std::vector<float> old_diskTemps;
1418 static float old_ramUsage = 0;
1419 static float old_bootUsage = 0;
1420 static float old_rootUsage = 0;
1421 static float old_dataUsage = 0;
1438 static std::string old_chronySourceMac;
1439 static std::string old_chronySourceIP;
1440 static std::string old_chronySynch;
1441 static std::string old_chronyLeap;
1459 double old_chronySystemTime = 1e50;
1460 double old_chronyLastOffset = 0;
1461 double old_chronyRMSOffset = 0;
1462 double old_chronyFreq = 0;
1463 double old_chronyResidFreq = 0;
1464 double old_chronySkew = 0;
1465 double old_chronyRootDelay = 0;
1466 double old_chronyRootDispersion = 0;
1467 double old_chronyUpdateInt = 0;
Internal class to manage setuid privilege escalation with RAII.
The base-class for MagAO-X applications.
void updateIfChanged(pcf::IndiProperty &p, const std::string &el, const T &newVal, pcf::IndiProperty::PropertyStateType ipState=pcf::IndiProperty::Ok)
Update an INDI property element value if it has changed.
stateCodes::stateCodeT state()
Get the current state code.
int registerIndiPropertyNew(pcf::IndiProperty &prop, int(*)(void *, const pcf::IndiProperty &))
Register an INDI property which is exposed for others to request a New Property for.
int createStandardIndiToggleSw(pcf::IndiProperty &prop, const std::string &name, const std::string &label="", const std::string &group="")
Create a standard R/W INDI switch with a single toggle element.
int m_shutdown
Flag to signal it's time to shutdown. When not 0, the main loop exits.
void updateSwitchIfChanged(pcf::IndiProperty &p, const std::string &el, const pcf::IndiElement::SwitchStateType &newVal, pcf::IndiProperty::PropertyStateType ipState=pcf::IndiProperty::Ok)
Update an INDI switch element value if it has changed.
static int log(const typename logT::messageT &msg, logPrioT level=logPrio::LOG_DEFAULT)
Make a log entry.
int threadStart(std::thread &thrd, bool &thrdInit, pid_t &tpid, pcf::IndiProperty &thProp, int thrdPrio, const std::string &cpuset, const std::string &thrdName, thisPtr *thrdThis, Function &&thrdStart)
Start a thread, using this class's privileges to set priority, etc.
int m_criticalDiskTemp
User defined critical temperature for drives.
std::string m_chronySourceMac
pcf::IndiProperty m_indiP_core_temps
Indi variable for reporting CPU core temperature(s)
int findRamUsage(float &)
Finds current RAM usage.
int findDiskUsage(float &, float &, float &)
Finds usages of space for following directory paths: /; /data; /boot.
pcf::IndiProperty m_indiP_chronyStatus
void setlatThreadExec()
Execute the set-latency thread main loop.
double m_chronyRootDispersion
float m_bootUsage
Disk usage in /boot path as a decimal value between 0 and 1.
int parseCPULoads(std::string, float &)
Parses string from system call to find CPU usage loads.
int parseDiskTemperature(std::string &driveName, float &temp, const std::string &line)
Parses string from system call to find drive temperatures.
int recordChronyStats(bool force=false)
float m_rootUsage
Disk usage in root path as a decimal value between 0 and 1.
int recordDriveTemps(bool force=false)
int recordTelem(const telem_coreloads *)
int recordChronyStatus(bool force=false)
std::vector< std::string > m_diskNames
vector of names of the hard disks returned by hdd_temp
virtual void setupConfig()
Setup the user-defined warning and critical values for core and drive temperatures.
~sysMonitor() noexcept
D'tor, declared and defined for noexcept.
virtual int appLogic()
Implementation of reading and logging each of the measured statistics.
std::thread m_setlatThread
A separate thread for the actual setting of low latency.
pcf::IndiProperty m_setlatThreadProp
The property to hold the setlat thread details.
int m_warningDiskTemp
User defined warning temperature for drives.
pid_t m_setlatThreadID
Set latency thread ID.
pcf::IndiProperty m_indiP_setlat
std::vector< float > m_coreLoads
List of current core load(s)
int findCPUTemperatures(std::vector< float > &)
Finds all CPU core temperatures.
std::vector< float > m_coreTemps
List of current core temperature(s)
int parseCPUTemperatures(std::string, float &)
Parses string from system call to find CPU temperatures.
std::string m_chronySynch
int recordCoreLoads(bool force=false)
int updateVals()
Updates Indi property values of all system statistics.
double m_chronyLastOffset
virtual void loadConfig()
Load the warning and critical temperature values for core and drive temperatures.
int findDiskTemperature(std::vector< std::string > &hdd_names, std::vector< float > &hdd_temps)
Finds all drive temperatures.
INDI_NEWCALLBACK_DECL(sysMonitor, m_indiP_setlat)
int m_criticalCoreTemp
User defined critical temperature for CPU cores.
virtual int appStartup()
Registers all new Indi properties for each of the reported values to publish.
bool m_setlatThreadInit
Synchronizer to ensure set lat thread initializes before doing dangerous things.
pcf::IndiProperty m_indiP_drive_temps
Indi variable for reporting drive temperature(s)
int recordCoreTemps(bool force=false)
static void setlatThreadStart(sysMonitor *s)
Thread starter, called by threadStart on thread construction. Calls setlatThreadExec.
int m_warningCoreTemp
User defined warning temperature for CPU cores.
float m_dataUsage
Disk usage in /data path as a decimal value between 0 and 1.
int criticalCoreTemperature(std::vector< float > &)
Checks if any core temperatures are at warning or critical levels.
virtual int appShutdown()
Do any needed shutdown tasks; currently nothing in this app.
std::vector< std::string > m_diskNameList
vector of names of the hard disks to monitor
std::vector< float > m_diskTemps
vector of current disk temperature(s)
pcf::IndiProperty m_indiP_usage
Indi variable for reporting drive usage of all paths.
int criticalDiskTemperature(std::vector< float > &)
Checks if any drive temperatures are at warning or critical levels.
float m_ramUsage
RAM usage as a decimal value between 0 and 1.
int m_setlatThreadPrio
Priority of the set latency thread, should normally be > 0.
std::string m_chronySourceIP
pcf::IndiProperty m_indiP_core_loads
Indi variable for reporting CPU core loads.
sysMonitor()
Default c'tor.
int findChronyStatus()
Finds current chronyd status.
int parseDiskUsage(std::string, float &, float &, float &)
Parses string from system call to find drive usage space.
pcf::IndiProperty m_indiP_chronyStats
double m_chronySystemTime
int findCPULoads(std::vector< float > &)
Finds all CPU core usage loads.
int parseRamUsage(std::string, float &)
Parses string from system call to find RAM usage.
int recordUsage(bool force=false)
#define REG_INDI_NEWPROP_NOCB(prop, propName, type)
Register a NEW INDI property with the class, with no callback.
#define INDI_NEWCALLBACK(prop)
Get the name of the static callback wrapper for a new property.
@ READY
The device is ready for operation, but is not operating.
int runCommand(std::vector< std::string > &commandOutput, std::vector< std::string > &commandStderr, std::vector< std::string > &commandList)
Runs a command (with parameters) passed in using fork/exec.
const pcf::IndiProperty & ipRecv
INDI_NEWCALLBACK_DEFN(acesxeCtrl, m_indiP_windspeed)(const pcf
constexpr static logPrioT LOG_ALERT
This should only be used if some action is required by operators to keep the system safe.
constexpr static logPrioT LOG_WARNING
A condition has occurred which may become an error, but the process continues.
constexpr static logPrioT LOG_NOTICE
A normal but significant condition.
A device which saves telemetry.
int appShutdown()
Perform telemeter application shutdown.
int loadConfig(appConfigurator &config)
Load the device section from an application configurator.
int appLogic()
Perform telemeter application logic.
int setupConfig(appConfigurator &config)
Setup an application configurator for the device section.
int checkRecordTimes(const telT &tel, telTs... tels)
Check the time of the last record for each telemetry type and make an entry if needed.
Log entry recording the statistics from chrony.
Log entry recording the status of chrony.
Log entry recording CPU loads.
Log entry recording CPU temperatures.
Log entry recording hdd temperatures.
Log entry recording hdd temperatures.