diff --git a/src/lib/platform/MSWindowsWatchdog.cpp b/src/lib/platform/MSWindowsWatchdog.cpp index 61c255ec0..ecc476cee 100644 --- a/src/lib/platform/MSWindowsWatchdog.cpp +++ b/src/lib/platform/MSWindowsWatchdog.cpp @@ -24,7 +24,8 @@ #include #define CURRENT_PROCESS_ID 0 -#define MAXIMUM_WAIT_TIME 3 + +const auto kStartDelaySeconds = 1; namespace { std::string trimDesktopName(const std::string &nameFromTraces) @@ -56,30 +57,14 @@ const char g_activeDesktop[] = {"activeDesktop:"}; MSWindowsWatchdog::MSWindowsWatchdog(bool autoDetectCommand, bool foreground) : m_thread(NULL), m_autoDetectCommand(autoDetectCommand), - m_monitoring(true), - m_commandChanged(false), m_outputWritePipe(nullptr), m_outputReadPipe(nullptr), m_elevateProcess(false), - m_processFailures(0), - m_processStarted(false), + m_startFailures(0), m_fileLogOutputter(NULL), m_ready(false), m_foreground(foreground) { - m_mutex = ARCH->newMutex(); - m_condVar = ARCH->newCondVar(); -} - -MSWindowsWatchdog::~MSWindowsWatchdog() -{ - if (m_condVar != NULL) { - ARCH->closeCondVar(m_condVar); - } - - if (m_mutex != NULL) { - ARCH->closeMutex(m_mutex); - } } void MSWindowsWatchdog::startAsync() @@ -91,7 +76,7 @@ void MSWindowsWatchdog::startAsync() void MSWindowsWatchdog::stop() { - m_monitoring = false; + m_running = false; if (!m_thread->wait(5)) { LOG((CLOG_WARN "could not stop main thread")); @@ -194,60 +179,74 @@ void MSWindowsWatchdog::mainLoop(void *) // Set the pipe to non-blocking mode, which allows us to stop the output reader thread immediately // in order to speed up the shutdown process when the Windows service needs to stop. - DWORD mode = PIPE_NOWAIT; - if (!SetNamedPipeHandleState(m_outputReadPipe, &mode, nullptr, nullptr)) { + if (DWORD mode = PIPE_NOWAIT; !SetNamedPipeHandleState(m_outputReadPipe, &mode, nullptr, nullptr)) { LOG_ERR("could not set pipe to non-blocking mode"); throw XArch(new XArchEvalWindows()); } - while (m_monitoring) { - try { + while (m_running) { + if (!m_command.empty() && !m_foreground && m_session.hasChanged()) { + LOG_DEBUG("session changed, queueing process start"); + m_processState = ProcessState::StartPending; + m_nextStartTime.reset(); + } - if (m_processStarted && getCommand().empty()) { - LOG((CLOG_DEBUG "process started but command is empty, shutting down")); - shutdownExistingProcesses(); - continue; + switch (m_processState) { + using enum ProcessState; + + case Idle: + LOG_DEBUG3("watchdog: process idle"); + break; + + case StartScheduled: { + LOG_DEBUG3("watchdog: process start scheduled"); + if (m_nextStartTime.has_value() && m_nextStartTime.value() <= ARCH->time()) { + LOG_DEBUG("start time reached, queueing process start"); + m_processState = StartPending; } + } break; - if (m_processFailures > 1) { - // increasing backoff period, maximum of 10 seconds. - // only start sleeping at 1 second to avoid unnecessary delay when the process stops - // for the first failure (i.e. when the process just stopped running). - int timeout = m_processFailures < 10 ? m_processFailures : 10; - LOG_WARN("backing off after failure, wait=%ds, failures=%d", timeout, m_processFailures); - ARCH->sleep(timeout); + case StartPending: { + LOG_INFO("daemon starting new process"); + try { + startProcess(); + m_startFailures = 0; + m_processState = Running; + } catch (std::exception &e) { // NOSONAR - Catching all exceptions + handleStartError(e.what()); + m_processState = StartPending; + } catch (...) { // NOSONAR - Catching remaining exceptions + handleStartError(); + m_processState = StartPending; } + } break; - if (!getCommand().empty()) { - bool startNeeded = false; - - if (m_processFailures != 0) { - startNeeded = true; - } else if (!m_foreground && m_session.hasChanged()) { - startNeeded = true; - } else if (m_commandChanged) { - startNeeded = true; - } - - if (startNeeded) { - startProcess(); - } - } else { - // prevent backoff when no command is set. - m_processFailures = 0; - } - - if (m_processStarted && !isProcessRunning()) { - - m_processFailures++; - m_processStarted = false; - + case Running: { + LOG_DEBUG3("watchdog: process running"); + if (!isProcessRunning()) { LOG((CLOG_WARN "detected application not running, pid=%d", m_process->info().dwProcessId)); + m_processState = StartPending; } + } break; + case StopPending: { + LOG_INFO("stopping running process"); + if (m_process != nullptr) { + m_process->shutdown(); + m_process.reset(); + } else { + LOG_WARN("no process to stop"); + } + shutdownExistingProcesses(); + m_processState = Idle; + } break; + } + + // TODO: This seems like a hack, why would we need to send the SAS function every loop iteration? + // This slows down both the process relaunch speed and the watchdog thread loop shut down time. + if (sendSasFunc != NULL) { HANDLE sendSasEvent = CreateEvent(NULL, FALSE, FALSE, "Global\\SendSAS"); if (sendSasEvent != NULL) { - // use SendSAS event to wait for next session (timeout 1 second). if (WaitForSingleObject(sendSasEvent, 1000) == WAIT_OBJECT_0) { LOG_DEBUG("calling SendSAS from sas.dll"); @@ -256,32 +255,23 @@ void MSWindowsWatchdog::mainLoop(void *) CloseHandle(sendSasEvent); } else { - LOG((CLOG_ERR "could not create SendSAS event")); + XArchEvalWindows e; + LOG_ERR("could not create SendSAS event"); } - - // Sleep for only 100ms rather than 1 second so that the service can shut down faster. - ARCH->sleep(0.1); - - } catch (std::exception &e) { - LOG((CLOG_CRIT "failed to launch, error: %s", e.what())); - m_processFailures++; - m_processStarted = false; - continue; - } catch (...) { - LOG((CLOG_CRIT "failed to launch, unknown error.")); - m_processFailures++; - m_processStarted = false; - continue; } + + // Sleep for only 100ms rather than 1 second so that the service can shut down faster. + ARCH->sleep(0.1); } if (m_process != nullptr) { LOG((CLOG_DEBUG "terminated running process on exit")); m_process->shutdown(); m_process.reset(); - m_processStarted = false; } + shutdownExistingProcesses(); + LOG((CLOG_DEBUG "watchdog main loop finished")); } @@ -303,19 +293,14 @@ void MSWindowsWatchdog::setFileLogOutputter(FileLogOutputter *outputter) void MSWindowsWatchdog::startProcess() { - LOG_INFO("daemon starting new process"); - if (m_command.empty()) { throw XMSWindowsWatchdogError("cannot start process, command is empty"); } - m_commandChanged = false; - if (m_process != nullptr) { LOG((CLOG_DEBUG "closing existing process to make way for new one")); m_process->shutdown(); m_process.reset(); - m_processStarted = false; } m_process = std::make_unique(m_command, m_outputWritePipe, m_outputWritePipe); @@ -361,9 +346,6 @@ void MSWindowsWatchdog::startProcess() throw XMSWindowsWatchdogError("process immediately stopped"); } - m_processStarted = true; - m_processFailures = 0; - LOG((CLOG_DEBUG "started core process from daemon")); LOG( (CLOG_DEBUG2 "process info, session=%i, elevated: %s, command=%s", m_session.getActiveSessionId(), @@ -372,13 +354,20 @@ void MSWindowsWatchdog::startProcess() } } -void MSWindowsWatchdog::setCommand(const std::string &command, bool elevate) +void MSWindowsWatchdog::setProcessConfig(const std::string &command, bool elevate) { - LOG_DEBUG("command parameters updated"); + LOG_DEBUG("watchdog process config updated"); m_command = command; m_elevateProcess = elevate; - m_commandChanged = true; - m_processFailures = 0; + + if (m_command.empty()) { + LOG_DEBUG("command cleared, queueing process stop"); + m_processState = ProcessState::StopPending; + } else { + LOG_DEBUG("command changed, queueing process start"); + m_processState = ProcessState::StartPending; + m_nextStartTime.reset(); + } } std::string MSWindowsWatchdog::getCommand() const @@ -411,7 +400,7 @@ void MSWindowsWatchdog::outputLoop(void *) // +1 char for \0 CHAR buffer[kOutputBufferSize + 1]; - while (m_monitoring) { + while (m_running) { DWORD bytesRead; BOOL success = ReadFile(m_outputReadPipe, buffer, kOutputBufferSize, &bytesRead, NULL); @@ -482,7 +471,7 @@ HANDLE openProcessForKill(PROCESSENTRY32 entry) void MSWindowsWatchdog::shutdownExistingProcesses() { - LOG_DEBUG("shutting down existing processes"); + LOG_DEBUG("shutting down any existing processes"); // first we need to take a snapshot of the running processes HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, CURRENT_PROCESS_ID); @@ -571,3 +560,24 @@ std::string MSWindowsWatchdog::runActiveDesktopUtility() output.erase(output.find_last_not_of("\r\n") + 1); return output; } + +void MSWindowsWatchdog::handleStartError(const std::string_view &message) +{ + m_startFailures++; + + if (!message.empty()) { + LOG_CRIT("failed to launch, error: %s", message.data()); + } else { + LOG_CRIT("failed to launch, unknown error"); + } + + // When there has been more than one consecutive failure, slow down the retry rate. + if (m_startFailures > 1) { + m_nextStartTime = ARCH->time() + kStartDelaySeconds; + LOG_WARN("start failed %d times, delaying start", m_startFailures); + LOG_DEBUG("start delay, seconds=%d, time=%f", kStartDelaySeconds, m_nextStartTime.value()); + } else { + LOG_INFO("retrying process start immediately"); + m_nextStartTime.reset(); + } +} diff --git a/src/lib/platform/MSWindowsWatchdog.h b/src/lib/platform/MSWindowsWatchdog.h index 021943a66..085f53f6e 100644 --- a/src/lib/platform/MSWindowsWatchdog.h +++ b/src/lib/platform/MSWindowsWatchdog.h @@ -6,15 +6,14 @@ #pragma once -#include "arch/IArchMultithread.h" #include "deskflow/XDeskflow.h" #include "platform/MSWindowsProcess.h" #include "platform/MSWindowsSession.h" - #define WIN32_LEAN_AND_MEAN #include #include +#include #include class Thread; @@ -22,9 +21,18 @@ class FileLogOutputter; class MSWindowsWatchdog { + enum class ProcessState + { + Idle, + StartScheduled, + StartPending, + StopPending, + Running + }; + public: MSWindowsWatchdog(bool autoDetectCommand, bool foreground); - virtual ~MSWindowsWatchdog(); + ~MSWindowsWatchdog() = default; void startAsync(); std::string getCommand() const; @@ -42,6 +50,7 @@ private: void startProcess(); void sendSas(); void setStartupInfo(STARTUPINFO &si); + void handleStartError(const std::string_view &message = ""); /** * @brief Re-run the process to get the active desktop name. @@ -56,23 +65,21 @@ private: private: Thread *m_thread; bool m_autoDetectCommand; - std::string m_command; - bool m_monitoring; - bool m_commandChanged; + bool m_running; HANDLE m_outputWritePipe; HANDLE m_outputReadPipe; Thread *m_outputThread; bool m_elevateProcess; MSWindowsSession m_session; - int m_processFailures; - bool m_processStarted; + int m_startFailures; FileLogOutputter *m_fileLogOutputter; - ArchMutex m_mutex; - ArchCond m_condVar; bool m_ready; bool m_foreground; std::string m_activeDesktop; std::unique_ptr m_process; + std::optional m_nextStartTime = std::nullopt; + ProcessState m_processState = ProcessState::Idle; + std::string m_command = ""; }; //! Relauncher error