LCOV - code coverage report
Current view: top level - serverManager/common/source - HealthcheckService.cpp (source / functions) Coverage Total Hit
Test: coverage.info Lines: 100.0 % 67 67
Test Date: 2025-03-21 11:02:39 Functions: 100.0 % 9 9

            Line data    Source code
       1              : /*
       2              :  * If not stated otherwise in this file or this component's LICENSE file the
       3              :  * following copyright and licenses apply:
       4              :  *
       5              :  * Copyright 2023 Sky UK
       6              :  *
       7              :  * Licensed under the Apache License, Version 2.0 (the "License");
       8              :  * you may not use this file except in compliance with the License.
       9              :  * You may obtain a copy of the License at
      10              :  *
      11              :  * http://www.apache.org/licenses/LICENSE-2.0
      12              :  *
      13              :  * Unless required by applicable law or agreed to in writing, software
      14              :  * distributed under the License is distributed on an "AS IS" BASIS,
      15              :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      16              :  * See the License for the specific language governing permissions and
      17              :  * limitations under the License.
      18              :  */
      19              : 
      20              : #include "HealthcheckService.h"
      21              : #include "RialtoServerManagerLogging.h"
      22              : 
      23              : namespace
      24              : {
      25           25 : int generatePingId()
      26              : {
      27              :     static int id{0};
      28           25 :     return id++;
      29              : }
      30              : } // namespace
      31              : 
      32              : namespace rialto::servermanager::common
      33              : {
      34           14 : HealthcheckService::HealthcheckService(ISessionServerAppManager &sessionServerAppManager,
      35              :                                        const std::shared_ptr<firebolt::rialto::common::ITimerFactory> &timerFactory,
      36           14 :                                        std::chrono::seconds healthcheckInterval, unsigned numOfFailedPingsBeforeRecovery)
      37           14 :     : m_sessionServerAppManager{sessionServerAppManager},
      38           14 :       m_kNumOfFailedPingsBeforeRecovery{numOfFailedPingsBeforeRecovery}, m_currentPingId{-1}
      39              : {
      40           14 :     if (std::chrono::seconds{0} != healthcheckInterval)
      41              :     {
      42           39 :         m_healthcheckTimer = timerFactory->createTimer(healthcheckInterval,
      43           26 :                                                        std::bind(&HealthcheckService::sendPing, this),
      44           13 :                                                        firebolt::rialto::common::TimerType::PERIODIC);
      45              :     }
      46           14 : }
      47              : 
      48           28 : HealthcheckService::~HealthcheckService()
      49              : {
      50           14 :     if (m_healthcheckTimer && m_healthcheckTimer->isActive())
      51              :     {
      52           13 :         m_healthcheckTimer->cancel();
      53           13 :         m_healthcheckTimer.reset();
      54              :     }
      55           28 : }
      56              : 
      57           12 : void HealthcheckService::onPingSent(int serverId, int pingId)
      58              : {
      59           12 :     std::unique_lock<std::mutex> lock{m_mutex};
      60           12 :     if (pingId != m_currentPingId)
      61              :     {
      62            1 :         RIALTO_SERVER_MANAGER_LOG_ERROR("Something went seriously wrong. Ping sent with wrong id to server: %d, valid "
      63              :                                         "ping id: %d, sent pingId: %d",
      64              :                                         serverId, m_currentPingId, pingId);
      65            1 :         return;
      66              :     }
      67           11 :     m_remainingPings.insert(serverId);
      68           11 :     m_failedPings.try_emplace(serverId, 0);
      69           12 : }
      70              : 
      71            3 : void HealthcheckService::onPingFailed(int serverId, int pingId)
      72              : {
      73            3 :     std::unique_lock<std::mutex> lock{m_mutex};
      74            3 :     if (pingId != m_currentPingId)
      75              :     {
      76            1 :         RIALTO_SERVER_MANAGER_LOG_ERROR("Something went seriously wrong. Ping sent with wrong id to server: %d, valid "
      77              :                                         "ping id: %d, sent pingId: %d",
      78              :                                         serverId, m_currentPingId, pingId);
      79            1 :         return;
      80              :     }
      81            2 :     if (m_failedPings.end() != m_failedPings.find(serverId))
      82              :     {
      83            1 :         handleError(serverId);
      84              :     }
      85              :     else
      86              :     {
      87            1 :         m_sessionServerAppManager.onSessionServerStateChanged(serverId,
      88            1 :                                                               firebolt::rialto::common::SessionServerState::ERROR);
      89            1 :         m_failedPings.emplace(serverId, 1);
      90              :     }
      91            3 : }
      92              : 
      93            5 : void HealthcheckService::onAckReceived(int serverId, int pingId, bool success)
      94              : {
      95            5 :     std::unique_lock<std::mutex> lock{m_mutex};
      96            5 :     if (pingId != m_currentPingId)
      97              :     {
      98            1 :         RIALTO_SERVER_MANAGER_LOG_WARN("Unexpected ack received from server id: %d. Current ping id: %d, received ping "
      99              :                                        "id: %d",
     100              :                                        serverId, m_currentPingId, pingId);
     101            1 :         return;
     102              :     }
     103            4 :     m_remainingPings.erase(serverId);
     104            4 :     if (success)
     105              :     {
     106            3 :         m_failedPings[serverId] = 0;
     107              :     }
     108              :     else
     109              :     {
     110            1 :         RIALTO_SERVER_MANAGER_LOG_WARN("Ack with error received from server id: %d, ping id: %d", serverId, pingId);
     111            1 :         handleError(serverId);
     112              :     }
     113            5 : }
     114              : 
     115            1 : void HealthcheckService::onServerRemoved(int serverId)
     116              : {
     117            1 :     std::unique_lock<std::mutex> lock{m_mutex};
     118            1 :     m_remainingPings.erase(serverId);
     119            1 :     m_failedPings.erase(serverId);
     120              : }
     121              : 
     122           25 : void HealthcheckService::sendPing()
     123              : {
     124           25 :     std::unique_lock<std::mutex> lock{m_mutex};
     125           32 :     for (int serverId : m_remainingPings)
     126              :     {
     127            7 :         RIALTO_SERVER_MANAGER_LOG_WARN("Ping (id: %d) timeout for server id: %d", m_currentPingId, serverId);
     128            7 :         handleError(serverId);
     129              :     }
     130           25 :     m_remainingPings.clear();
     131           25 :     m_currentPingId = generatePingId();
     132           25 :     RIALTO_SERVER_MANAGER_LOG_DEBUG("Start ping procedure with id: %d", m_currentPingId);
     133           25 :     m_sessionServerAppManager.sendPingEvents(m_currentPingId);
     134              : }
     135              : 
     136            9 : void HealthcheckService::handleError(int serverId)
     137              : {
     138            9 :     m_sessionServerAppManager.onSessionServerStateChanged(serverId, firebolt::rialto::common::SessionServerState::ERROR);
     139            9 :     unsigned &failedPingsNum{m_failedPings[serverId]};
     140            9 :     if (++failedPingsNum >= m_kNumOfFailedPingsBeforeRecovery)
     141              :     {
     142            2 :         RIALTO_SERVER_MANAGER_LOG_WARN(
     143              :             "Max num of failed pings reached for server with id: %d. Starting recovery action", serverId);
     144            2 :         failedPingsNum = 0;
     145            2 :         m_sessionServerAppManager.restartServer(serverId);
     146              :     }
     147            9 : }
     148              : } // namespace rialto::servermanager::common
        

Generated by: LCOV version 2.0-1