Эх сурвалжийг харах

Server: Actually send FCM Notifications when HTTP Check Errors exceed a threshold; added "notify" & "notifyThreshold" settings to HttpCheckConfig

Christian Kahlau 3 жил өмнө
parent
commit
c152995a9c

+ 3 - 1
common/defaults.module.ts

@@ -5,7 +5,9 @@ export const serverSync = {
 export const serviceChecks = { // fallback values used when a service check config omits a setting
   active: true,
   httpTimeout: 10000, // handed to axios as the request timeout — presumably milliseconds; confirm
-  interval: 300
+  interval: 300, // NOTE(review): unit is not visible here (seconds?) — confirm
+  notify: false, // notifications stay off unless a config enables them
+  notifyThreshold: 3 // a notification is sent once consecutive failed checks exceed this count
 };
 
 export default {

+ 2 - 0
common/types/http-check-config.d.ts

@@ -7,5 +7,7 @@ type HttpCheckConfig = {
   active: boolean;
   interval: number;
   timeout?: number;
+  notify: boolean;
+  notifyThreshold: number;
   checks: string[];
 };

+ 56 - 9
server/src/ctrl/database.class.ts

@@ -324,6 +324,9 @@ export class Database extends SQLiteController {
           updValues.push([conf.active ?? defaults.serviceChecks.active ? 1 : 0, conf.id, 'active']);
           status = conf.active ?? defaults.serviceChecks.active ? ServiceChangedStatus.Activated : ServiceChangedStatus.Deactivated;
         }
+        if (oldConf.notify !== conf.notify) updValues.push([conf.notify ?? defaults.serviceChecks.notify ? 1 : 0, conf.id, 'notify']);
+        if (oldConf.notifyThreshold !== conf.notifyThreshold)
+          updValues.push([conf.notifyThreshold ?? defaults.serviceChecks.notifyThreshold, conf.id, 'notifyThreshold']);
         if (updValues.length) {
           for (const data of updValues) {
             await this.run(`UPDATE HealthCheckParams SET Value = ? WHERE ConfigID = ? AND Key = ?;`, data);
@@ -371,12 +374,16 @@ export class Database extends SQLiteController {
           (?, ?, ?, ?),
           (?, ?, ?, ?),
           (?, ?, ?, ?),
+          (?, ?, ?, ?),
+          (?, ?, ?, ?),
           (?, ?, ?, ?)${conf.checks.length ? `,${insCheckValues.map(() => '(?, ?, ?, ?)').join(',')}` : ''}`,
           [
             ...[res.lastID, 'text', 'url', conf.url],
             ...[res.lastID, 'boolean', 'active', conf.active ?? defaults.serviceChecks.active ? 1 : 0],
             ...[res.lastID, 'number', 'interval', conf.interval],
             ...[res.lastID, 'number', 'timeout', conf.timeout ?? defaults.serviceChecks.httpTimeout],
+            ...[res.lastID, 'boolean', 'notify', conf.notify ?? defaults.serviceChecks.notify],
+            ...[res.lastID, 'number', 'notifyThreshold', conf.notifyThreshold ?? defaults.serviceChecks.notifyThreshold],
             ...conf.checks.reduce((ret, check) => [...ret, res.lastID, 'regexp', 'check', check], [] as any[])
           ]
         );
@@ -470,15 +477,7 @@ export class Database extends SQLiteController {
       [confID, from.getTime(), to.getTime(), confID, confID, from.getTime(), to.getTime(), serverID, from.getTime(), to.getTime()]
     );
 
-    const mapByTimestamp = result.rows.reduce((res: Map<number, ServiceCheckDataEntry[]>, row) => {
-      const time: number = row['Timestamp'];
-      if (!res.has(time)) res.set(time, []);
-      res.get(time)?.push({
-        status: row['Status'] as number,
-        message: row['Message']
-      });
-      return res;
-    }, new Map()) as Map<number, ServiceCheckDataEntry[]>;
+    const mapByTimestamp = this.mapServiceCheckDataByTimestamp(result.rows);
 
     const arr: ServiceCheckData[] = [];
     for (const entry of mapByTimestamp.entries()) {
@@ -490,6 +489,52 @@ export class Database extends SQLiteController {
     return arr;
   }
 
+  /**
+   * Returns the latest unbroken streak of failed checks for one health check config.
+   *
+   * Looks at the `threshold` most recent distinct timestamps recorded for `confID`
+   * and walks them from newest to oldest, collecting the non-OK entries of each
+   * one. The walk ends at the first timestamp that has no non-OK entry, so the
+   * result only covers consecutive failures counted back from the latest check.
+   *
+   * @param confID ID of the health check config whose entries are inspected
+   * @param threshold maximum number of most recent timestamps to look at
+   * @returns failed checks, newest first; empty when the most recent timestamp
+   *          has no error or nothing has been recorded yet
+   */
+  public async getLastErrors(confID: number, threshold: number) {
+    const result = await this.stmt(
+      `SELECT * FROM HealthCheckDataEntry
+        WHERE ConfigID = ?
+        AND Timestamp IN (
+          SELECT Timestamp 
+          FROM HealthCheckDataEntry
+          WHERE ConfigID = ?
+          GROUP BY Timestamp
+          ORDER BY Timestamp DESC
+          LIMIT 0, ?
+        )
+        ORDER BY Timestamp DESC, ID DESC`,
+      [confID, confID, threshold]
+    );
+
+    const failedChecks: ServiceCheckData[] = [];
+    // Map iteration follows insertion order, i.e. the query's newest-first ordering.
+    for (const [timestamp, entries] of this.mapServiceCheckDataByTimestamp(result.rows)) {
+      const failedEntries = entries.filter(entry => entry.status !== HttpCheckStatus.OK);
+      // A timestamp without any error ends the streak.
+      if (failedEntries.length === 0) break;
+
+      failedChecks.push({ time: new Date(timestamp), data: failedEntries });
+    }
+
+    return failedChecks;
+  }
+
+  /**
+   * Groups raw check result rows by their `Timestamp` column.
+   *
+   * Each row contributes one `{ status, message }` entry (taken from its `Status`
+   * and `Message` columns) to the list stored under its timestamp. Keys appear in
+   * the map in the order in which their timestamp is first seen in `rows`.
+   *
+   * @param rows result rows exposing `Timestamp`, `Status` and `Message`
+   * @returns map from timestamp to the entries recorded at that timestamp
+   */
+  private mapServiceCheckDataByTimestamp(rows: any[]) {
+    const grouped = new Map<number, ServiceCheckDataEntry[]>();
+    for (const row of rows) {
+      const timestamp: number = row['Timestamp'];
+      const entry: ServiceCheckDataEntry = { status: row['Status'] as number, message: row['Message'] };
+
+      const bucket = grouped.get(timestamp);
+      if (bucket) bucket.push(entry);
+      else grouped.set(timestamp, [entry]);
+    }
+    return grouped;
+  }
+
   private configFromResultRows(rows: any[]) {
     return rows.reduce((res: ServiceConfig[], line, i) => {
       const configID = line['ID'];
@@ -542,6 +587,8 @@ export class Database extends SQLiteController {
       active: (hcConf.params?.find(p => p.key === 'active')?.value as boolean) ?? defaults.serviceChecks.active,
       interval: hcConf.params?.find(p => p.key === 'interval')?.value as number,
       timeout: (hcConf.params?.find(p => p.key === 'timeout')?.value as number) ?? defaults.serviceChecks.httpTimeout,
+      notify: (hcConf.params?.find(p => p.key === 'notify')?.value as boolean) ?? defaults.serviceChecks.notify,
+      notifyThreshold: (hcConf.params?.find(p => p.key === 'notifyThreshold')?.value as number) ?? defaults.serviceChecks.notifyThreshold,
       checks: hcConf.params?.reduce((res, p) => (p.key === 'check' && Array.isArray(p.value) ? [...res, ...p.value] : res), [] as string[])
     };
     return {

+ 25 - 2
server/src/ctrl/http-check-controller.class.ts

@@ -1,11 +1,15 @@
 import axios, { AxiosError, AxiosRequestConfig } from 'axios';
+import moment from 'moment';
 
 import defaults from '../../../common/defaults.module';
 import { HttpCheckStatus } from '../../../common/lib/http-check-data.module';
 import { Logger } from '../../../common/util/logger.class';
 
-import { Database, ServiceChangedStatus } from './database.class';
 import { Timer } from '../timer.class';
+import { Database, ServiceChangedStatus } from './database.class';
+import { FCMController } from './fcm-controller.class';
+
+const FCM_TOPIC_SERVICES = 'monitoring-services'; // FCM topic that failed-HTTP-check notifications are sent to
 
 type Subscriber = { id: number; interval: number; conf: HttpCheckConfig };
 
@@ -89,6 +93,7 @@ export class HttpCheckController {
       timeout: conf.timeout,
       responseType: 'text'
     };
+    let success = true;
     try {
       const current = await this.db.getHttpCheckConfigByID(conf.serverId ?? 0, conf.id);
 
@@ -101,7 +106,6 @@ export class HttpCheckController {
       let response = await axios.get(current.url, options);
       const responseText = new String(response.data).toString();
 
-      let success = true;
       for (const check of current.checks) {
         const reg = new RegExp(check, 'i');
         if (!reg.test(responseText)) {
@@ -117,6 +121,7 @@ export class HttpCheckController {
       }
     } catch (err) {
       let log = false;
+      success = false;
       if (err instanceof AxiosError) {
         // err.code = 'ECONNREFUSED' | 'ECONNABORTED' | 'ERR_BAD_REQUEST' | 'ERR_BAD_RESPONSE' | ...?
         try {
@@ -141,6 +146,24 @@ export class HttpCheckController {
       }
       if (log) Logger.error('[ERROR] HTTP Service Check failed:', err);
     }
+    if (!success && conf.notify) {
+      try {
+        const lastErrors = await this.db.getLastErrors(conf.id, conf.notifyThreshold + 1);
+        if (lastErrors.length > conf.notifyThreshold) {
+          Logger.debug(`[DEBUG] Sending FCM Notification for`, conf.title);
+          const lastCheck = lastErrors[0];
+          const lastError = lastCheck.data[0];
+          await FCMController.instance.sendNotificationToTopic(FCM_TOPIC_SERVICES, {
+            title: `[CRIT] ${conf.title} since ${moment(lastCheck.time).format('HH:mm')}`,
+            body:
+              `HTTP Check '${conf.title}' has failed over ${conf.notifyThreshold} times in a row\n` +
+              `Last error status was: (${lastError.status}) ${lastError.message}`
+          });
+        }
+      } catch (err) {
+        Logger.error('[ERROR] Notification failure:', err);
+      }
+    }
   }
 
   async close() {

+ 7 - 0
server/src/migrations/202212301910_website_healthcheck_notification_defaults.sql

@@ -0,0 +1,7 @@
+INSERT INTO HealthCheckParams(ConfigID, Type, Key, Value) -- backfill: add a 'notify' param for every existing HealthCheckConfig row
+  SELECT ID, 'boolean', 'notify', '0' -- '0' mirrors the code default notify: false
+  FROM HealthCheckConfig;
+
+INSERT INTO HealthCheckParams(ConfigID, Type, Key, Value) -- backfill: add a 'notifyThreshold' param for every existing HealthCheckConfig row
+  SELECT ID, 'number', 'notifyThreshold', '3' -- '3' mirrors the code default notifyThreshold: 3
+  FROM HealthCheckConfig;