From 9c80bb1bb2d3397254b8c9a0ee234efb49c9b88f Mon Sep 17 00:00:00 2001
From: Mark Heily
Date: Mon, 12 Oct 2020 10:02:12 -0400
Subject: [PATCH 1/2] Initial empty repository

From e217a9f6f206a034b00690d86062c4263b7a9bed Mon Sep 17 00:00:00 2001
From: Mark Heily
Date: Mon, 12 Oct 2020 10:06:59 -0400
Subject: [PATCH 2/2] pmp-check-mongo: catch ServerSelectionTimeoutError exceptions and retry.

This fixes an error that appears intermittently:

CRITICAL - Could not connect or exec 'isMaster' command: 'No servers found yet'
---
 nagios/bin/pmp-check-mongo.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/nagios/bin/pmp-check-mongo.py b/nagios/bin/pmp-check-mongo.py
index cddaa31..61cc073 100755
--- a/nagios/bin/pmp-check-mongo.py
+++ b/nagios/bin/pmp-check-mongo.py
@@ -300,10 +300,21 @@ def get_default(self, key, level):
     def sanatize(self, status_output):
         return status_output
 
+    # Run the isMaster command
+    def run_isMaster(self, con):
+        for i in range(5):
+            try:
+                self.isMaster = con['admin'].command('isMaster')
+                break
+            except pymongo.errors.ServerSelectionTimeoutError:
+                time.sleep(1)
+            except Exception, e:
+                return self.return_result("critical", "Could not connect or exec 'isMaster' command: '%s'" % e)
+
+
     # Parse isMaster to determine nodetype
-    def parse_isMaster(self, con):
+    def parse_isMaster(self):
         try:
-            self.isMaster = con['admin'].command('isMaster')
             if 'setName' in self.isMaster:
                 self.setName = self.isMaster['setName']
             if self.isMaster['ismaster']:
@@ -331,7 +342,8 @@ def connect(self, connectTimeout=5000):
             else:
                 con = pymongo.MongoClient(self.host, self.port, ssl=self.ssl, replicaSet=self.replicaset, serverSelectionTimeoutMS=connectTimeout)
             # parse isMaster command output
-            self.parse_isMaster(con)
+            self.run_isMaster(con)
+            self.parse_isMaster()
             if self.user and self.passwd and not self.isArbiter:
                 try:
                     con['admin'].authenticate(self.user, self.passwd)