/common/monitoring/datasets_Monitoring.py - Annotate - SITools2

02482b9a

Alessandro_N

#! /usr/bin/python

2

3

# ******************************************************************************

4

#    Copyright 2015 IAS - IDOC

5

6

#    This program is free software: you can redistribute it and/or modify

7

#    it under the terms of the GNU General Public License as published by

8

#    the Free Software Foundation, either version 3 of the License, or

9

#    (at your option) any later version.

10

11

#    This program is distributed in the hope that it will be useful,

12

#    but WITHOUT ANY WARRANTY; without even the implied warranty of

13

#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

14

#    GNU General Public License for more details.

15

16

#    You should have received a copy of the GNU General Public License

17

#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

18

19

# ******************************************************************************

20

21

'''
This script monitors the status of the Sitools2 datasets
and the mapping of their fields.

Put the script in the Sitools2 folder data/datasets (data/datasets/map for the latest versions),
where the dataset information is stored as XML files.
Then read and record the current status/mapping with:

$ datasets_Monitoring --record

The dataset properties are stored locally in files named:

        <dataset_name>.lastStatus.xml

The '--record' option should be run manually by the administrator every time
one or more datasets are modified (it would be good to add a reminder to the
Sitools2 pop-up message).

To check the datasets mapping/status, launch the script with the '--check' option:

$ datasets_Monitoring --check

This performs a consistency check between the current status (int@*.xml or map/string@*.xml)
and the last recorded one (<dataset_name>.lastStatus.xml).
If any inconsistency is found, an alert e-mail is sent.

The '--check' option should be run daily and automatically by the system.

@author: Alessandro NASTASI for IAS -IDOC
@date: 27/04/2015
'''

52

53

__author__ = "Alessandro Nastasi"

54

__credits__ = ["Alessandro Nastasi", "Herve' Ballans"]

55

__license__ = "GPL"

56

__version__ = "1.0"

57

__date__ = "27/04/2015"

58

59

import sys,os, time

60

from datetime import date

61

import xml.etree.ElementTree as ET

62

import smtplib

63

from email.mime.text import MIMEText

64

65

# Glob pattern matching the files where Sitools2 stores the datasets information.
sitools2_xml_filenames = "string@*.xml"

# Directory holding the (string@*.xml) Sitools2 files. The trailing slash is
# needed because the script builds paths by plain string concatenation.
file_path = '/usr/local/Sitools2_SZ_Cluster_DB/data/datasets/map/'

67

68

class bcolors:
    # ANSI escape sequences used to colour the console output of the script.
    # Each coloured fragment must be terminated with ENDC to restore the
    # terminal default.
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'   # successful checks
    WARNING = '\033[93m'   # inconsistencies that only require a new '--record'
    FAIL = '\033[91m'      # real errors (dataset inactive, mapping lost)
    ENDC = '\033[0m'       # reset sequence

75

76

# Messages reported on the console and in the alert e-mail, keyed by error code:
#   1 -> the dataset status differs from the recorded one
#   2 -> the columns mapping differs from the recorded one
#   3 -> the recorded *.lastStatus.xml files look out of date
_ERROR_CODE = {
    1: 'Status inconsistency found:',
    2: 'Mapping inconsistency found:',
    3: "The *.lastStatus.xml files were probably not updated: re-run the script with '--record' option.",
}

81

82

def create_id_alias_dict(xml_root):
    """Build the ``{column_id: column_alias}`` dictionary of a dataset.

    Only the direct ``<column>`` children of ``xml_root`` are inspected; no
    mapping information is read here.

    :param xml_root: root element of a parsed dataset XML file.
    :return: dict mapping the text of each column ``<id>`` to the text of its
        ``<columnAlias>``. A column without an ``<id>`` element is skipped
        (it cannot be keyed); a column without a ``<columnAlias>`` element
        gets ``None`` as alias.
    """
    id_alias_dict = {}
    for col in xml_root.findall('column'):
        id_node = col.find('id')
        if id_node is None:
            # Malformed column: nothing to key it with, ignore it instead of
            # aborting the whole run with an AttributeError.
            continue
        alias_node = col.find('columnAlias')
        id_alias_dict[id_node.text] = alias_node.text if alias_node is not None else None

    return id_alias_dict

91

92

93

def send_alert_mail(body):
    """Send the alert e-mail reporting the outcome of a datasets check.

    :param body: HTML text of the message.
    :raises smtplib.SMTPException: (or a socket error) if the connection to
        the SMTP server or the delivery fails; nothing is caught here.
    """
    SMTP_SERVER = 'smtp.ias.u-psud.fr'
    SMTP_PORT = 25

    sender = 'sitools2.notifier@ias.u-psud.fr'
    recipient = 'sitools2@ias.u-psud.fr'
    subject = '[Sitools2 - SZDB] Datasets status ALERT'

    # MIMEText generates the MIME-Version/Content-Type headers itself, so the
    # addressing headers are set on the message object. Prepending a second,
    # hand-written header block would push the MIME headers into the visible
    # body of the mail.
    message = MIMEText(body, 'html')
    message['From'] = sender
    message['To'] = recipient
    message['Subject'] = subject

    # The session is used as-is: the STARTTLS/login steps were disabled in the
    # original script and are still not performed.
    session = smtplib.SMTP(SMTP_SERVER, SMTP_PORT)
    try:
        session.sendmail(sender, [recipient], message.as_string())
    finally:
        # Always release the connection, even when the delivery fails.
        session.quit()

119

120

def record_status():
    """Record the current status and mapping of every Sitools2 dataset.

    For each file matching ``file_path + sitools2_xml_filenames`` a
    ``<dataset_name>.lastStatus.xml`` file is (over)written in ``file_path``
    with the dataset status, the number of mapped concepts and, for each
    mapped column, its id, alias and concept id.

    The content of a record is fully built before its file is opened, so a
    dataset that cannot be read does not leave a truncated record behind.
    """
    # Local imports keep this function self-contained.
    import glob
    from xml.sax.saxutils import escape

    print("\n> Recording current datasets properties ...\n")

    # sorted() keeps the alphabetical order formerly given by `ls`; glob avoids
    # spawning a shell and copes with paths containing spaces.
    for xml_path in sorted(glob.glob(file_path + sitools2_xml_filenames)):
        root = ET.parse(xml_path).getroot()

        dataset_name = root.find('name').text
        fileDataset = file_path + dataset_name + '.lastStatus.xml'
        status = root.find('status').text

        ## Column dictionary " column_id : column_alias ". No mapping information here.
        column = create_id_alias_dict(root)

        ## Mapped concepts and the columns they are mapped to
        mapped_Concepts_Id = [elem.text for elem in root.findall('conceptId')]
        mapped_Column_Id = [elem.text for elem in root.findall('columnId')]

        today = date.today().strftime("%A %d. %B %Y")
        lines = [
            ## Header in recorded file
            "<!--File recorded on " + str(today) + "-->\n",
            ## Last status of the dataset
            "<dataset>\n",
            "<!--Last Dataset status:-->\n",
            " <lastStatus>" + escape(status) + "</lastStatus>",
            ## Last mapping
            "\n<!--Last mapping:-->\n <!--mappedColumns-->\n",
            " <totNum>" + str(len(mapped_Concepts_Id)) + "</totNum>\n",
        ]
        for column_id, concept_id in zip(mapped_Column_Id, mapped_Concepts_Id):
            # Values are escaped so that '&' or '<' in an alias cannot produce
            # a record that the '--check' step is then unable to parse.
            # A mapped column unknown to the columns list falls back to its id.
            lines.append(" <columnId>" + escape(str(column_id)) + "</columnId>\n")
            lines.append(" <columnAlias>" + escape(str(column.get(column_id, column_id))) + "</columnAlias>\n")
            lines.append(" <conceptId>" + escape(str(concept_id)) + "</conceptId>\n")
        lines.append(" <!--/mappedColumns-->\n</dataset>")

        with open(fileDataset, 'w') as fileOut:
            fileOut.writelines(lines)

        print("   - Current status and mapping of %s written in %s\n" % (dataset_name, fileDataset))

178

179

def check_status():
    """Compare every dataset with its last recorded status and mapping.

    For each file matching ``file_path + sitools2_xml_filenames`` the current
    status and mapped column ids are compared with those stored in
    ``<dataset_name>.lastStatus.xml``. The outcome is printed on the console;
    if any inconsistency is found, an alert e-mail is sent through
    ``send_alert_mail`` and its text is appended to
    ``monitoring_errors.log.html`` in ``file_path``.

    Severity rules:
      * status changed to 'INACTIVE', or a recorded mapping lost -> error;
      * any other status change, columns mapped now but not in the record,
        or a missing record file -> warning (the record is probably stale).
    """
    # Local import keeps this function self-contained.
    import glob

    today = date.today().strftime("%A %d. %B %Y")
    now = time.strftime("%H:%M:%S")
    check_datime = today + ' at ' + now
    print("\n#\n#Last check done on " + check_datime + " \n#")
    print("\n> Checking datasets properties ...")

    error_status, error_mapping, warning = False, False, False
    email_body = "<br></br><i>Outcome of the datasets check done on %s</i>" % check_datime

    # sorted() keeps the alphabetical order formerly given by `ls`.
    for xml_path in sorted(glob.glob(file_path + sitools2_xml_filenames)):
        email_body += "\n"
        currentRoot = ET.parse(xml_path).getroot()

        dataset_name = currentRoot.find('name').text
        filename_dataset = file_path + dataset_name + '.lastStatus.xml'
        print("\n   -  " + dataset_name)

        # A dataset that was never recorded must not abort the whole check:
        # report it as a stale-record warning and go on with the others.
        if not os.path.isfile(filename_dataset):
            warning = True
            output_check = bcolors.WARNING + "[FAIL]" + bcolors.ENDC
            output_message = "\n     " + bcolors.WARNING + "No recorded status found: " + filename_dataset + " is missing." + bcolors.ENDC
            email_body += "<h3>" + dataset_name + "</h3><b>No recorded status found:</b><p> " + filename_dataset + " is missing.</p>"
            print('   {0:20s} {1:7s}'.format('Status check ...', output_check) + output_message)
            continue

        lastRoot = ET.parse(filename_dataset).getroot()

        ##************  Status  ************##

        lastStatus = lastRoot.find('lastStatus').text
        currentStatus = currentRoot.find('status').text
        showStatus = ""
        output_message = ""

        if lastStatus != currentStatus:
            error_status = True
            if currentStatus == 'INACTIVE':
                # The dataset went down: real error.
                colour = bcolors.FAIL
                email_body += "<h3>"+dataset_name+"</h3><b>*** "+_ERROR_CODE[1]+" ***</b><p>Current status: <b>"+currentStatus+"</b> , last status: <b>"+lastStatus+"</b>."
            else:
                # Any other change most likely means the record is stale.
                warning = True
                colour = bcolors.WARNING
                email_body += "<h3>"+dataset_name+"</h3><b>"+_ERROR_CODE[1]+"</b><p>Current status: <b>"+currentStatus+"</b> , last status: <b>"+lastStatus+"</b>."
            output_check = colour + "[FAIL]" + bcolors.ENDC
            output_message = "\n     " + colour + _ERROR_CODE[1] + bcolors.ENDC
            output_message += "\n     Current status: " + colour + currentStatus + bcolors.ENDC + " -  last status: " + colour + lastStatus + bcolors.ENDC
        else:
            output_check = bcolors.OKGREEN + "[OK]" + bcolors.ENDC
            showStatus = "   -   " + currentStatus

        print('   {0:20s} {1:7s}'.format('Status check ...', output_check) + showStatus + output_message)

        ##************  Mapping  ************##

        ## Column ids mapped in the record and in the current file
        last_current_mapped_Column_Id = [elem.text for elem in lastRoot.findall('columnId')]
        current_mapped_Column_Id = [elem.text for elem in currentRoot.findall('columnId')]
        last_ids = set(last_current_mapped_Column_Id)
        current_ids = set(current_mapped_Column_Id)

        # Aliases are taken from the current file. A recorded column that no
        # longer exists there has no alias, so its id is reported instead of
        # failing with a KeyError.
        column = create_id_alias_dict(currentRoot)

        # Columns mapped in the last file, but not in the current one --> Mapping lost!
        missing_current_columnAlias = [column.get(columnId, columnId) for columnId in last_current_mapped_Column_Id if columnId not in current_ids]
        # Columns mapped in the current file, but not in the last one --> User's error: *.lastStatus.xml probably not updated
        missing_last_columnAlias = [column.get(columnId, columnId) for columnId in current_mapped_Column_Id if columnId not in last_ids]

        output_message = ""

        if not missing_current_columnAlias and not missing_last_columnAlias:
            output_check = bcolors.OKGREEN + "[OK]" + bcolors.ENDC
        elif missing_last_columnAlias:
            warning = True
            output_check = bcolors.WARNING + "[FAIL]" + bcolors.ENDC
            output_message = "\n     " + bcolors.WARNING + _ERROR_CODE[2] + bcolors.ENDC
            output_message += "\n     Some columns are mapped in the new version, but not in the last one: " + bcolors.WARNING + str(missing_last_columnAlias) + bcolors.ENDC
            email_body += "<h3>"+dataset_name+"</h3><b>"+_ERROR_CODE[2]+"</b><p> Some columns are mapped in the new version, but not in the last one: "+str(missing_last_columnAlias)+"</p>"
        else:
            # Only reached when some recorded mappings are gone and no new
            # mapping appeared.
            error_mapping = True
            output_check = bcolors.FAIL + "[FAIL]" + bcolors.ENDC
            output_message = "\n     " + bcolors.FAIL + _ERROR_CODE[2] + bcolors.ENDC
            output_message += "\n     The following column(s) is(are) not mapped anymore: " + bcolors.FAIL + str(missing_current_columnAlias) + bcolors.ENDC
            email_body += "<h3>"+dataset_name+"</h3><b>*** "+_ERROR_CODE[2]+" ***</b><p> The following column(s) is(are) not mapped anymore: <b>"+str(missing_current_columnAlias)+"</b></p>"

        print('   {0:20s} {1:7s}'.format('Mapping check ...', output_check) + output_message)

    print("\n__________________________________________________\n\n> Outcome of the check process:")
    email_body += "<p>__________________________________________________</p><p>The check process has produced the following message:"

    ## Send an e-mail alert if any error/inconsistency is found
    if error_status or error_mapping or warning:
        if warning:
            print("\n   " + bcolors.WARNING + _ERROR_CODE[3] + bcolors.ENDC)
            email_body += "<p></p><i><h4>"+_ERROR_CODE[3]+'</h4></i></p>'
        else:
            print(bcolors.FAIL + '\n   Unexpected errors have been found! Please check the datasets properties in Sitools2.' + bcolors.ENDC)
            email_body += "<p></p><h4>Unexpected errors have been found! Please check the datasets properties in Sitools2.</h4></br></p>"

        try:
            send_alert_mail(email_body)
            print("\n\t>> !! ALERT E-MAIL SENT !! <<\n")
        finally:
            # The alert text is appended to an *.log.html file to keep track of
            # the found errors, even when the e-mail could not be delivered.
            errors_log_name = file_path + 'monitoring_errors.log.html'
            with open(errors_log_name, 'a') as errors_log_file:
                errors_log_file.write(email_body)

    else:
        print(bcolors.OKGREEN + '\n   No errors/inconsistencies found.\n' + bcolors.ENDC)

def main(argv=None):
    """Command-line entry point of the script.

    :param argv: argument list, ``sys.argv`` when omitted. ``argv[1]`` selects
        the action: ``--check`` runs ``check_status()``, ``--record`` runs
        ``record_status()`` after an explicit confirmation. Any other value,
        or no option at all, prints the usage message.
    :return: the process exit code (always 0, as in the original script).
    """
    if argv is None:
        argv = sys.argv
    option = argv[1] if len(argv) > 1 else None

    if option == '--check':
        check_status()
    elif option == '--record':
        # raw_input only exists in Python 2; fall back to input() otherwise.
        try:
            ask = raw_input
        except NameError:
            ask = input
        overwrite = ask(bcolors.WARNING+"\n  This option will overwrite the current recorded settings. Do you really want to proceed?:  "+ bcolors.ENDC)
        if overwrite.strip().lower() in ("yes", "y", "oui"):
            record_status()
        else:
            # Anything that is not an explicit confirmation aborts, so the
            # user always gets feedback on what happened.
            print("Aborted.\n")
    else:
        print(bcolors.WARNING + "\n> Syntax:\t$ python datasets_Monitoring.py [OPTION]\n" + bcolors.ENDC)
        print("Options:\n\n   --check\n\tA status consistency check is performed between the current and last recorded datasets properties.")
        print("\n   --record\n\tThe current datasets properties (status and mapping) are recorded. NB. This procedure will overwrite the previously recorded entries.\n")
    return 0


# Run the command line only when executed as a script, not when imported.
if __name__ == '__main__':
    sys.exit(main())

Project

General

Profile

SITools2

git_sitools_idoc / common / monitoring / datasets_Monitoring.py @ master