Project

General

Profile

Download (13.5 KB) Statistics
| Branch: | Revision:

git_sitools_idoc / common / monitoring / datasets_Monitoring.py @ d16caf89

1
#! /usr/bin/python
2

    
3
# ******************************************************************************
4
#    Copyright 2015 IAS - IDOC
5
#
6
#    This program is free software: you can redistribute it and/or modify
7
#    it under the terms of the GNU General Public License as published by
8
#    the Free Software Foundation, either version 3 of the License, or
9
#    (at your option) any later version.
10
#
11
#    This program is distributed in the hope that it will be useful,
12
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
13
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
#    GNU General Public License for more details.
15
#
16
#    You should have received a copy of the GNU General Public License
17
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
#
19
# ******************************************************************************
20

    
21
'''
This script monitors the status of the Sitools2 datasets
and the mapping of their fields.

Put the script in the Sitools2 folder data/datasets (data/datasets/map for the latest versions),
where the dataset information is stored as XML files.
Then read and record the current status/mapping with:

$ datasets_Monitoring --record

The dataset properties are stored locally in files named:

        <dataset_name>.lastStatus.xml

The '--record' option should be executed manually by the administrator every time
one or more datasets are modified (it would be good to add a reminder to the
Sitools2 pop-up message).

To check the datasets mapping/status, launch the script with the '--check' option:

$ datasets_Monitoring --check

This performs a consistency check between the current (int@*.xml or map/string@*.xml)
and the last recorded status (<dataset_name>.lastStatus.xml).
If any inconsistency is found, an alert e-mail is sent.

The '--check' option should be run daily and automatically by the system.

@author: Alessandro NASTASI for IAS -IDOC
@date: 27/04/2015
'''
52

    
53
# Module metadata: authorship, licence and release information.
__author__ = "Alessandro Nastasi"
__credits__ = [
    "Alessandro Nastasi",
    "Herve' Ballans",
]
__license__ = "GPL"
__version__ = "1.0"
__date__ = "27/04/2015"  # dd/mm/yyyy
58

    
59
import sys,os, time
60
from datetime import date
61
import xml.etree.ElementTree as ET
62
import smtplib
63
from email.mime.text import MIMEText
64

    
65
# Glob pattern of the XML files in which Sitools2 stores the datasets information.
sitools2_xml_filenames = "string@*.xml"

# Directory where the (string@*.xml) Sitools2 files are stored.
# Keep the trailing slash: the path is concatenated directly with file names.
file_path = '/usr/local/Sitools2_SZ_Cluster_DB/data/datasets/map/'
67

    
68
class bcolors:
    '''ANSI escape sequences used to colour the console output.

    Wrap a message as ``bcolors.<COLOUR> + message + bcolors.ENDC``.
    '''

    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'  # resets the terminal attributes
75

    
76
# Alert messages keyed by numeric code; they are shown on the console and
# embedded in the alert e-mail.
_ERROR_CODE = {
    1: 'Status inconsistency found:',
    2: 'Mapping inconsistency found:',
    3: "The *.lastStatus.xml files were probably not updated: re-run the script with '--record' option.",
}
81

    
82
def create_id_alias_dict(xml_root):
    '''Build the ``{column_id: column_alias}`` dictionary of a dataset.

    Only the direct ``<column>`` children of *xml_root* are read; no mapping
    information is involved here.

    :param xml_root: root ``Element`` of a parsed dataset XML file.
    :return: dict mapping the text of each column's ``<id>`` to the text of
             its ``<columnAlias>`` (``None`` when the column has no alias
             element). Columns without an ``<id>`` element are skipped,
             since they cannot be keyed.
    '''
    id_alias_dict = {}
    for col in xml_root.findall('column'):
        id_elem = col.find('id')
        # Compare with None explicitly: an Element without children is falsy.
        if id_elem is None:
            continue
        alias_elem = col.find('columnAlias')
        id_alias_dict[id_elem.text] = alias_elem.text if alias_elem is not None else None

    return id_alias_dict
91
    
92
    
93
def send_alert_mail(body):
    '''Send the alert e-mail, with *body* as its HTML content.

    :param body: HTML text of the message.
    :raises smtplib.SMTPException: propagated from the SMTP session when the
        server refuses the connection or the message.
    '''
    SMTP_SERVER = 'smtp.ias.u-psud.fr'
    SMTP_PORT = 25

    sender = 'sitools2.notifier@ias.u-psud.fr'
    recipient = 'sitools2@ias.u-psud.fr'
    subject = '[Sitools2 - SZDB] Datasets status ALERT'

    # Let the MIME object carry every header. Prepending hand-written headers
    # to as_string() pushes the generated MIME headers (Content-Type,
    # MIME-Version, Content-Transfer-Encoding) below the blank separator line,
    # i.e. into the visible body of the mail.
    message = MIMEText(body, 'html')
    message['From'] = sender
    message['To'] = recipient
    message['Subject'] = subject

    # The connection is used as-is: no STARTTLS and no login are performed.
    session = smtplib.SMTP(SMTP_SERVER, SMTP_PORT)
    try:
        session.sendmail(sender, [recipient], message.as_string())
    finally:
        # Always close the connection, even when the send fails.
        session.quit()
119

    
120
def record_status():
    '''Record the current status and mapping of every dataset.

    For each file matching ``file_path + sitools2_xml_filenames`` a snapshot
    named ``<dataset_name>.lastStatus.xml`` is (over)written in ``file_path``.
    It holds the dataset status, the number of mapped concepts and, for each
    mapped column, its id, alias and concept id.

    Takes no argument and returns nothing; progress is printed on stdout.
    '''
    # Local imports: these names are only needed by this function.
    import glob
    from xml.sax.saxutils import escape

    print("\n> Recording current datasets properties ...\n")

    # glob replaces the former `ls` shell call (no sub-shell, safe with any
    # path); sorted() keeps the processing order deterministic.
    for xml_file in sorted(glob.glob(file_path + sitools2_xml_filenames)):
        root = ET.parse(xml_file).getroot()

        dataset_name = root.find('name').text
        fileDataset = file_path + dataset_name + '.lastStatus.xml'
        # An empty <status/> element has text None: record it as empty.
        status = root.find('status').text or ''

        ## Column dictionary " column_id : column_alias ". No mapping information here.
        column = create_id_alias_dict(root)

        ## Mapped concepts and columns
        mapped_concept_ids = [elem.text for elem in root.findall('conceptId')]
        mapped_column_ids = [elem.text for elem in root.findall('columnId')]

        # The whole snapshot is built in memory first, so that a failure while
        # reading the dataset cannot leave a truncated *.lastStatus.xml behind.
        # Text is XML-escaped: a '&' or '<' in a value would otherwise make
        # the snapshot unparsable by the '--check' run.
        today = date.today().strftime("%A %d. %B %Y")
        lines = [
            ## Header in recorded file
            "<!--File recorded on " + str(today) + "-->\n",
            ## Last status of the dataset
            "<dataset>\n",
            "<!--Last Dataset status:-->\n",
            " <lastStatus>" + escape(status) + "</lastStatus>",
            ## Last mapping
            "\n<!--Last mapping:-->\n <!--mappedColumns-->\n",
            " <totNum>" + str(len(mapped_concept_ids)) + "</totNum>\n",
        ]
        for index, column_id in enumerate(mapped_column_ids):
            # A mapped id that is not among the columns gets an empty alias
            # instead of aborting the whole recording with a KeyError.
            alias = column.get(column_id, '')
            lines.append(" <columnId>" + escape(str(column_id)) + "</columnId>\n")
            lines.append(" <columnAlias>" + escape(str(alias)) + "</columnAlias>\n")
            lines.append(" <conceptId>" + escape(str(mapped_concept_ids[index])) + "</conceptId>\n")
        lines.append(" <!--/mappedColumns-->\n</dataset>")

        with open(fileDataset, 'w') as fileOut:
            fileOut.writelines(lines)

        print("   - Current status and mapping of %s written in %s\n" % (dataset_name, fileDataset))
178

    
179
def check_status():
    '''Compare the current datasets properties with the last recorded ones.

    For each file matching ``file_path + sitools2_xml_filenames`` the dataset
    status and the list of mapped column ids are compared with the content of
    ``<dataset_name>.lastStatus.xml``. The outcome is printed on stdout; if
    any inconsistency is found the report is appended to
    ``monitoring_errors.log.html`` in ``file_path`` and sent by e-mail.

    Takes no argument and returns nothing.
    '''
    # Local import: only needed by this function.
    import glob

    today = date.today().strftime("%A %d. %B %Y")
    now = time.strftime("%H:%M:%S")
    check_datime = today + ' at ' + now
    print("\n#\n#Last check done on " + check_datime + " \n#")
    print("\n> Checking datasets properties ...")

    error_status, error_mapping, warning = False, False, False
    email_body = "<br></br><i>Outcome of the datasets check done on %s</i>" % check_datime

    # glob replaces the former `ls` shell call (no sub-shell, safe with any
    # path); sorted() keeps the processing order deterministic.
    for xml_file in sorted(glob.glob(file_path + sitools2_xml_filenames)):
        email_body += "\n"
        currentRoot = ET.parse(xml_file).getroot()

        dataset_name = currentRoot.find('name').text
        filename_dataset = file_path + dataset_name + '.lastStatus.xml'
        print("\n   -  " + dataset_name)

        # A dataset that was never recorded (or whose snapshot is corrupt)
        # must not abort the check of all the other datasets: report it as a
        # warning and carry on.
        try:
            lastRoot = ET.parse(filename_dataset).getroot()
        except (IOError, ET.ParseError) as snapshot_error:
            warning = True
            print("     " + bcolors.WARNING + "Recorded status file missing or unreadable: "
                  + filename_dataset + " (" + str(snapshot_error) + ")" + bcolors.ENDC)
            email_body += ("<h3>" + dataset_name + "</h3><b>Recorded status file missing or unreadable:</b><p>"
                           + filename_dataset + "</p>")
            continue

        ##************  Status  ************##

        lastStatus = lastRoot.find('lastStatus').text
        currentStatus = currentRoot.find('status').text
        output_message = ""

        if lastStatus != currentStatus:
            # A dataset that became INACTIVE is a failure; any other change is
            # only a warning (the snapshot is probably outdated).
            if currentStatus == 'INACTIVE':
                colour = bcolors.FAIL
                title = "<b>*** " + _ERROR_CODE[1] + " ***</b>"
            else:
                warning = True
                colour = bcolors.WARNING
                title = "<b>" + _ERROR_CODE[1] + "</b>"

            output_check = colour + "[FAIL]" + bcolors.ENDC
            output_message = "\n     " + colour + _ERROR_CODE[1] + bcolors.ENDC
            output_message += ("\n     Current status: " + colour + currentStatus + bcolors.ENDC
                               + " -  last status: " + colour + lastStatus + bcolors.ENDC)
            email_body += ("<h3>" + dataset_name + "</h3>" + title + "<p>Current status: <b>"
                           + currentStatus + "</b> , last status: <b>" + lastStatus + "</b>.")
            error_status = True
            showStatus = ""
        else:
            output_check = bcolors.OKGREEN + "[OK]" + bcolors.ENDC
            showStatus = "   -   " + currentStatus

        print('   {0:20s} {1:7s}'.format('Status check ...', output_check) + showStatus + output_message)

        ##************  Mapping  ************##

        ## Column ids mapped in the last recorded file and in the current one
        last_mapped_ids = [elem.text for elem in lastRoot.findall('columnId')]
        current_mapped_ids = [elem.text for elem in currentRoot.findall('columnId')]

        # Aliases come from the current file. A column that was removed from
        # the dataset has no alias any more, so fall back to its id instead of
        # failing with a KeyError.
        column = create_id_alias_dict(currentRoot)

        # Columns mapped in the last file, but not in the current one --> Mapping lost!
        missing_current_columnAlias = [column.get(column_id, column_id)
                                       for column_id in last_mapped_ids
                                       if column_id not in current_mapped_ids]
        # Columns mapped in the current file, but not in the last one --> User's error: *.lastStatus.xml probably not updated
        missing_last_columnAlias = [column.get(column_id, column_id)
                                    for column_id in current_mapped_ids
                                    if column_id not in last_mapped_ids]

        output_check = ""
        output_message = ""

        if not missing_current_columnAlias and not missing_last_columnAlias:
            output_check = bcolors.OKGREEN + "[OK]" + bcolors.ENDC
        elif missing_last_columnAlias:
            warning = True
            output_check = bcolors.WARNING + "[FAIL]" + bcolors.ENDC
            output_message = "\n     " + bcolors.WARNING + _ERROR_CODE[2] + bcolors.ENDC
            output_message += ("\n     Some columns are mapped in the new version, but not in the last one: "
                               + bcolors.WARNING + str(missing_last_columnAlias) + bcolors.ENDC)
            email_body += ("<h3>" + dataset_name + "</h3><b>" + _ERROR_CODE[2]
                           + "</b><p> Some columns are mapped in the new version, but not in the last one: "
                           + str(missing_last_columnAlias) + "</p>")
        else:
            # Only lost mappings are left at this point.
            output_check = bcolors.FAIL + "[FAIL]" + bcolors.ENDC
            output_message = "\n     " + bcolors.FAIL + _ERROR_CODE[2] + bcolors.ENDC
            output_message += ("\n     The following column(s) is(are) not mapped anymore: "
                               + bcolors.FAIL + str(missing_current_columnAlias) + bcolors.ENDC)
            email_body += ("<h3>" + dataset_name + "</h3><b>*** " + _ERROR_CODE[2]
                           + " ***</b><p> The following column(s) is(are) not mapped anymore: <b>"
                           + str(missing_current_columnAlias) + "</b></p>")
            error_mapping = True

        print('   {0:20s} {1:7s}'.format('Mapping check ...', output_check) + output_message)

    print("\n__________________________________________________\n\n> Outcome of the check process:")
    email_body += "<p>__________________________________________________</p><p>The check process has produced the following message:"

    ## Send an e-mail alert if any error/inconsistency is found
    if error_status or error_mapping or warning:
        if warning:
            print("\n   " + bcolors.WARNING + _ERROR_CODE[3] + bcolors.ENDC)
            email_body += "<p></p><i><h4>" + _ERROR_CODE[3] + '</h4></i></p>'
        else:
            print(bcolors.FAIL + '\n   Unexpected errors have been found! Please check the datasets properties in Sitools2.' + bcolors.ENDC)
            email_body += "<p></p><h4>Unexpected errors have been found! Please check the datasets properties in Sitools2.</h4></br></p>"

        # The alert text is appended to an *log.html file to keep track of the
        # found errors. It is written before the mail is sent, so the record
        # survives even when the SMTP server cannot be reached.
        errors_log_name = file_path + 'monitoring_errors.log.html'
        with open(errors_log_name, 'a') as errors_log_file:
            errors_log_file.write(email_body)

        send_alert_mail(email_body)
        print("\n\t>> !! ALERT E-MAIL SENT !! <<\n")

    else:
        print(bcolors.OKGREEN + '\n   No errors/inconsistencies found.\n' + bcolors.ENDC)
301

    
302

    
303
            
304
def _print_usage():
    '''Print the command-line help of the script.'''
    print(bcolors.WARNING + "\n> Syntax:\t$ python datasets_Monitoring.py [OPTION]\n" + bcolors.ENDC)
    print("Options:\n\n   --check\n\tA status consistency check is performed between the current and last recorded datasets properties.")
    print("\n   --record\n\tThe current datasets properties (status and mapping) are recorded. NB. This procedure will overwrite the previously recorded entries.\n")


def _main(argv):
    '''Dispatch on the command-line option found in *argv*.

    :param argv: argument list, in the ``sys.argv`` layout (program name first).
    :return: process exit code (always 0).
    '''
    # With no option at all, show the help instead of exiting silently.
    option = argv[1] if len(argv) > 1 else None

    if option == '--check':
        check_status()
    elif option == '--record':
        # raw_input only exists on Python 2; fall back to input elsewhere.
        try:
            ask = raw_input
        except NameError:
            ask = input
        overwrite = ask(bcolors.WARNING + "\n  This option will overwrite the current recorded settings. Do you really want to proceed?:  " + bcolors.ENDC)
        if overwrite.strip() in ["Yes", "yes", "Y", "y", "oui", "OUI", "Oui"]:
            record_status()
        else:
            # Anything but an explicit confirmation aborts, and says so.
            print("Aborted.\n")
    else:
        _print_usage()

    return 0


# Run only when executed as a script, so that importing the module has no
# side effect.
if __name__ == '__main__':
    sys.exit(_main(sys.argv))