1
|
|
2
|
|
3
|
|
4
|
|
5
|
|
6
|
|
7
|
|
8
|
|
9
|
|
10
|
|
11
|
|
12
|
|
13
|
|
14
|
|
15
|
|
16
|
|
17
|
|
18
|
|
19
|
|
20
|
|
21
|
'''
|
22
|
This is a script aimed at monitoring the status of the Sitools2 datasets
|
23
|
and the mapping of their fields.
|
24
|
|
25
|
Put the script in the Sitools2 folder data/datasets (or data/datasets/map, for the latest versions)
|
26
|
where the dataset information is stored as XML files.
|
27
|
Then, read and record the current status/mapping, with:
|
28
|
|
29
|
$ datasets_Monitoring --record
|
30
|
|
31
|
The datasets properties are locally stored in files named as:
|
32
|
|
33
|
<dataset_name>.lastStatus.xml
|
34
|
|
35
|
The '--record' option should be executed manually by the administrator every time
|
36
|
one or more datasets are modified (it would be good to add a reminder to the
|
37
|
Sitools2 pop-up message).
|
38
|
|
39
|
To check the datasets mapping/status, launch the script with the '--check' option:
|
40
|
|
41
|
$ datasets_Monitoring --check
|
42
|
|
43
|
This performs a consistency check between the current (int@*.xml or map/string@*.xml)
|
44
|
and the last recorded status (<dataset_name>.lastStatus.xml).
|
45
|
If any inconsistency is found, an alert e-mail is sent.
|
46
|
|
47
|
The '--check' option should be run daily and automatically by the system.
|
48
|
|
49
|
@author: Alessandro NASTASI for IAS -IDOC
|
50
|
@date: 27/04/2015
|
51
|
'''
|
52
|
|
53
|
__author__ = "Alessandro Nastasi"
|
54
|
__credits__ = ["Alessandro Nastasi", "Herve' Ballans"]
|
55
|
__license__ = "GPL"
|
56
|
__version__ = "1.0"
|
57
|
__date__ = "27/04/2015"
|
58
|
|
59
|
import sys,os, time
|
60
|
from datetime import date
|
61
|
import xml.etree.ElementTree as ET
|
62
|
import smtplib
|
63
|
from email.mime.text import MIMEText
|
64
|
|
65
|
# Shell-style pattern matching the Sitools2 dataset description files.
sitools2_xml_filenames = "string@*.xml"

# Folder holding the dataset description files; the *.lastStatus.xml
# snapshots and the monitoring log are written to this folder as well.
file_path = '/usr/local/Sitools2_SZ_Cluster_DB/data/datasets/map/'
|
67
|
|
68
|
class bcolors:
    """Terminal escape sequences used to colour the console messages.

    Wrap a text between one of the colour codes and ``ENDC`` to print it
    highlighted, e.g. ``bcolors.FAIL + "[FAIL]" + bcolors.ENDC``.
    """

    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'   # used for the "[OK]" outcomes
    WARNING = '\033[93m'   # used for warnings
    FAIL = '\033[91m'      # used for failures
    ENDC = '\033[0m'       # closes a highlighted span
|
75
|
|
76
|
# Alert messages keyed by numeric code: 1 and 2 head the per-dataset
# reports (status / mapping), 3 is the closing hint printed when a
# warning was raised during the check.
_ERROR_CODE = {
    1: 'Status inconsistency found:',
    2: 'Mapping inconsistency found:',
    3: "The *.lastStatus.xml files were probably not updated: re-run the script with '--record' option.",
}
|
81
|
|
82
|
def create_id_alias_dict(xml_root):
    """Map the id of each column of a dataset to its alias.

    Only the ``column`` elements that are direct children of *xml_root*
    are considered; the text of their ``id`` child is used as key and the
    text of their ``columnAlias`` child as value.

    :param xml_root: root element of a parsed dataset description.
    :returns: dict ``{column id: column alias}`` (empty if no column).
    """
    return dict(
        (column.find('id').text, column.find('columnAlias').text)
        for column in xml_root.findall('column')
    )
|
91
|
|
92
|
|
93
|
def send_alert_mail(body):
    """Send the alert e-mail reporting the outcome of a datasets check.

    The message is sent as HTML, through the SMTP server and with the
    sender, recipient and subject configured below. Connection and SMTP
    errors raised by ``smtplib`` propagate to the caller.

    :param body: HTML text of the message.
    """
    SMTP_SERVER = 'smtp.ias.u-psud.fr'
    SMTP_PORT = 25

    sender = 'sitools2.notifier@ias.u-psud.fr'
    recipient = 'sitools2@ias.u-psud.fr'
    subject = '[Sitools2 - SZDB] Datasets status ALERT'

    # Put the addressing headers on the MIME message itself. Prepending a
    # hand-made header block to MIMEText.as_string() pushed the generated
    # MIME headers (Content-Type, MIME-Version, ...) below the blank line,
    # i.e. into the visible body of the e-mail.
    message = MIMEText(body, 'html')
    message['From'] = sender
    message['Subject'] = subject
    message['To'] = recipient

    session = smtplib.SMTP(SMTP_SERVER, SMTP_PORT)
    try:
        session.sendmail(sender, [recipient], message.as_string())
    finally:
        # Always release the connection, even if the message is refused.
        session.quit()
|
119
|
|
120
|
def record_status():
    """Record the current status and mapping of every dataset.

    For each dataset description file matching ``sitools2_xml_filenames``
    in ``file_path``, a ``<dataset_name>.lastStatus.xml`` snapshot is
    written in the same folder. It holds the dataset status, the number
    of mapped concepts and, for every mapped column, its id, its alias
    and its concept id. Any previous snapshot is overwritten.
    """
    # Local imports: these names are not provided by the module-level
    # import block.
    import glob
    from xml.sax.saxutils import escape

    print("\n> Recording current datasets properties ...\n")

    # sorted() keeps the alphabetical order formerly given by `ls`.
    for xml_file in sorted(glob.glob(file_path + sitools2_xml_filenames)):
        root = ET.parse(xml_file).getroot()

        dataset_name = root.find('name').text
        fileDataset = file_path + dataset_name + '.lastStatus.xml'

        # Collect everything BEFORE opening the snapshot for writing, so
        # that an unexpected dataset file cannot leave a truncated
        # snapshot behind in place of the previously recorded one.
        status = root.find('status').text
        column = create_id_alias_dict(root)
        mapped_Concepts_Id = [elem.text for elem in root.findall('conceptId')]
        mapped_Column_Id = [elem.text for elem in root.findall('columnId')]

        today = date.today().strftime("%A %d. %B %Y")
        chunks = [
            "<!--File recorded on " + str(today) + "-->\n",
            "<dataset>\n",
            "<!--Last Dataset status:-->\n",
            # Values are escaped so that '&' or '<' in a status, id or
            # alias cannot make the snapshot unparsable for '--check'.
            " <lastStatus>" + escape(status) + "</lastStatus>",
            "\n<!--Last mapping:-->\n <!--mappedColumns-->\n",
            " <totNum>" + str(len(mapped_Concepts_Id)) + "</totNum>\n",
        ]
        for position, column_id in enumerate(mapped_Column_Id):
            chunks.append(" <columnId>" + escape(str(column_id)) + "</columnId>\n")
            chunks.append(" <columnAlias>" + escape(str(column[column_id])) + "</columnAlias>\n")
            chunks.append(" <conceptId>" + escape(str(mapped_Concepts_Id[position])) + "</conceptId>\n")
        chunks.append(" <!--/mappedColumns-->\n</dataset>")

        with open(fileDataset, 'w') as fileOut:
            fileOut.write("".join(chunks))

        print(" - Current status and mapping of %s written in %s\n" % (dataset_name, fileDataset))
|
178
|
|
179
|
def check_status():
    """Compare every dataset with its last recorded status and mapping.

    For each dataset description file matching ``sitools2_xml_filenames``
    in ``file_path``, the current status and mapped column ids are
    compared with the ones stored in ``<dataset_name>.lastStatus.xml``.
    The outcome is printed on the console. If any inconsistency is found,
    an alert e-mail is sent with ``send_alert_mail`` and the same report
    is appended to ``monitoring_errors.log.html`` in ``file_path``.
    """
    # Local import: not provided by the module-level import block.
    import glob

    today = date.today().strftime("%A %d. %B %Y")
    now = time.strftime("%H:%M:%S")
    check_datime = today + ' at ' + now
    print("\n#\n#Last check done on " + check_datime + " \n#")
    print("\n> Checking datasets properties ...")

    error_status, error_mapping, warning = False, False, False
    email_body = "<br></br><i>Outcome of the datasets check done on %s</i>" % check_datime

    # sorted() keeps the alphabetical order formerly given by `ls`.
    for xml_file in sorted(glob.glob(file_path + sitools2_xml_filenames)):
        email_body += "\n"
        currentRoot = ET.parse(xml_file).getroot()

        dataset_name = currentRoot.find('name').text
        filename_dataset = file_path + dataset_name + '.lastStatus.xml'
        print("\n -  " + dataset_name)

        # A dataset that was never recorded (or whose snapshot is damaged)
        # must not abort the whole check: report it and go on.
        try:
            lastRoot = ET.parse(filename_dataset).getroot()
        except (IOError, ET.ParseError) as exc:
            warning = True
            reason = "The recorded status file %s cannot be read: %s" % (filename_dataset, exc)
            output_check = bcolors.WARNING + "[FAIL]" + bcolors.ENDC
            print(' {0:20s} {1:7s}'.format('Status check ...', output_check)
                  + "\n " + bcolors.WARNING + reason + bcolors.ENDC)
            email_body += "<h3>" + dataset_name + "</h3><b>" + reason + "</b>"
            continue

        #
        # Status check
        #
        lastStatus = lastRoot.find('lastStatus').text
        currentStatus = currentRoot.find('status').text
        output_message = ""

        if lastStatus != currentStatus:
            # A dataset that turned INACTIVE is a failure; any other
            # status change is reported as a warning.
            if currentStatus == 'INACTIVE':
                colour = bcolors.FAIL
                headline = "<b>*** " + _ERROR_CODE[1] + " ***</b>"
            else:
                warning = True
                colour = bcolors.WARNING
                headline = "<b>" + _ERROR_CODE[1] + "</b>"

            output_check = colour + "[FAIL]" + bcolors.ENDC
            output_message = "\n " + colour + _ERROR_CODE[1] + bcolors.ENDC
            output_message += ("\n Current status: " + colour + currentStatus + bcolors.ENDC
                               + " - last status: " + colour + lastStatus + bcolors.ENDC)
            email_body += ("<h3>" + dataset_name + "</h3>" + headline
                           + "<p>Current status: <b>" + currentStatus
                           + "</b> , last status: <b>" + lastStatus + "</b>.")
            error_status = True
            showStatus = ""
        else:
            output_check = bcolors.OKGREEN + "[OK]" + bcolors.ENDC
            showStatus = " - " + currentStatus

        print(' {0:20s} {1:7s}'.format('Status check ...', output_check) + showStatus + output_message)

        #
        # Mapping check
        #
        last_current_mapped_Column_Id = [elem.text for elem in lastRoot.findall('columnId')]
        current_mapped_Column_Id = [elem.text for elem in currentRoot.findall('columnId')]

        # Aliases come from the current dataset, with the recorded ones as
        # fallback: a column deleted from the dataset is unknown to the
        # current file, and looking it up there raised a KeyError.
        recorded_aliases = [elem.text for elem in lastRoot.findall('columnAlias')]
        column = dict(zip(last_current_mapped_Column_Id, recorded_aliases))
        column.update(create_id_alias_dict(currentRoot))

        current_ids = set(current_mapped_Column_Id)
        recorded_ids = set(last_current_mapped_Column_Id)
        # Mapped at record time, but not anymore.
        missing_current_columnAlias = [column.get(columnId, columnId)
                                       for columnId in last_current_mapped_Column_Id
                                       if columnId not in current_ids]
        # Mapped now, but not at record time.
        missing_last_columnAlias = [column.get(columnId, columnId)
                                    for columnId in current_mapped_Column_Id
                                    if columnId not in recorded_ids]

        output_message = ""
        unmapped_text = "\n The following column(s) is(are) not mapped anymore: " \
            + bcolors.FAIL + str(missing_current_columnAlias) + bcolors.ENDC

        if not missing_current_columnAlias and not missing_last_columnAlias:
            output_check = bcolors.OKGREEN + "[OK]" + bcolors.ENDC
        elif missing_last_columnAlias:
            warning = True
            output_check = bcolors.WARNING + "[FAIL]" + bcolors.ENDC
            output_message = "\n " + bcolors.WARNING + _ERROR_CODE[2] + bcolors.ENDC
            output_message += "\n Some columns are mapped in the new version, but not in the last one: " + bcolors.WARNING + str(missing_last_columnAlias) + bcolors.ENDC
            email_body += "<h3>" + dataset_name + "</h3><b>" + _ERROR_CODE[2] + "</b><p> Some columns are mapped in the new version, but not in the last one: " + str(missing_last_columnAlias) + "</p>"
            if missing_current_columnAlias:
                # Columns were both added and removed: the removed ones
                # used to go unreported in this case.
                error_mapping = True
                output_message += unmapped_text
                email_body += "<p> The following column(s) is(are) not mapped anymore: <b>" + str(missing_current_columnAlias) + "</b></p>"
        else:
            output_check = bcolors.FAIL + "[FAIL]" + bcolors.ENDC
            output_message = "\n " + bcolors.FAIL + _ERROR_CODE[2] + bcolors.ENDC
            output_message += unmapped_text
            email_body += "<h3>" + dataset_name + "</h3><b>*** " + _ERROR_CODE[2] + " ***</b><p> The following column(s) is(are) not mapped anymore: <b>" + str(missing_current_columnAlias) + "</b></p>"
            error_mapping = True

        print(' {0:20s} {1:7s}'.format('Mapping check ...', output_check) + output_message)

    print("\n__________________________________________________\n\n> Outcome of the check process:")
    email_body += "<p>__________________________________________________</p><p>The check process has produced the following message:"

    if error_status or error_mapping or warning:
        if warning:
            print("\n " + bcolors.WARNING + _ERROR_CODE[3] + bcolors.ENDC)
            email_body += "<p></p><i><h4>" + _ERROR_CODE[3] + '</h4></i></p>'
        else:
            print(bcolors.FAIL + '\n Unexpected errors have been found! Please check the datasets properties in Sitools2.' + bcolors.ENDC)
            email_body += "<p></p><h4>Unexpected errors have been found! Please check the datasets properties in Sitools2.</h4></br></p>"

        try:
            send_alert_mail(email_body)
            print("\n\t>> !! ALERT E-MAIL SENT !! <<\n")
        finally:
            # Keep a local trace of the report even if the e-mail could
            # not be delivered.
            errors_log_name = file_path + 'monitoring_errors.log.html'
            with open(errors_log_name, 'a') as errors_log_file:
                errors_log_file.write(email_body)
    else:
        print(bcolors.OKGREEN + '\n No errors/inconsistencies found.\n' + bcolors.ENDC)
|
301
|
|
302
|
|
303
|
|
304
|
def _print_usage():
    """Print the command-line syntax and the available options."""
    print(bcolors.WARNING + "\n> Sintax:\t$ python dataset_Monitoring.py [OPTION]\n" + bcolors.ENDC)
    print("Options:\n\n --check\n\tA status consistency check is performed between the current and last recorded datasets properties.")
    print("\n --record\n\tThe current datasets properties (status and mapping) are recorded. NB. This procedure will overwrite the previously recorded entries.\n")


def main(argv=None):
    """Run the option given on the command line, then exit.

    ``--check`` runs ``check_status()``; ``--record`` runs
    ``record_status()`` after an explicit confirmation. Without any
    option the usage is printed. An unknown option also prints the usage
    and exits with status 2 instead of being silently ignored.

    :param argv: argument list, program name first (default: ``sys.argv``).
    """
    if argv is None:
        argv = sys.argv

    if len(argv) <= 1:
        _print_usage()
        sys.exit(0)

    option = argv[1]
    if option == '--check':
        check_status()
    elif option == '--record':
        try:
            ask = raw_input   # Python 2
        except NameError:
            ask = input       # same function under its Python 3 name
        overwrite = ask(bcolors.WARNING + "\n This option will overwrite the current recorded settings. Do you really want to proceed?: " + bcolors.ENDC)
        if overwrite in ["Yes", "yes", "Y", "y", "oui", "OUI", "Oui"]:
            record_status()
        else:
            # Anything but an explicit "yes" leaves the records untouched;
            # unrecognised answers used to end the script without a word.
            print("Aborted.\n")
    else:
        _print_usage()
        sys.exit(2)

    sys.exit(0)


# Only act when executed as a script, so that importing the module does
# not trigger a check or a recording.
if __name__ == "__main__":
    main()
|