|
1 |
#! /usr/bin/python
|
|
2 |
|
|
3 |
# ******************************************************************************
|
|
4 |
# Copyright 2015 IAS - IDOC
|
|
5 |
#
|
|
6 |
# This program is free software: you can redistribute it and/or modify
|
|
7 |
# it under the terms of the GNU General Public License as published by
|
|
8 |
# the Free Software Foundation, either version 3 of the License, or
|
|
9 |
# (at your option) any later version.
|
|
10 |
#
|
|
11 |
# This program is distributed in the hope that it will be useful,
|
|
12 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14 |
# GNU General Public License for more details.
|
|
15 |
#
|
|
16 |
# You should have received a copy of the GNU General Public License
|
|
17 |
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
18 |
#
|
|
19 |
# ******************************************************************************
|
|
20 |
|
|
21 |
'''
|
|
22 |
This is a script aimed at monitoring the status of the Sitools2 datasets
|
|
23 |
and the mapping of their fields.
|
|
24 |
|
|
25 |
Put the script in the Sitools2 folder data/datasets(/map, for the latest versions)
|
|
26 |
where the dataset information is stored as XML files.
|
|
27 |
Then, read and record the current status/mapping, with:
|
|
28 |
|
|
29 |
$ datasets_Monitoring --record
|
|
30 |
|
|
31 |
The datasets properties are locally stored in files named as:
|
|
32 |
|
|
33 |
<dataset_name>.lastStatus.xml
|
|
34 |
|
|
35 |
The '--record' option should be executed manually by the administrator every time
|
|
36 |
one or more datasets are modified (it would be good to add a reminder to the
|
|
37 |
Sitools2 pop-up message).
|
|
38 |
|
|
39 |
To check the datasets mapping/status, launch the script with the '--check' option:
|
|
40 |
|
|
41 |
$ datasets_Monitoring --check
|
|
42 |
|
|
43 |
This performs a consistency check between the current (int@*.xml or map/string@*.xml)
|
|
44 |
and the last recorded status (<dataset_name>.lastStatus.xml).
|
|
45 |
If any inconsistency is found, an alert e-mail is sent.
|
|
46 |
|
|
47 |
The '--check' option should be run daily and automatically by the system.
|
|
48 |
|
|
49 |
@author: Alessandro NASTASI for IAS -IDOC
|
|
50 |
@date: 27/04/2015
|
|
51 |
'''
|
|
52 |
|
|
53 |
# Module metadata.
__author__ = "Alessandro Nastasi"
__credits__ = [
    "Alessandro Nastasi",
    "Herve' Ballans",
]
__license__ = "GPL"
__version__ = "1.0"
__date__ = "27/04/2015"
|
|
58 |
|
|
59 |
import sys,os, time
|
|
60 |
from datetime import date
|
|
61 |
import xml.etree.ElementTree as ET
|
|
62 |
import smtplib
|
|
63 |
from email.mime.text import MIMEText
|
|
64 |
|
|
65 |
# Glob pattern of the files in which Sitools2 stores the datasets information.
sitools2_xml_filenames = "string@*.xml"

# Directory where the (string@*.xml) Sitools2 files are stored. The
# <dataset_name>.lastStatus.xml snapshots and the errors log of this script
# are written to the same directory.
file_path = '/usr/local/Sitools2_SZ_Cluster_DB/data/datasets/map/'
|
|
67 |
|
|
68 |
class bcolors:
    """ANSI escape sequences used to colorize the console output."""

    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    # Resets the terminal attributes; appended after every colored fragment.
    ENDC = '\033[0m'
|
|
75 |
|
|
76 |
# Messages shown on the console and in the alert e-mail, keyed by a numeric code:
#   1 -> the dataset status differs from the recorded one
#   2 -> the columns mapping differs from the recorded one
#   3 -> hint that the recorded snapshots are probably out of date
_ERROR_CODE = {
    1: 'Status inconsistency found:',
    2: 'Mapping inconsistency found:',
    3: "The *.lastStatus.xml files were probably not updated: re-run the script with '--record' option.",
}
|
|
81 |
|
|
82 |
def create_id_alias_dict(xml_root):
    """Build the dictionary ``{column_id: column_alias}`` of a dataset.

    Every ``column`` element that is a direct child of *xml_root* contributes
    one entry, taken from the text of its ``id`` and ``columnAlias`` children.
    No mapping information is involved here. If an id occurs more than once,
    the last occurrence wins.

    Raises AttributeError when a ``column`` element lacks an ``id`` or a
    ``columnAlias`` child.
    """
    return dict(
        (column.find('id').text, column.find('columnAlias').text)
        for column in xml_root.findall('column')
    )
|
|
91 |
|
|
92 |
|
|
93 |
def send_alert_mail(body):
    """Send the alert e-mail, with *body* as its HTML content.

    The message is sent from the Sitools2 notifier address to the Sitools2
    recipient address through the SMTP server configured below (port 25,
    neither TLS nor authentication is used).

    body -- HTML text of the message.

    Raises smtplib.SMTPException (or a socket error) if the server cannot be
    reached or refuses the message.
    """
    SMTP_SERVER = 'smtp.ias.u-psud.fr'
    SMTP_PORT = 25

    sender = 'sitools2.notifier@ias.u-psud.fr'
    recipient = 'sitools2@ias.u-psud.fr'
    subject = '[Sitools2 - SZDB] Datasets status ALERT'

    # MIMEText already generates the MIME headers (MIME-Version, Content-Type,
    # Content-Transfer-Encoding). The addressing headers are therefore set on
    # the same message object: prepending a hand-written header block to
    # as_string() would push the MIME headers after the blank line, i.e. into
    # the visible body of the e-mail.
    message = MIMEText(body, 'html')
    message['From'] = sender
    message['To'] = recipient
    message['Subject'] = subject

    session = smtplib.SMTP(SMTP_SERVER, SMTP_PORT)
    try:
        session.sendmail(sender, [recipient], message.as_string())
    finally:
        # Release the connection even when the message is refused.
        session.quit()
|
|
119 |
|
|
120 |
def record_status():
    """Record the current status and mapping of every dataset.

    For each file matching ``file_path + sitools2_xml_filenames`` the dataset
    name, its status and its mapped column/concept ids are read, and written
    to ``<file_path><dataset_name>.lastStatus.xml``. An existing snapshot of
    the same dataset is overwritten.

    Raises KeyError if a mapped column id has no matching ``column`` element,
    and IndexError if there are fewer ``conceptId`` than ``columnId`` elements;
    in both cases the previous snapshot of that dataset is left untouched.
    """
    # Imported locally so the function does not depend on the module-level
    # import list.
    import glob
    from xml.sax.saxutils import escape

    print("\n> Recording current datasets properties ...\n")

    # glob replaces a string-built `ls` shell command, which broke on paths
    # containing spaces; sorted() keeps the alphabetical order `ls` produced.
    dataset_files = sorted(glob.glob(file_path + sitools2_xml_filenames))
    if not dataset_files:
        sys.stderr.write("No file matching %s%s was found.\n"
                         % (file_path, sitools2_xml_filenames))

    recorded_on = date.today().strftime("%A %d. %B %Y")

    for xml_file in dataset_files:
        root = ET.parse(xml_file).getroot()

        dataset_name = root.find('name').text
        status = root.find('status').text
        fileDataset = file_path + dataset_name + '.lastStatus.xml'

        ## Column dictionary " column_id : column_alias ". No mapping information here.
        column = create_id_alias_dict(root)

        mapped_concept_ids = [elem.text for elem in root.findall('conceptId')]
        mapped_column_ids = [elem.text for elem in root.findall('columnId')]

        # The whole snapshot is assembled in memory first: a lookup failure
        # below must not leave a truncated *.lastStatus.xml behind.
        # Values are XML-escaped so that the snapshot stays parseable when a
        # status, id or alias contains '&', '<' or '>'.
        lines = [
            "<!--File recorded on " + recorded_on + "-->\n",
            "<dataset>\n",
            "<!--Last Dataset status:-->\n",
            " <lastStatus>" + escape(status) + "</lastStatus>",
            "\n<!--Last mapping:-->\n <!--mappedColumns-->\n",
            " <totNum>" + str(len(mapped_concept_ids)) + "</totNum>\n",
        ]
        for index, column_id in enumerate(mapped_column_ids):
            lines.append(" <columnId>" + escape(str(column_id)) + "</columnId>\n")
            lines.append(" <columnAlias>" + escape(str(column[column_id])) + "</columnAlias>\n")
            lines.append(" <conceptId>" + escape(str(mapped_concept_ids[index])) + "</conceptId>\n")
        lines.append(" <!--/mappedColumns-->\n</dataset>")

        with open(fileDataset, 'w') as fileOut:
            fileOut.writelines(lines)

        print(" - Current status and mapping of %s written in %s\n" % (dataset_name, fileDataset))
|
|
178 |
|
|
179 |
def _column_alias(column_id, current_aliases, recorded_aliases):
    """Return the alias of *column_id*, preferring the current dataset file.

    Falls back to the alias stored in the recorded snapshot and finally to the
    id itself, so that a column removed from the dataset can still be named.
    """
    if column_id in current_aliases:
        return current_aliases[column_id]
    return recorded_aliases.get(column_id, column_id)


def check_status():
    """Compare every dataset with its last recorded snapshot.

    For each file matching ``file_path + sitools2_xml_filenames`` the current
    status and the mapped column ids are compared with the content of
    ``<file_path><dataset_name>.lastStatus.xml``. The outcome is printed on
    the console. If any inconsistency is found, the report is appended to
    ``<file_path>monitoring_errors.log.html`` and sent with send_alert_mail().

    Two severities are tracked:
      warning -- the snapshot looks out of date (status changed to something
                 other than INACTIVE, columns mapped only in the current file,
                 snapshot missing or unreadable);
      failure -- the dataset became INACTIVE or a recorded mapping was lost.
    """
    # Imported locally so the function does not depend on the module-level
    # import list.
    import glob

    today = date.today().strftime("%A %d. %B %Y")
    now = time.strftime("%H:%M:%S")
    check_datime = today + ' at ' + now
    print("\n#\n#Last check done on " + check_datime + " \n#")
    print("\n> Checking datasets properties ...")

    # glob replaces a string-built `ls` shell command, which broke on paths
    # containing spaces; sorted() keeps the alphabetical order `ls` produced.
    dataset_files = sorted(glob.glob(file_path + sitools2_xml_filenames))
    if not dataset_files:
        sys.stderr.write("No file matching %s%s was found.\n"
                         % (file_path, sitools2_xml_filenames))

    warning, failure = False, False
    email_body = "<br></br><i>Outcome of the datasets check done on %s</i>" % check_datime

    for xml_file in dataset_files:
        email_body += "\n"
        currentRoot = ET.parse(xml_file).getroot()

        dataset_name = currentRoot.find('name').text
        filename_dataset = file_path + dataset_name + '.lastStatus.xml'
        print("\n -  " + dataset_name)

        # A dataset without a usable snapshot must not abort the whole run:
        # it is reported as a warning and the remaining datasets are checked.
        try:
            lastRoot = ET.parse(filename_dataset).getroot()
        except (IOError, ET.ParseError):
            warning = True
            print(" " + bcolors.WARNING + "No readable recorded status: " + filename_dataset + bcolors.ENDC)
            email_body += "<h3>" + dataset_name + "</h3><p>No readable recorded status: " + filename_dataset + "</p>"
            continue

        ##************ Status ************##

        lastStatus = lastRoot.find('lastStatus').text
        currentStatus = currentRoot.find('status').text
        showStatus = ""
        output_message = ""

        if lastStatus != currentStatus:
            if currentStatus == 'INACTIVE':
                failure = True
                color = bcolors.FAIL
                headline = "<b>*** " + _ERROR_CODE[1] + " ***</b>"
            else:
                warning = True
                color = bcolors.WARNING
                headline = "<b>" + _ERROR_CODE[1] + "</b>"
            output_check = color + "[FAIL]" + bcolors.ENDC
            output_message = "\n " + color + _ERROR_CODE[1] + bcolors.ENDC
            output_message += "\n Current status: " + color + currentStatus + bcolors.ENDC + " - last status: " + color + lastStatus + bcolors.ENDC
            email_body += "<h3>" + dataset_name + "</h3>" + headline + "<p>Current status: <b>" + currentStatus + "</b> , last status: <b>" + lastStatus + "</b>."
        else:
            output_check = bcolors.OKGREEN + "[OK]" + bcolors.ENDC
            showStatus = " - " + currentStatus

        print(' {0:20s} {1:7s}'.format('Status check ...', output_check) + showStatus + output_message)

        ##************ Mapping ************##

        last_mapped_column_ids = [elem.text for elem in lastRoot.findall('columnId')]
        current_mapped_column_ids = [elem.text for elem in currentRoot.findall('columnId')]

        # Aliases come from the current file; the snapshot stores each
        # columnAlias right after its columnId, in the same order, and is used
        # as a fallback for columns that no longer exist in the dataset.
        column = create_id_alias_dict(currentRoot)
        recorded_aliases = dict(zip(
            last_mapped_column_ids,
            [elem.text for elem in lastRoot.findall('columnAlias')]))

        current_ids = set(current_mapped_column_ids)
        last_ids = set(last_mapped_column_ids)

        # Columns mapped in the last file, but not in the current one --> Mapping lost!
        missing_current_columnAlias = [
            _column_alias(column_id, column, recorded_aliases)
            for column_id in last_mapped_column_ids if column_id not in current_ids]
        # Columns mapped in the current file, but not in the last one
        # --> User's error: *.lastStatus.xml probably not updated
        missing_last_columnAlias = [
            _column_alias(column_id, column, recorded_aliases)
            for column_id in current_mapped_column_ids if column_id not in last_ids]

        output_check = bcolors.OKGREEN + "[OK]" + bcolors.ENDC
        output_message = ""

        # The two findings are reported independently: a lost mapping must not
        # be hidden because other columns were newly mapped at the same time.
        if missing_last_columnAlias:
            warning = True
            output_check = bcolors.WARNING + "[FAIL]" + bcolors.ENDC
            output_message += "\n " + bcolors.WARNING + _ERROR_CODE[2] + bcolors.ENDC
            output_message += "\n Some columns are mapped in the new version, but not in the last one: " + bcolors.WARNING + str(missing_last_columnAlias) + bcolors.ENDC
            email_body += "<h3>" + dataset_name + "</h3><b>" + _ERROR_CODE[2] + "</b><p> Some columns are mapped in the new version, but not in the last one: " + str(missing_last_columnAlias) + "</p>"

        if missing_current_columnAlias:
            failure = True
            # The more severe color wins when both findings are present.
            output_check = bcolors.FAIL + "[FAIL]" + bcolors.ENDC
            output_message += "\n " + bcolors.FAIL + _ERROR_CODE[2] + bcolors.ENDC
            output_message += "\n The following column(s) is(are) not mapped anymore: " + bcolors.FAIL + str(missing_current_columnAlias) + bcolors.ENDC
            email_body += "<h3>" + dataset_name + "</h3><b>*** " + _ERROR_CODE[2] + " ***</b><p> The following column(s) is(are) not mapped anymore: <b>" + str(missing_current_columnAlias) + "</b></p>"

        print(' {0:20s} {1:7s}'.format('Mapping check ...', output_check) + output_message)

    print("\n__________________________________________________\n\n> Outcome of the check process:")
    email_body += "<p>__________________________________________________</p><p>The check process has produced the following message:"

    ## Send an e-mail alert if any error/inconsistency is found
    if warning or failure:
        if warning:
            print("\n " + bcolors.WARNING + _ERROR_CODE[3] + bcolors.ENDC)
            email_body += "<p></p><i><h4>" + _ERROR_CODE[3] + '</h4></i></p>'
        # Not an `else`: real failures must be announced even when some other
        # dataset only produced a warning.
        if failure:
            print(bcolors.FAIL + '\n Unexpected errors have been found! Please check the datasets properties in Sitools2.' + bcolors.ENDC)
            email_body += "<p></p><h4>Unexpected errors have been found! Please check the datasets properties in Sitools2.</h4></br></p>"

        # The alert text is stored in an *log.html file for keeping track of
        # the found errors. It is written before the mail is sent so that the
        # record survives an SMTP failure.
        errors_log_name = file_path + 'monitoring_errors.log.html'
        with open(errors_log_name, 'a') as errors_log_file:
            errors_log_file.write(email_body)

        send_alert_mail(email_body)
        print("\n\t>> !! ALERT E-MAIL SENT !! <<\n")
    else:
        print(bcolors.OKGREEN + '\n No errors/inconsistencies found.\n' + bcolors.ENDC)
|
|
301 |
|
|
302 |
|
|
303 |
|
|
304 |
def _print_usage():
    """Print the command-line help on the console."""
    print(bcolors.WARNING + "\n> Sintax:\t$ python dataset_Monitoring.py [OPTION]\n" + bcolors.ENDC)
    print("Options:\n\n --check\n\tA status consistency check is performed between the current and last recorded datasets properties.")
    print("\n --record\n\tThe current datasets properties (status and mapping) are recorded. NB. This procedure will overwrite the previously recorded entries.\n")


def main(argv=None):
    """Run the command-line interface and return the process exit code.

    argv -- argument list, program name first; defaults to sys.argv.

    ``--check`` runs check_status(). ``--record`` runs record_status() after
    an explicit confirmation. Without arguments the help is printed and 0 is
    returned; an unrecognised option prints the help and returns 2 instead of
    silently doing nothing.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) <= 1:
        _print_usage()
        return 0

    option = argv[1]
    if option == '--check':
        check_status()
        return 0

    if option == '--record':
        try:
            ask = raw_input
        except NameError:
            # Python 3: raw_input() was renamed to input().
            ask = input
        overwrite = ask(bcolors.WARNING + "\n This option will overwrite the current recorded settings. Do you really want to proceed?: " + bcolors.ENDC)
        # Only an explicit confirmation overwrites the snapshots; any other
        # answer (including an empty one) aborts with a message.
        if overwrite.strip().lower() in ("yes", "y", "oui"):
            record_status()
        else:
            print("Aborted.\n")
        return 0

    _print_usage()
    return 2


# Guarded so that importing the module does not start the command-line interface.
if __name__ == "__main__":
    sys.exit(main())
|
# Script to monitor datasets status/mapping