git_sitools_idoc / webstatDev / idoc_Maison / webstat / libStatSitools.py @ 94284c9a
1 |
__version__ = "0.1"
|
---|---|
2 |
__license__ = "GPL"
|
3 |
__author__ = "Marc NICOLAS"
|
4 |
__credit__ = "Marc NICOLAS"
|
5 |
__maintainer__ = "Marc NICOLAS"
|
6 |
__email__ = "marc.nicolas@ias.u-psud.fr"
|
7 |
|
8 |
import sys, os |
9 |
|
10 |
try:
|
11 |
import copy |
12 |
except:
|
13 |
sys.exit("Import failed in module libStatSitools :\n\tcopy module is required")
|
14 |
try:
|
15 |
import pygeoip |
16 |
except:
|
17 |
sys.exit("Import failed in module libStatSitools :\n\tpygeoip module is required")
|
18 |
try:
|
19 |
from datetime import datetime, timedelta |
20 |
except:
|
21 |
sys.exit("Import failed in module libStatSitools :\n\tdatetime module is required")
|
22 |
try:
|
23 |
import os |
24 |
except:
|
25 |
sys.exit("Import failed in module libStatSitools :\n\tos module is required")
|
26 |
try:
|
27 |
import string |
28 |
except:
|
29 |
sys.exit("Import failed in module libStatSitools :\n\tstring module is required")
|
30 |
try:
|
31 |
import time |
32 |
except:
|
33 |
sys.exit("Import failed in module libStatSitools :\n\ttime module is required")
|
34 |
try:
|
35 |
import copy |
36 |
except:
|
37 |
sys.exit("Import failed in module libStatSitools :\n\tcopy module is required")
|
38 |
try:
|
39 |
from libChart4StatSitools2 import * |
40 |
except:
|
41 |
sys.exit("Import failed in module lancementStat :\n\tlibChart4StatSitools2 module is required")
|
42 |
try:
|
43 |
from libStatSitoolsConstante import * |
44 |
except:
|
45 |
sys.exit("Import failed in module lancementStat :\n\tlibStatSitoolsConstante module is required")
|
46 |
try:
|
47 |
import re |
48 |
except:
|
49 |
sys.exit("Import failed in module lancementStat :\n\tre module is required")
|
50 |
try:
|
51 |
import operator |
52 |
except:
|
53 |
sys.exit("Import failed in module lancementStat :\n\toperator module is required")
|
54 |
# Location of the GeoIP country database.
# The default is the development checkout path that used to be hard-coded
# here; set the SITOOLS_WEBSTAT_GEOIP environment variable to use another
# file (production used /usr/local/sitools2_webstat/webstat/GeoIP.dat).
_GEOIP_DAT_PATH = os.environ.get(
    "SITOOLS_WEBSTAT_GEOIP",
    "/home/marc/MyDev/sitools-idoc/webstatDev/idoc_Maison/webstat/GeoIP.dat"
)

try:
    # Module-level GeoIP handle used by stat_log() for country lookups.
    gi = pygeoip.GeoIP(_GEOIP_DAT_PATH)
except Exception:
    # Any failure to open/parse the database is fatal for this module.
    sys.exit("Import of GeoIP.dat failed. Verify that the GeoIP file exists !")


# strptime/strftime pattern for a full timestamp.
time_format = "%Y-%m-%d %H:%M:%S"
# strptime pattern for the date-only part of a log line timestamp.
time_format_for_by_month = "%Y-%m-%d"
|
65 |
|
66 |
|
67 |
def reduce_dico(dico, max_per_cent):
    """Return the result of ``refactor_dico(dico, max_per_cent)``.

    Thin alias: all the filtering is delegated to ``refactor_dico``.
    """
    return refactor_dico(dico, max_per_cent)
|
70 |
|
71 |
|
72 |
def _is_service_log(pathfile, filename):
    """Return True when *pathfile* is a non-empty ``sitools-log-service`` log."""
    name_parts = os.path.basename(filename).split(".")
    return (
        os.path.isfile(pathfile)
        # Guard the [1] access: a name without any dot must simply be skipped
        # instead of raising IndexError.
        and len(name_parts) > 1
        and name_parts[1] == "log"
        and os.path.getsize(pathfile) != 0
        and "sitools-log-service" in filename
    )


def _log_line_date(infos):
    """Return the date found in field 0 of a log line, or None if unparseable.

    The last four characters of the first space-separated token are dropped
    and whatever precedes the "T" separator is parsed with
    ``time_format_for_by_month``.
    NOTE(review): the dropped suffix is presumably a fractional-second or
    zone marker - confirm against the actual log format.
    """
    stamp = infos[0].split(" ")[0]
    day = stamp[:-4].replace("T", " ").split(" ")[0]
    try:
        return datetime.strptime(day, time_format_for_by_month).date()
    except ValueError:
        return None


def _download_project(infos, dico_project):
    """Return the project name a 16-field download line belongs to.

    Falls back to "others" when no project can be extracted or when the
    extracted name (lower-cased) is not a key of *dico_project*.
    """
    last_field_parts = infos[15].split("/")
    if last_field_parts[0] == "-\n":
        # No usable last field: take the 9th "/"-separated part of field 7,
        # if the path is long enough to have one.
        url_parts = infos[7].split("/")
        project = url_parts[8] if len(url_parts) > 8 else "others"
    elif len(last_field_parts) == 7:
        project = last_field_parts[5]
    else:
        project = "others"
    if project.lower() not in dico_project:
        project = "others"
    return project


def _accumulate(totals, key, amount):
    """Add *amount* to ``totals[key]``, creating the entry when missing."""
    totals[key] = totals.get(key, 0) + amount


def stat_log(application, cond_app):
    """Scan the Sitools service logs of *application* and build statistics.

    The constants of the application come from ``defCst(application)``.
    Every non-empty ``sitools-log-service*.log`` file found in the
    directories listed in ``dico_cste["listPathLogs"]`` is read line by line
    (tab-separated fields). Lines from excluded IPs or matching the
    bot/Yahoo/Baidu patterns are ignored, as are lines whose date is not
    strictly between ``cond_app["start_date"]`` and ``cond_app["end_date"]``.

    Parameters:
        application: value passed to ``defCst`` to get the constants dict.
        cond_app: dict holding "start_date", "end_date", "period_date" and
            the "is_access_access_by_*" / "is_down_stat_by_*" flags that
            select which statistics and charts are produced.

    Returns:
        A dict with the download count ("nb_download"), the formatted
        download volume ("vol_download"), the access count ("nb_access_ip"),
        one entry per chart (None when the matching flag is false) and the
        constants dict ("dico_cste"). Returns None when any error occurs;
        the error message is printed.

    NOTE(review): both date bounds are exclusive, so lines dated exactly
    start_date or end_date are not counted - confirm this is intended.
    """
    try:
        dico_cste = defCst(application)

        dico_project = dico_cste["dicoProject"]
        list_path_logs = dico_cste["listPathLogs"]
        start_date = cond_app["start_date"]
        end_date = cond_app["end_date"]

        # Dotted-quad prefix check; the dots are escaped so they no longer
        # match arbitrary characters.
        reg_infos_url = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')

        # ---------------- access statistics ----------------
        dico_access_ip = {}
        dico_access_by_country = {}
        dico_access_by_month = {}
        nb_access_ip = 0

        # ---------------- download statistics ----------------
        vol_download = 0
        nb_download = 0
        total_down_by_months = {}
        total_down_per_country = {}
        total_down_per_project = {}
        total_down_per_ip = {}

        for pathLog in list_path_logs:
            listarbo = os.listdir(pathLog)
            print("PathLog :  %s" % (pathLog,))
            for fileToOpen in listarbo:
                pathfile = pathLog + "/" + fileToOpen
                if not _is_service_log(pathfile, fileToOpen):
                    continue
                print("Traitement de  %s" % (pathfile,))
                print("taille :  %s" % (os.path.getsize(pathfile),))

                # "with" guarantees the file is closed even when a line
                # raises; iterating the file avoids loading it whole.
                with open(pathfile, "r") as fileopen:
                    for line in fileopen:
                        infos = line.split("\t")

                        # Cheap filters first: short lines, excluded IPs and
                        # crawler user agents are dropped before any parsing.
                        if not (len(infos) > 14
                                and infos[2] not in dico_cste["listIpToExclude"]
                                and dico_cste["patternBot"] not in infos[14]
                                and dico_cste["patternYahoo"] not in infos[14]
                                and not any(s in infos[14] for s in dico_cste["patternBaidu"])):
                            continue

                        # A malformed timestamp skips the line instead of
                        # aborting the whole run.
                        my_time = _log_line_date(infos)
                        if my_time is None or not (start_date < my_time < end_date):
                            continue

                        ip = infos[2]
                        month_key = (my_time.month, my_time.year)

                        # ------------------------------------------------
                        # Download statistics
                        # ------------------------------------------------
                        if (len(infos) == 16
                                and infos[6] == dico_cste["pattern_http_method"]
                                and infos[9] == dico_cste["pattern_code_retour"]
                                and any(s in infos[7] for s in dico_cste["pattern_download"])
                                and not any(s in infos[7] for s in dico_cste["sia"])
                                and not any(s in infos[7] for s in dico_cste["CutFits"])
                                and gi.country_code_by_addr(ip) != "CN"):
                            nb_download += 1

                            # Field 10 is parsed once; a non-numeric value
                            # still counts as a download but adds no volume.
                            try:
                                size = float(infos[10])
                            except ValueError:
                                size = None

                            if size is not None:
                                vol_download += size

                                # Download volume by project
                                if cond_app["is_down_stat_by_project"]:
                                    project = _download_project(infos, dico_project)
                                    # "others" itself may be absent from
                                    # dico_project; such lines are not
                                    # attributed to any project.
                                    if project.lower() in dico_project:
                                        _accumulate(total_down_per_project,
                                                    dico_project[project.lower()], size)

                                # Download volume by month
                                if cond_app["is_down_stat_by_months"]:
                                    _accumulate(total_down_by_months, month_key, size)

                                # Download volume by country
                                if cond_app["is_down_stat_by_country"]:
                                    _accumulate(total_down_per_country,
                                                gi.country_code_by_addr(ip), size)

                                # Download volume by IP
                                if cond_app["is_down_stat_by_ip"]:
                                    _accumulate(total_down_per_ip, ip, size)

                            # Download volume by user
                            # (cond_app["is_down_stat_by_user"]) was never
                            # implemented.

                        # ------------------------------------------------
                        # Access statistics
                        # ------------------------------------------------
                        # 127.0.0.1 is excluded to drop Hobbit hits from a
                        # period when the "real" client IP was not available.
                        if (ip != "127.0.0.1"
                                and reg_infos_url.match(ip) is not None
                                and gi.country_code_by_addr(ip) != "CN"):
                            # Access by IP
                            if (cond_app["is_access_access_by_ip"]
                                    and dico_cste["patternAccessToSeek"] == infos[7]):
                                nb_access_ip += 1
                                _accumulate(dico_access_ip, ip, 1)

                            # Access by country
                            if (cond_app["is_access_access_by_country"]
                                    and dico_cste["patternAccessToSeek"] == infos[7]):
                                # Looked up once per line instead of twice.
                                _accumulate(dico_access_by_country,
                                            gi.country_name_by_name(ip), 1)

                            # Access by month
                            if (cond_app["is_access_access_by_months"]
                                    and dico_cste["patternAccessToSeek"] == infos[7]):
                                _accumulate(dico_access_by_month, month_key, 1)

                            # The VO cone-search access statistics
                            # (dico_cste["patternVoConeSearchToSeek"]) were
                            # disabled and are not computed.

        # ---------------- chart creation ----------------
        date_title_chart = ""
        if cond_app["period_date"]:
            date_title_chart = " entre le " + str(start_date) + " et le " + str(end_date)

        max_per_cent = dico_cste["dico_max_per_cent"] if (
            cond_app["is_access_access_by_ip"]
            or cond_app["is_access_access_by_country"]
            or cond_app["is_down_stat_by_project"]
            or cond_app["is_down_stat_by_ip"]
            or cond_app["is_down_stat_by_country"]
        ) else None

        if cond_app["is_access_access_by_ip"]:
            access_ip_chart_svg_png = svg_chart(
                reduce_dico(dico_access_ip, max_per_cent["is_access_access_by_ip"]),
                "Acces par IP" + date_title_chart)
        else:
            access_ip_chart_svg_png = None

        if cond_app["is_access_access_by_country"]:
            access_country_chart_svg_png = svg_chart(
                reduce_dico(dico_access_by_country,
                            max_per_cent["is_access_access_by_country"]),
                "Acces par pays" + date_title_chart)
        else:
            access_country_chart_svg_png = None

        if cond_app["is_access_access_by_months"]:
            access_month_chart_svg_png = svg_bar_chart(
                dico_access_by_month, start_date, end_date,
                "Acces par mois" + date_title_chart)
        else:
            access_month_chart_svg_png = None

        if cond_app["is_down_stat_by_project"]:
            total_down_per_project_chart = svg_chart(
                reduce_dico(total_down_per_project,
                            max_per_cent["is_down_stat_by_project"]),
                "Volume de telechargement par projet" + date_title_chart)
        else:
            total_down_per_project_chart = None

        if cond_app["is_down_stat_by_ip"]:
            total_down_per_ip_chart = svg_chart(
                reduce_dico(total_down_per_ip, max_per_cent["is_down_stat_by_ip"]),
                "Volume de telechargement par IP" + date_title_chart)
        else:
            total_down_per_ip_chart = None

        if cond_app["is_down_stat_by_months"]:
            total_down_per_month_chart = svg_bar_chart(
                total_down_by_months, start_date, end_date,
                "Volume de telechargement par mois" + date_title_chart)
        else:
            total_down_per_month_chart = None

        if cond_app["is_down_stat_by_country"]:
            total_down_per_country_chart = svg_chart(
                reduce_dico(total_down_per_country,
                            max_per_cent["is_down_stat_by_country"]),
                "Volume de telechargement par Pays" + date_title_chart)
        else:
            total_down_per_country_chart = None

        return {
            "nb_download": nb_download,
            "vol_download": format_download_vol(vol_download),
            "nb_access_ip": nb_access_ip,
            "access_ip_chart_svg_png": access_ip_chart_svg_png,
            "access_country_chart_svg_png": access_country_chart_svg_png,
            "access_month_chart_svg_png": access_month_chart_svg_png,
            "total_down_per_project_chart": total_down_per_project_chart,
            "total_down_per_month_chart": total_down_per_month_chart,
            "total_down_per_country_chart": total_down_per_country_chart,
            "total_down_per_ip_chart": total_down_per_ip_chart,
            "dico_cste": dico_cste,
        }

    except BaseException as e:
        # NOTE(review): BaseException is kept so callers keep receiving None
        # on every failure, but it also swallows KeyboardInterrupt and
        # SystemExit - consider narrowing to Exception.
        print("Error :  %s" % (str(e),))
        return None
294 |
|
295 |
|
296 |
def refactor_dico(dico, max):
    """Return the entries of *dico* whose share of the total exceeds *max*.

    Parameters:
        dico: mapping of key -> numeric value.
        max: threshold as a fraction of the sum of all values; an entry is
            kept only when ``value / total`` is strictly greater than it.
            (The name shadows the builtin but is kept for callers passing it
            by keyword.)

    Returns:
        A new dict with the retained entries. It is empty when *dico* is
        empty or when its values sum to zero, in which case no share can be
        computed.
    """
    total = float(sum(dico.values()))
    if total == 0:
        # Avoid ZeroDivisionError on a non-empty dict of zero values.
        dico2 = {}
    else:
        dico2 = {key: value for key, value in dico.items() if value / total > max}
    # Trace kept from the original implementation.
    print(dico2)
    return dico2
|
307 |
|
308 |
|
309 |
def format_download_vol(vol_down):
    """Format a byte count as a human-readable string in decimal units.

    Units are powers of 1000 with French suffixes: "o", "Ko", "Mo", "Go"
    and "To". The value is rounded to two decimals.

    Parameters:
        vol_down: number of bytes (int or float).

    Returns:
        The formatted string, e.g. "1.5 Ko". An empty string is returned
        (and the error printed) when *vol_down* cannot be converted to a
        number.
    """
    try:
        # float() also prevents integer division on Python 2 for int input.
        volume = float(vol_down)
    except (TypeError, ValueError) as e:
        print(str(e))
        return ""

    # Largest unit first; anything below 1000 is reported in plain octets and
    # anything from 10**12 upwards in "To" instead of yielding "".
    for factor, suffix in ((1e12, " To"), (1e9, " Go"), (1e6, " Mo"), (1e3, " Ko")):
        if volume >= factor:
            return str(round(volume / factor, 2)) + suffix
    return str(round(volume, 2)) + " o"
|
332 |
|