Project

General

Profile

Download (18.9 KB) Statistics
| Branch: | Revision:

git_sitools_idoc / webstatDev / idoc_Maison / webstat / libStatSitools.py @ 94284c9a

1 8792cad8 Marc NICOLAS
__version__ = "0.1"
2
__license__ = "GPL"
3
__author__ = "Marc NICOLAS"
4
__credit__ = "Marc NICOLAS"
5
__maintainer__ = "Marc NICOLAS"
6
__email__ = "marc.nicolas@ias.u-psud.fr"
7
8
import sys, os
9
10
try:
11
    import copy
12
except:
13
    sys.exit("Import failed in module libStatSitools :\n\tcopy module is required")
14
try:
15
    import pygeoip
16
except:
17
    sys.exit("Import failed in module libStatSitools :\n\tpygeoip module is required")
18
try:
19
    from datetime import datetime, timedelta
20
except:
21
    sys.exit("Import failed in module libStatSitools :\n\tdatetime module is required")
22
try:
23
    import os
24
except:
25
    sys.exit("Import failed in module libStatSitools :\n\tos module is required")
26
try:
27
    import string
28
except:
29
    sys.exit("Import failed in module libStatSitools :\n\tstring module is required")
30
try:
31
    import time
32
except:
33
    sys.exit("Import failed in module libStatSitools :\n\ttime module is required")
34
try:
35
    import copy
36
except:
37
    sys.exit("Import failed in module libStatSitools :\n\tcopy module is required")
38
try:
39
    from libChart4StatSitools2 import *
40
except:
41
    sys.exit("Import failed in module lancementStat :\n\tlibChart4StatSitools2 module is required")
42
try:
43
    from libStatSitoolsConstante import *
44
except:
45
    sys.exit("Import failed in module lancementStat :\n\tlibStatSitoolsConstante module is required")
46
try:
47
    import re
48
except:
49
    sys.exit("Import failed in module lancementStat :\n\tre module is required")
50
try:
51
    import operator
52
except:
53
    sys.exit("Import failed in module lancementStat :\n\toperator module is required")
54
try:
55
    # PATH FOR PROD
56
    #gi = pygeoip.GeoIP("/usr/local/sitools2_webstat/webstat/GeoIP.dat")
57
    # PATH FOR DEV
58
    gi = pygeoip.GeoIP("/home/marc/MyDev/sitools-idoc/webstatDev/idoc_Maison/webstat/GeoIP.dat")
59
except:
60
    sys.exit("Import of GeoIP.dat failed. Verify that the GeoIP file exists !")
61
62
63
# strptime/strftime pattern for a full "date time" stamp.
time_format = "%Y-%m-%d %H:%M:%S"
# Date-only pattern; stat_log parses the day of each log record with it.
time_format_for_by_month = "%Y-%m-%d"
65
66
67
def reduce_dico(dico, max_per_cent):
    """Return the entries of *dico* kept by ``refactor_dico``.

    Thin wrapper: both arguments are forwarded unchanged to
    ``refactor_dico(dico, max_per_cent)`` and its result is returned.
    """
    return refactor_dico(dico, max_per_cent)
70
71
72
def _accumulate(totals, key, amount):
    """Add *amount* to ``totals[key]``, creating the entry on first use."""
    totals[key] = totals.get(key, 0) + amount


def _stat_log_path(directory, filename):
    """Return the path of *filename* if it is a non-empty SITools log file, else None."""
    path = directory + "/" + filename
    name_parts = os.path.basename(filename).split(".")
    # The second dot-separated component must be "log" (same rule as before),
    # but a name without any dot no longer raises IndexError.
    if (os.path.isfile(path)
            and len(name_parts) > 1 and name_parts[1] == "log"
            and os.path.getsize(path) != 0
            and "sitools-log-service" in filename):
        return path
    return None


def _parse_log_date(first_field):
    """Return the date held in the first log field, or None if it cannot be parsed."""
    stamp = first_field.split(" ")[0]
    # Drop the last four characters, then keep what precedes the "T" separator.
    day = stamp[:len(stamp) - 4].replace("T", " ").split(" ")[0]
    try:
        return datetime.strptime(day, time_format_for_by_month).date()
    except ValueError:
        return None


def _download_project(url, last_field, dico_project):
    """Return the project name a download belongs to, or "others" when unknown."""
    if last_field.rstrip("\r\n") == "-":
        # No usable last field: the project is the 9th "/"-separated URL part.
        url_parts = url.split("/")
        project = url_parts[8] if len(url_parts) > 8 else "others"
    else:
        field_parts = last_field.split("/")
        project = field_parts[5] if len(field_parts) == 7 else "others"
    if project.lower() not in dico_project:
        project = "others"
    return project


def stat_log(application, cond_app):
    """Compute download and access statistics from the SITools log files.

    The constants of *application* are obtained from ``defCst(application)``;
    every directory of its "listPathLogs" entry is scanned for non-empty
    "sitools-log-service" log files, whose lines are tab-separated records.

    Parameters:
        application: value handed to ``defCst`` to select the constants.
        cond_app: mapping providing "start_date" and "end_date" (compared
            with ``datetime.date`` values, both bounds exclusive),
            "period_date", and the boolean flags "is_access_access_by_ip",
            "is_access_access_by_country", "is_access_access_by_months",
            "is_down_stat_by_project", "is_down_stat_by_months",
            "is_down_stat_by_country" and "is_down_stat_by_ip".
            "is_down_stat_by_user" has no effect: per-user statistics are
            not implemented.

    Returns:
        A dict with the keys "nb_download", "vol_download" (formatted by
        ``format_download_vol``), "nb_access_ip", one "*_chart*" entry per
        statistic (``None`` when its flag is off) and "dico_cste".
        ``None`` is returned, after printing the error, when anything fails.
    """
    try:
        dico_cste = defCst(application)
        dico_project = dico_cste["dicoProject"]
        start_date = cond_app["start_date"]
        end_date = cond_app["end_date"]

        # Line filters, read once instead of once per log line.
        excluded_ips = dico_cste["listIpToExclude"]
        pattern_bot = dico_cste["patternBot"]
        pattern_yahoo = dico_cste["patternYahoo"]
        patterns_baidu = dico_cste["patternBaidu"]
        http_method = dico_cste["pattern_http_method"]
        return_code = dico_cste["pattern_code_retour"]
        patterns_download = dico_cste["pattern_download"]
        patterns_sia = dico_cste["sia"]
        patterns_cutfits = dico_cste["CutFits"]

        down_by_project = cond_app["is_down_stat_by_project"]
        down_by_months = cond_app["is_down_stat_by_months"]
        down_by_country = cond_app["is_down_stat_by_country"]
        down_by_ip = cond_app["is_down_stat_by_ip"]
        access_by_ip = cond_app["is_access_access_by_ip"]
        access_by_country = cond_app["is_access_access_by_country"]
        access_by_months = cond_app["is_access_access_by_months"]
        wants_access = access_by_ip or access_by_country or access_by_months

        # Dotted-quad check; the dots are escaped so they only match ".".
        ip_pattern = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')

        # Access statistics.
        dico_access_ip = {}
        dico_access_by_country = {}
        dico_access_by_month = {}
        nb_access_ip = 0

        # Download statistics.
        vol_download = 0
        nb_download = 0
        total_down_by_months = {}
        total_down_per_country = {}
        total_down_per_project = {}
        total_down_per_ip = {}

        for path_log in dico_cste["listPathLogs"]:
            filenames = os.listdir(path_log)
            print("PathLog :  " + str(path_log))
            for filename in filenames:
                pathfile = _stat_log_path(path_log, filename)
                if pathfile is None:
                    continue
                print("Traitement de  " + str(pathfile))
                print("taille :  " + str(os.path.getsize(pathfile)))
                # "with" closes the file even if a record raises.
                with open(pathfile, "r") as log_file:
                    for line in log_file:
                        infos = line.split("\t")
                        if len(infos) <= 14:
                            continue
                        ip = infos[2]
                        # presumably the user-agent column; it is only matched
                        # against the bot patterns -- TODO confirm
                        agent = infos[14]
                        if (ip in excluded_ips or pattern_bot in agent
                                or pattern_yahoo in agent
                                or any(s in agent for s in patterns_baidu)):
                            continue
                        # A blank or malformed record is skipped instead of
                        # aborting the whole run.
                        my_time = _parse_log_date(infos[0])
                        if my_time is None or not (start_date < my_time < end_date):
                            continue

                        # ---------------- download statistics ----------------
                        url = infos[7]
                        if not (len(infos) == 16 and infos[6] == http_method
                                and infos[9] == return_code
                                and any(s in url for s in patterns_download)
                                and not any(s in url for s in patterns_sia)
                                and not any(s in url for s in patterns_cutfits)):
                            continue
                        # Looked up once per record; requests located in "CN"
                        # are excluded from every statistic.
                        country_code = gi.country_code_by_addr(ip)
                        if country_code == "CN":
                            continue

                        nb_download += 1
                        try:
                            size = float(infos[10])
                        except ValueError:
                            # The download is counted but adds no volume.
                            size = None
                        if size is not None:
                            vol_download += size
                            if down_by_project:
                                project = _download_project(url, infos[15], dico_project)
                                project_key = project.lower()
                                if project_key in dico_project:
                                    _accumulate(total_down_per_project,
                                                dico_project[project_key], size)
                            if down_by_months:
                                _accumulate(total_down_by_months,
                                            (my_time.month, my_time.year), size)
                            if down_by_country:
                                _accumulate(total_down_per_country, country_code, size)
                            if down_by_ip:
                                _accumulate(total_down_per_ip, ip, size)

                        # ----------------- access statistics -----------------
                        # NOTE(review): these counters are only reached for
                        # records that already passed the download filter above
                        # (nesting kept as found). The section banner in the
                        # previous version suggests they were meant to apply to
                        # every in-period record -- confirm before un-nesting,
                        # since it would change the published figures.
                        # 127.0.0.1 is excluded: it was added to ignore Hobbit
                        # during a period when the real client IP was not logged.
                        if (wants_access and ip != "127.0.0.1"
                                and ip_pattern.match(ip) is not None
                                and dico_cste["patternAccessToSeek"] == url):
                            if access_by_ip:
                                nb_access_ip += 1
                                _accumulate(dico_access_ip, ip, 1)
                            if access_by_country:
                                _accumulate(dico_access_by_country,
                                            gi.country_name_by_name(ip), 1)
                            if access_by_months:
                                _accumulate(dico_access_by_month,
                                            (my_time.month, my_time.year), 1)
                        # VO cone-search statistics ("patternVoConeSearchToSeek")
                        # are not computed; that code was already disabled.

        # Chart creation, in the same order as before.
        date_title_chart = ""
        if cond_app["period_date"]:
            date_title_chart = " entre le " + str(start_date) + " et le " + str(end_date)

        access_ip_chart_svg_png = None
        if access_by_ip:
            access_ip_chart_svg_png = svg_chart(
                reduce_dico(dico_access_ip,
                            dico_cste["dico_max_per_cent"]["is_access_access_by_ip"]),
                "Acces par IP" + date_title_chart)

        access_country_chart_svg_png = None
        if access_by_country:
            access_country_chart_svg_png = svg_chart(
                reduce_dico(dico_access_by_country,
                            dico_cste["dico_max_per_cent"]["is_access_access_by_country"]),
                "Acces par pays" + date_title_chart)

        access_month_chart_svg_png = None
        if access_by_months:
            access_month_chart_svg_png = svg_bar_chart(
                dico_access_by_month, start_date, end_date,
                "Acces par mois" + date_title_chart)

        total_down_per_project_chart = None
        if down_by_project:
            total_down_per_project_chart = svg_chart(
                reduce_dico(total_down_per_project,
                            dico_cste["dico_max_per_cent"]["is_down_stat_by_project"]),
                "Volume de telechargement par projet" + date_title_chart)

        total_down_per_ip_chart = None
        if down_by_ip:
            total_down_per_ip_chart = svg_chart(
                reduce_dico(total_down_per_ip,
                            dico_cste["dico_max_per_cent"]["is_down_stat_by_ip"]),
                "Volume de telechargement par IP" + date_title_chart)

        total_down_per_month_chart = None
        if down_by_months:
            total_down_per_month_chart = svg_bar_chart(
                total_down_by_months, start_date, end_date,
                "Volume de telechargement par mois" + date_title_chart)

        total_down_per_country_chart = None
        if down_by_country:
            total_down_per_country_chart = svg_chart(
                reduce_dico(total_down_per_country,
                            dico_cste["dico_max_per_cent"]["is_down_stat_by_country"]),
                "Volume de telechargement par Pays" + date_title_chart)

        return {
            "nb_download": nb_download,
            "vol_download": format_download_vol(vol_download),
            "nb_access_ip": nb_access_ip,
            "access_ip_chart_svg_png": access_ip_chart_svg_png,
            "access_country_chart_svg_png": access_country_chart_svg_png,
            "access_month_chart_svg_png": access_month_chart_svg_png,
            "total_down_per_project_chart": total_down_per_project_chart,
            "total_down_per_month_chart": total_down_per_month_chart,
            "total_down_per_country_chart": total_down_per_country_chart,
            "total_down_per_ip_chart": total_down_per_ip_chart,
            "dico_cste": dico_cste,
        }

    except Exception as e:
        # Callers still get None on failure; KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        print("Error :  " + str(e))
        return None
294
295
296
def refactor_dico(dico, max):
    """Return the entries of *dico* whose share of the total exceeds *max*.

    Parameters:
        dico: mapping of key -> numeric value.
        max: threshold as a fraction of the sum of all values; an entry is
            kept only when ``value / total`` is strictly greater. The name
            shadows the builtin but is kept for existing callers.

    Returns:
        A new dict holding the kept entries. It is empty when *dico* is
        empty or when its values sum to zero (no share can be computed).
    """
    total = float(sum(dico.values()))
    if total == 0:
        # Avoids a ZeroDivisionError when every value is zero.
        dico2 = {}
    else:
        dico2 = {key: value for key, value in dico.items() if value / total > max}
    # Trace output kept from the previous implementation.
    print(dico2)
    return dico2
307
308
309
def format_download_vol(vol_down):
    """Format a byte count as a string with a decimal (power of 1000) unit.

    Parameters:
        vol_down: volume in bytes (int or float).

    Returns:
        The volume rounded to two decimals followed by " o", " Ko", " Mo",
        " Go" or " To". An empty string is returned, after printing the
        error, when *vol_down* is not numeric.
    """
    try:
        # float() also prevents integer truncation under Python 2.
        volume = float(vol_down)
    except (TypeError, ValueError) as e:
        print(str(e))
        return ""
    units = (
        (1000.0 ** 4, " To"),
        (1000.0 ** 3, " Go"),
        (1000.0 ** 2, " Mo"),
        (1000.0, " Ko"),
    )
    for factor, suffix in units:
        if volume >= factor:
            return str(round(volume / factor, 2)) + suffix
    # Below 1000 bytes: previously rendered as an empty string.
    return str(round(volume, 2)) + " o"