Project

General

Profile

Download (18.8 KB) Statistics
| Branch: | Revision:

git_sitools_idoc / webstatDev / idoc_Maison / webstat / libStatSitools.py @ master

1
__version__ = "0.1"
2
__license__ = "GPL"
3
__author__ = "Marc NICOLAS"
4
__credit__ = "Marc NICOLAS"
5
__maintainer__ = "Marc NICOLAS"
6
__email__ = "marc.nicolas@ias.u-psud.fr"
7

    
8
# Module-level dependencies.  Each import is guarded so that a missing module
# stops the program with an explicit message.  ``sys`` must be imported first
# because every failure branch below calls ``sys.exit``.
import sys

try:
    import copy
except ImportError:
    sys.exit("Import failed in module libStatSitools :\n\tcopy module is required")
try:
    import pygeoip
except ImportError:
    sys.exit("Import failed in module libStatSitools :\n\tpygeoip module is required")
try:
    from datetime import datetime, timedelta
except ImportError:
    sys.exit("Import failed in module libStatSitools :\n\tdatetime module is required")
try:
    import os
except ImportError:
    sys.exit("Import failed in module libStatSitools :\n\tos module is required")
try:
    import string
except ImportError:
    sys.exit("Import failed in module libStatSitools :\n\tstring module is required")
try:
    import time
except ImportError:
    sys.exit("Import failed in module libStatSitools :\n\ttime module is required")
# Only ImportError is caught for the project modules, so that a genuine error
# raised *inside* them is not misreported as "module is required".
try:
    from libChart4StatSitools2 import *
except ImportError:
    sys.exit("Import failed in module libStatSitools :\n\tlibChart4StatSitools2 module is required")
try:
    from libStatSitoolsConstante import *
except ImportError:
    sys.exit("Import failed in module libStatSitools :\n\tlibStatSitoolsConstante module is required")
try:
    import re
except ImportError:
    sys.exit("Import failed in module libStatSitools :\n\tre module is required")
try:
    import operator
except ImportError:
    sys.exit("Import failed in module libStatSitools :\n\toperator module is required")

# GeoIP database used by stat_log() to map an IP address to a country.
# NOTE(review): the path is hard-coded; switch the two lines below by hand
# when deploying to production.
try:
    #PATH FOR PROD
    #gi = pygeoip.GeoIP("/usr/local/sitools2_webstat/webstat/GeoIP.dat")
    #PATH FOR DEV
    gi = pygeoip.GeoIP("/home/marc/Projet/idoc_Maison/webstat/GeoIP.dat")
except Exception:
    sys.exit("Import of GeoIP.dat failed. Verify that the GeoIP file exists !")


# strptime/strftime patterns: full timestamp, and day only (the latter is the
# one used to date log lines in stat_log()).
time_format = "%Y-%m-%d %H:%M:%S"
time_format_for_by_month = "%Y-%m-%d"
63

    
64

    
65
def reduce_dico(dico, max_per_cent):
    """Return the entries of *dico* selected by refactor_dico().

    Thin public wrapper: *dico* and the *max_per_cent* threshold are handed
    to refactor_dico() unchanged and its result is returned as is.
    """
    return refactor_dico(dico, max_per_cent)
68

    
69

    
70
def _is_service_log(pathfile, file_name):
    """Tell whether *pathfile* is a non-empty ``sitools-log-service`` log file."""
    name_parts = os.path.basename(file_name).split(".")
    return (os.path.isfile(pathfile)
            # The length guard matters: a name without any dot used to raise
            # IndexError here and abort the whole statistics run.
            and len(name_parts) > 1 and name_parts[1] == "log"
            and os.path.getsize(pathfile) != 0
            and "sitools-log-service" in file_name)


def _parse_log_day(stamp_field):
    """Return the date held in the first field of a log line, or None.

    The first space-delimited token loses its last four characters, and what
    precedes the "T" (or a space) is parsed with ``time_format_for_by_month``.
    """
    stamp = stamp_field.split(" ")[0]
    day = stamp[:len(stamp) - 4].replace("T", " ").split(" ")[0]
    try:
        return datetime.strptime(day, time_format_for_by_month).date()
    except ValueError:
        # Unreadable timestamp: the caller skips the line.
        return None


def _country_code(ip, cache):
    """Return the GeoIP country code of *ip*, memoised in the dict *cache*."""
    if ip not in cache:
        cache[ip] = gi.country_code_by_addr(ip)
    return cache[ip]


def _download_project(infos, dico_project):
    """Return the project name of a 16-field download line ("others" if unknown)."""
    # infos[15] is presumably the referer and infos[7] the requested URL
    # -- TODO confirm against the log format.
    last_field = infos[15].rstrip("\r\n")
    if last_field == "-":
        url_parts = infos[7].split("/")
        # Index 8 needs at least nine segments; shorter URLs go to "others".
        project = url_parts[8] if len(url_parts) > 8 else "others"
    else:
        last_parts = last_field.split("/")
        project = last_parts[5] if len(last_parts) == 7 else "others"
    if project.lower() not in dico_project:
        project = "others"
    return project


def _accumulate(dico, key, amount):
    """Add *amount* to ``dico[key]``, creating the entry when it is missing."""
    dico[key] = dico.get(key, 0) + amount


def stat_log(application, cond_app):
    """Compute download and access statistics from the log files and chart them.

    Parameters:
        application -- value passed to defCst() to obtain the constants
            dictionary (log directories, filter patterns, project names,
            chart thresholds).
        cond_app -- dictionary holding "start_date", "end_date",
            "period_date" and the "is_access_access_by_*" /
            "is_down_stat_by_*" flags selecting which statistics to build.

    Returns:
        A dictionary with the keys "nb_download", "vol_download",
        "nb_access_ip", the seven "*_chart*" entries (None when the matching
        flag is off) and "dico_cste".  When an exception occurs, its message
        is printed and None is returned.
    """
    try:
        dico_cste = defCst(application)

        dico_project = dico_cste["dicoProject"]
        list_path_logs = dico_cste["listPathLogs"]
        start_date = cond_app["start_date"]
        end_date = cond_app["end_date"]

        # Line filters, read once instead of once per log line.
        ip_excluded = dico_cste["listIpToExclude"]
        pattern_bot = dico_cste["patternBot"]
        pattern_yahoo = dico_cste["patternYahoo"]
        pattern_baidu = dico_cste["patternBaidu"]
        http_method = dico_cste["pattern_http_method"]
        code_retour = dico_cste["pattern_code_retour"]
        pattern_download = dico_cste["pattern_download"]
        pattern_sia = dico_cste["sia"]
        pattern_cut_fits = dico_cste["CutFits"]
        access_pattern = dico_cste["patternAccessToSeek"]

        # Statistic switches.
        by_access_ip = cond_app["is_access_access_by_ip"]
        by_access_country = cond_app["is_access_access_by_country"]
        by_access_months = cond_app["is_access_access_by_months"]
        by_down_project = cond_app["is_down_stat_by_project"]
        by_down_months = cond_app["is_down_stat_by_months"]
        by_down_country = cond_app["is_down_stat_by_country"]
        by_down_ip = cond_app["is_down_stat_by_ip"]
        # "is_down_stat_by_user" is not read: no per-user statistic exists yet.

        # Dotted-quad prefix test (dots escaped so they only match a dot).
        reg_infos_url = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
        # IP -> country code, so each address is looked up only once.
        country_cache = {}

        # ----- accumulators for the access statistics -----
        dico_access_ip = {}
        dico_access_by_country = {}
        dico_access_by_month = {}
        nb_access_ip = 0

        # ----- accumulators for the download statistics -----
        vol_download = 0
        nb_download = 0
        total_down_by_months = {}
        total_down_per_country = {}
        total_down_per_project = {}
        total_down_per_ip = {}

        for pathLog in list_path_logs:
            listarbo = os.listdir(pathLog)
            print("PathLog :  %s" % pathLog)
            for fileToOpen in listarbo:
                pathfile = pathLog + "/" + fileToOpen
                if not _is_service_log(pathfile, fileToOpen):
                    continue
                print("Traitement de  %s" % pathfile)
                print("taille :  %s" % os.path.getsize(pathfile))
                # "with" closes the file even when a line raises.
                with open(pathfile, "r") as fileopen:
                    for line in fileopen:
                        infos = line.split("\t")
                        if len(infos) <= 14:
                            continue
                        ip = infos[2]
                        agent = infos[14]
                        if (ip in ip_excluded or pattern_bot in agent
                                or pattern_yahoo in agent
                                or any(s in agent for s in pattern_baidu)):
                            continue
                        my_time = _parse_log_day(infos[0])
                        # Both bounds are exclusive: lines dated exactly
                        # start_date or end_date are not counted.
                        if my_time is None or not start_date < my_time < end_date:
                            continue
                        request = infos[7]
                        month_key = (my_time.month, my_time.year)

                        # ------------- download statistics -------------
                        if (len(infos) == 16
                                and infos[6] == http_method
                                and infos[9] == code_retour
                                and any(s in request for s in pattern_download)
                                and not any(s in request for s in pattern_sia)
                                and not any(s in request for s in pattern_cut_fits)
                                and _country_code(ip, country_cache) != "CN"):
                            nb_download += 1
                            try:
                                volume = float(infos[10])
                            except ValueError:
                                # Size not numeric: the hit is counted but
                                # adds nothing to any volume.
                                volume = None
                            if volume is not None:
                                vol_download += volume
                                if by_down_project:
                                    project_key = _download_project(
                                        infos, dico_project).lower()
                                    # Skipped when even "others" is not a
                                    # known project.
                                    if project_key in dico_project:
                                        _accumulate(total_down_per_project,
                                                    dico_project[project_key],
                                                    volume)
                                if by_down_months:
                                    _accumulate(total_down_by_months,
                                                month_key, volume)
                                if by_down_country:
                                    _accumulate(total_down_per_country,
                                                _country_code(ip, country_cache),
                                                volume)
                                if by_down_ip:
                                    _accumulate(total_down_per_ip, ip, volume)

                        # -------------- access statistics --------------
                        # 127.0.0.1 is left out to exclude Hobbit during a
                        # period when the "real" IP was no longer available.
                        if (request == access_pattern
                                and ip != "127.0.0.1"
                                and reg_infos_url.match(ip) is not None
                                and _country_code(ip, country_cache) != "CN"):
                            if by_access_ip:
                                nb_access_ip += 1
                                _accumulate(dico_access_ip, ip, 1)
                            if by_access_country:
                                # *_by_addr is the lookup meant for IP strings.
                                _accumulate(dico_access_by_country,
                                            gi.country_name_by_addr(ip), 1)
                            if by_access_months:
                                _accumulate(dico_access_by_month, month_key, 1)
                        # NOTE: the VO cone-search statistics
                        # ("patternVoConeSearchToSeek") are disabled.

        # ----- chart creation -----
        date_title_chart = ""
        if cond_app["period_date"]:
            date_title_chart = " entre le "+str(start_date)+" et le "+str(end_date)

        def pie(flag_name, dico, title):
            # One pie chart of the entries above the threshold configured
            # under the same name as the flag, or None when the flag is off.
            if not cond_app[flag_name]:
                return None
            reduced = reduce_dico(dico, dico_cste["dico_max_per_cent"][flag_name])
            return svg_chart(reduced, title + str(date_title_chart))

        def bars(flag_name, dico, title):
            # One bar chart per month over the period, or None when off.
            if not cond_app[flag_name]:
                return None
            return svg_bar_chart(dico, start_date, end_date,
                                 title + str(date_title_chart))

        # Charts are built in the historical order.
        access_ip_chart_svg_png = pie(
            "is_access_access_by_ip", dico_access_ip, "Acces par IP")
        access_country_chart_svg_png = pie(
            "is_access_access_by_country", dico_access_by_country,
            "Acces par pays")
        access_month_chart_svg_png = bars(
            "is_access_access_by_months", dico_access_by_month,
            "Acces par mois")
        total_down_per_project_chart = pie(
            "is_down_stat_by_project", total_down_per_project,
            "Volume de telechargement par projet")
        total_down_per_ip_chart = pie(
            "is_down_stat_by_ip", total_down_per_ip,
            "Volume de telechargement par IP")
        total_down_per_month_chart = bars(
            "is_down_stat_by_months", total_down_by_months,
            "Volume de telechargement par mois")
        total_down_per_country_chart = pie(
            "is_down_stat_by_country", total_down_per_country,
            "Volume de telechargement par Pays")

        return {
            "nb_download": nb_download,
            "vol_download": format_download_vol(vol_download),
            "nb_access_ip": nb_access_ip,
            "access_ip_chart_svg_png": access_ip_chart_svg_png,
            "access_country_chart_svg_png": access_country_chart_svg_png,
            "access_month_chart_svg_png": access_month_chart_svg_png,
            "total_down_per_project_chart": total_down_per_project_chart,
            "total_down_per_month_chart": total_down_per_month_chart,
            "total_down_per_country_chart": total_down_per_country_chart,
            "total_down_per_ip_chart": total_down_per_ip_chart,
            "dico_cste": dico_cste,
        }

    except Exception as e:
        # Best effort, as before: report and return None.  KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        print("Error :  %s" % str(e))
        return None
299

    
300

    
301
def refactor_dico(dico, max):
    """Keep the entries of *dico* whose share of the total exceeds *max*.

    Parameters:
        dico -- dictionary mapping a label to a numeric value.
        max -- threshold, as a fraction of the sum of all values; an entry
            is kept only when its share is strictly greater.  (The name
            shadows the builtin but is kept for existing callers.)

    Returns:
        A new dictionary with the retained entries.  It is empty when *dico*
        is empty or when its values sum to zero.  The result is also printed.
    """
    total = float(sum(dico.values()))
    dico2 = {}
    # A zero total would divide by zero; no entry can then exceed a share.
    if total != 0:
        for key, value in dico.items():
            if value / total > max:
                dico2[key] = value
    print(dico2)
    return dico2
312

    
313

    
314
def format_download_vol(vol_down):
    """Format a byte count as a string using decimal units Ko, Mo, Go or To.

    Parameters:
        vol_down -- volume in bytes (any value accepted by float()).

    Returns:
        The volume divided by the largest fitting power of 1000, rounded to
        two decimals and followed by its unit, e.g. "1.5 Ko".  An empty
        string is returned for volumes below 1000 (historical behaviour) and
        for values that are not numeric (the error is printed).
    """
    try:
        # Float conversion also prevents Python 2 floor division on ints.
        volume = float(vol_down)
    except (TypeError, ValueError) as e:
        print(str(e))
        return ""
    # Largest unit first; "To" covers what used to fall through to "".
    units = ((1000000000000.0, " To"),
             (1000000000.0, " Go"),
             (1000000.0, " Mo"),
             (1000.0, " Ko"))
    for limit, unit in units:
        if volume >= limit:
            return str(round(volume / limit, 2)) + unit
    return ""
337