Project

General

Profile

Download (18.3 KB) Statistics
| Branch: | Revision:

git_sitools_idoc / webstatDev / idoc_Maison / webstat / libStatSitools.COPIE.py @ master

1 8792cad8 Marc NICOLAS
__version__ = "0.1"
2
__license__ = "GPL"
3
__author__ = "Marc NICOLAS"
4
__credit__ = "Marc NICOLAS"
5
__maintainer__ = "Marc NICOLAS"
6
__email__ = "marc.nicolas@ias.u-psud.fr"
7
8
import sys, os
9
10
try:
11
    import copy
12
except:
13
    sys.exit("Import failed in module libStatSitools :\n\tcopy module is required")
14
try:
15
    import pygeoip
16
except:
17
    sys.exit("Import failed in module libStatSitools :\n\tpygeoip module is required")
18
try:
19
    from datetime import datetime, timedelta
20
except:
21
    sys.exit("Import failed in module libStatSitools :\n\tdatetime module is required")
22
try:
23
    import os
24
except:
25
    sys.exit("Import failed in module libStatSitools :\n\tos module is required")
26
try:
27
    import string
28
except:
29
    sys.exit("Import failed in module libStatSitools :\n\tstring module is required")
30
try:
31
    import time
32
except:
33
    sys.exit("Import failed in module libStatSitools :\n\ttime module is required")
34
try:
35
    import copy
36
except:
37
    sys.exit("Import failed in module libStatSitools :\n\tcopy module is required")
38
try:
39
    from libChart4StatSitools2 import *
40
except:
41
    sys.exit("Import failed in module lancementStat :\n\tlibChart4StatSitools2 module is required")
42
try:
43
    from libStatSitoolsConstante import *
44
except:
45
    sys.exit("Import failed in module lancementStat :\n\tlibStatSitoolsConstante module is required")
46
try:
47
    import re
48
except:
49
    sys.exit("Import failed in module lancementStat :\n\tre module is required")
50
try:
51
    import operator
52
except:
53
    sys.exit("Import failed in module lancementStat :\n\toperator module is required")
54
# Load the GeoIP country database once, at import time. Every country lookup
# in this module goes through the module-level `gi` object.
try:
    # PATH FOR PROD
    #gi = pygeoip.GeoIP("/usr/local/sitools2_webstat/webstat/GeoIP.dat")
    # PATH FOR DEV
    gi = pygeoip.GeoIP("/home/marc/Projet/idoc_Maison/webstat/GeoIP.dat")
except Exception:
    # `Exception` (rather than a bare except) so that Ctrl-C / SystemExit are
    # not reported as a missing database file.
    sys.exit("Import of GeoIP.dat failed. Verify that the GeoIP file exists !")


# strptime layouts: full timestamp, and day-only (used to bucket log lines).
time_format = "%Y-%m-%d %H:%M:%S"
time_format_for_by_month = "%Y-%m-%d"
65
66
67
def reduce_dico(dico, max_per_cent):
    """Return the subset of `dico` selected by refactor_dico.

    Thin alias: both arguments are forwarded unchanged and the result of
    refactor_dico(dico, max_per_cent) is returned as is.
    """
    return refactor_dico(dico, max_per_cent)
70
71
72
def _accumulate(totals, key, amount):
    """Add `amount` to `totals[key]`, starting from 0 on first use."""
    totals[key] = totals.get(key, 0) + amount


def _is_service_log(directory, file_name):
    """Tell whether `file_name` inside `directory` is a log file to parse.

    A candidate is a regular, non-empty file whose name contains
    "sitools-log-service" and whose second dot-separated component is "log".
    Names without any dot are rejected instead of raising IndexError.
    """
    path = directory + "/" + file_name
    name_parts = os.path.basename(file_name).split(".")
    return (os.path.isfile(path)
            and len(name_parts) > 1
            and name_parts[1] == "log"
            and os.path.getsize(path) != 0
            and "sitools-log-service" in file_name)


def _parse_log_day(first_field):
    """Return the date found in the first field of a log line, or None.

    The text before the first space is taken, its last four characters are
    dropped, and what precedes the "T" separator is parsed with
    `time_format_for_by_month`. None is returned when that text is not a
    valid date, so a blank or damaged line can simply be skipped.
    """
    stamp = first_field.split(" ")[0]
    day_text = stamp[:-4].replace("T", " ").split(" ")[0]
    try:
        return datetime.strptime(day_text, time_format_for_by_month).date()
    except ValueError:
        return None


def _download_project(infos, dico_project):
    """Return the project name a 16-field download line is attributed to.

    When the last field is just "-" the name is the 9th "/"-separated
    segment of infos[7]; when the last field splits into exactly 7 segments
    it is the 6th of them; otherwise, or when the name (lower-cased) is not
    a key of `dico_project`, the result is "others".
    """
    last_field = infos[15]
    last_parts = last_field.split("/")
    # NOTE(review): "-" presumably means "no referer" -- confirm against the
    # log format. Line endings are stripped so CRLF files and a final line
    # without newline are handled like the usual "-\n".
    if last_field.rstrip("\r\n") == "-":
        url_parts = infos[7].split("/")
        # Index 8 needs at least 9 segments; shorter paths go to "others".
        project = url_parts[8] if len(url_parts) > 8 else "others"
    elif len(last_parts) == 7:
        project = last_parts[5]
    else:
        project = "others"
    if project.lower() not in dico_project:
        project = "others"
    return project


def stat_log(application, cond_app):
    """Parse the service logs of `application` and build the requested charts.

    The constants returned by defCst(application) provide the log
    directories ("listPathLogs"), the project table ("dicoProject"), the
    exclusion lists and the patterns used to recognise downloads and
    accesses. Every "sitools-log-service" log file of every listed directory
    is read; a tab-separated line is kept when it has more than 14 fields,
    its IP (field 2) is not excluded, field 14 matches none of the
    bot/Yahoo/Baidu patterns, and its day lies strictly between
    cond_app["start_date"] and cond_app["end_date"].

    Parameters:
        application -- value handed to defCst() to fetch the constants.
        cond_app    -- dict of options. Keys read here: "start_date",
                       "end_date" (compared with datetime.date values),
                       "period_date", "is_access_access_by_ip",
                       "is_access_access_by_country",
                       "is_access_access_by_months",
                       "is_down_stat_by_project", "is_down_stat_by_months",
                       "is_down_stat_by_country", "is_down_stat_by_ip".

    Returns:
        A dict with the keys "nb_download", "vol_download" (formatted by
        format_download_vol), "nb_access_ip", the seven "*_chart*" entries
        (None for every chart that was not requested) and "dico_cste".
        None is returned, after printing the error, when anything fails.
    """
    try:
        dico_cste = defCst(application)
        dico_project = dico_cste["dicoProject"]
        start_date = cond_app["start_date"]
        end_date = cond_app["end_date"]

        # Dotted-quad prefix test; the dots are escaped so that only a
        # literal "." separates the four groups.
        ip_pattern = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')

        # ----- accumulators for the access statistics -----
        dico_access_ip = {}
        dico_access_by_country = {}
        dico_access_by_month = {}
        nb_access_ip = 0

        # ----- accumulators for the download statistics -----
        vol_download = 0
        nb_download = 0
        total_down_by_months = {}
        total_down_per_country = {}
        total_down_per_project = {}
        total_down_per_ip = {}

        for pathLog in dico_cste["listPathLogs"]:
            listarbo = os.listdir(pathLog)
            print("PathLog :  %s" % (pathLog,))
            # The file loop is nested in the directory loop so that every
            # listed directory is processed, not only the last one.
            for fileToOpen in listarbo:
                if not _is_service_log(pathLog, fileToOpen):
                    continue
                pathfile = pathLog + "/" + fileToOpen
                print("Traitement de  %s" % (pathfile,))
                print("taille :  %s" % (os.path.getsize(pathfile),))
                # `with` closes the file exactly once, even on error.
                with open(pathfile, "r") as log_file:
                    for line in log_file:
                        infos = line.split("\t")
                        if len(infos) <= 14:
                            continue
                        ip = infos[2]
                        # NOTE(review): field 14 is presumably the user
                        # agent -- it is only matched against bot patterns.
                        agent = infos[14]
                        if (ip in dico_cste["listIpToExclude"]
                                or dico_cste["patternBot"] in agent
                                or dico_cste["patternYahoo"] in agent
                                or any(s in agent for s in dico_cste["patternBaidu"])):
                            continue
                        log_day = _parse_log_day(infos[0])
                        if log_day is None or not (start_date < log_day < end_date):
                            continue
                        month_key = (log_day.month, log_day.year)
                        resource = infos[7]

                        # ------------------------------------------------
                        #  DOWNLOAD STATISTICS
                        # ------------------------------------------------
                        if (len(infos) == 16
                                and infos[6] == dico_cste["pattern_http_method"]
                                and infos[9] == dico_cste["pattern_code_retour"]
                                and any(s in resource for s in dico_cste["pattern_download"])
                                and not any(s in resource for s in dico_cste["sia"])
                                and not any(s in resource for s in dico_cste["CutFits"])):
                            country_code = gi.country_code_by_addr(ip)
                            if country_code != "CN":
                                nb_download += 1
                                # A line whose size field is not a number
                                # still counts as a download but adds no
                                # volume to any total.
                                try:
                                    size = float(infos[10])
                                except ValueError:
                                    size = None
                                if size is not None:
                                    vol_download += size
                                    if cond_app["is_down_stat_by_project"]:
                                        project_key = _download_project(infos, dico_project).lower()
                                        # "others" may be absent from the
                                        # table; such lines are not charted.
                                        if project_key in dico_project:
                                            _accumulate(total_down_per_project,
                                                        dico_project[project_key], size)
                                    if cond_app["is_down_stat_by_months"]:
                                        _accumulate(total_down_by_months, month_key, size)
                                    if cond_app["is_down_stat_by_country"]:
                                        _accumulate(total_down_per_country, country_code, size)
                                    if cond_app["is_down_stat_by_ip"]:
                                        _accumulate(total_down_per_ip, ip, size)
                                    # Per-user download statistics are not
                                    # implemented.

                        # ------------------------------------------------
                        #  ACCESS STATISTICS
                        # ------------------------------------------------
                        # 127.0.0.1 is excluded to ignore Hobbit during a
                        # period when the "real" IP was not available.
                        if (ip != "127.0.0.1"
                                and ip_pattern.match(ip) is not None
                                and gi.country_code_by_addr(ip) != "CN"
                                and dico_cste["patternAccessToSeek"] == resource):
                            if cond_app["is_access_access_by_ip"]:
                                nb_access_ip += 1
                                _accumulate(dico_access_ip, ip, 1)
                            if cond_app["is_access_access_by_country"]:
                                # `ip` is an address, so the *_by_addr
                                # lookup is used (no hostname resolution).
                                _accumulate(dico_access_by_country,
                                            gi.country_name_by_addr(ip), 1)
                            if cond_app["is_access_access_by_months"]:
                                _accumulate(dico_access_by_month, month_key, 1)
                        # VO cone-search statistics were disabled in the
                        # original code and are not computed.

        # ----- chart creation -----
        date_title_chart = ""
        if cond_app["period_date"]:
            date_title_chart = " entre le " + str(start_date) + " et le " + str(end_date)
        max_per_cent = dico_cste["dico_max_per_cent"]

        access_ip_chart_svg_png = None
        if cond_app["is_access_access_by_ip"]:
            access_ip_chart_svg_png = svg_chart(
                reduce_dico(dico_access_ip, max_per_cent["is_access_access_by_ip"]),
                "Acces par IP" + str(date_title_chart))

        access_country_chart_svg_png = None
        if cond_app["is_access_access_by_country"]:
            access_country_chart_svg_png = svg_chart(
                reduce_dico(dico_access_by_country, max_per_cent["is_access_access_by_country"]),
                "Acces par pays" + str(date_title_chart))

        access_month_chart_svg_png = None
        if cond_app["is_access_access_by_months"]:
            access_month_chart_svg_png = svg_bar_chart(
                dico_access_by_month, start_date, end_date,
                "Acces par mois" + str(date_title_chart))

        total_down_per_project_chart = None
        if cond_app["is_down_stat_by_project"]:
            total_down_per_project_chart = svg_chart(
                reduce_dico(total_down_per_project, max_per_cent["is_down_stat_by_project"]),
                "Volume de telechargement par projet" + str(date_title_chart))

        total_down_per_ip_chart = None
        if cond_app["is_down_stat_by_ip"]:
            total_down_per_ip_chart = svg_chart(
                reduce_dico(total_down_per_ip, max_per_cent["is_down_stat_by_ip"]),
                "Volume de telechargement par IP" + str(date_title_chart))

        total_down_per_month_chart = None
        if cond_app["is_down_stat_by_months"]:
            total_down_per_month_chart = svg_bar_chart(
                total_down_by_months, start_date, end_date,
                "Volume de telechargement par mois" + str(date_title_chart))

        total_down_per_country_chart = None
        if cond_app["is_down_stat_by_country"]:
            total_down_per_country_chart = svg_chart(
                reduce_dico(total_down_per_country, max_per_cent["is_down_stat_by_country"]),
                "Volume de telechargement par Pays" + str(date_title_chart))

        return {
            "nb_download": nb_download,
            "vol_download": format_download_vol(vol_download),
            "nb_access_ip": nb_access_ip,
            "access_ip_chart_svg_png": access_ip_chart_svg_png,
            "access_country_chart_svg_png": access_country_chart_svg_png,
            "access_month_chart_svg_png": access_month_chart_svg_png,
            "total_down_per_project_chart": total_down_per_project_chart,
            "total_down_per_month_chart": total_down_per_month_chart,
            "total_down_per_country_chart": total_down_per_country_chart,
            "total_down_per_ip_chart": total_down_per_ip_chart,
            "dico_cste": dico_cste,
        }

    except Exception as e:
        # Best effort, as before: report the problem and return None.
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        print("Error :  %s" % (str(e),))
        return None
295
296
297
def refactor_dico(dico, max):
    """Keep only the entries of `dico` whose share of the total exceeds `max`.

    Parameters:
        dico -- dict mapping any key to a number.
        max  -- threshold as a fraction of the sum of all values; an entry is
                kept when value / total is strictly greater than it. (The
                name shadows the builtin but is kept for existing callers.)

    Returns:
        A new dict with the retained entries. It is empty when `dico` is
        empty or when its values sum to 0, in which case no share can be
        computed (this used to raise ZeroDivisionError).

    The resulting dict is also printed, as the original function did.
    """
    total = float(sum(dico.values()))
    if total == 0:
        dico2 = {}
    else:
        dico2 = dict((key, value) for key, value in dico.items()
                     if value / total > max)
    print(dico2)
    return dico2
308
309
310
def format_download_vol(vol_down):
    """Format a volume in bytes as text with a decimal (power of 1000) unit.

    The value is divided by the largest of 10^12 ("To"), 10^9 ("Go"),
    10^6 ("Mo") or 10^3 ("Ko") that does not exceed it and rounded to two
    decimals; values below 1000 are expressed in "o". The volume is
    converted to float first, so integer inputs are not truncated by
    Python 2 integer division.

    Parameters:
        vol_down -- the volume, as a number (or anything float() accepts).

    Returns:
        A string such as "1.5 Ko". If `vol_down` cannot be converted to a
        number, the error is printed and "" is returned.
    """
    try:
        volume = float(vol_down)
    except (TypeError, ValueError) as e:
        print(str(e))
        return ""

    # Largest unit first, so the first threshold reached decides the unit.
    for threshold, unit in ((1e12, "To"), (1e9, "Go"), (1e6, "Mo"), (1e3, "Ko")):
        if volume >= threshold:
            return str(round(volume / threshold, 2)) + " " + unit
    return str(round(volume, 2)) + " o"
333