Package pywurfl :: Package algorithms :: Package wurfl :: Module normalizers
[hide private]
[frames] | no frames]

Source Code for Module pywurfl.algorithms.wurfl.normalizers

  1  # pywurfl - Wireless Universal Resource File Tools in Python 
  2  # Copyright (C) 2006-2011 Armand Lynch 
  3  # 
  4  # This library is free software; you can redistribute it and/or modify it 
  5  # under the terms of the GNU Lesser General Public License as published by the 
  6  # Free Software Foundation; either version 2.1 of the License, or (at your 
  7  # option) any later version. 
  8  # 
  9  # This library is distributed in the hope that it will be useful, but WITHOUT 
 10  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
 11  # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 
 12  # details. 
 13  # 
 14  # You should have received a copy of the GNU Lesser General Public License 
 15  # along with this library; if not, write to the Free Software Foundation, Inc., 
 16  # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 17  # 
 18  # Armand Lynch <lyncha@users.sourceforge.net> 
 19   
 20  __doc__ = """ 
 21  This module contains the supporting classes for the Two Step Analysis user agent 
 22  algorithm that is used as the primary way to match user agents with the Java API 
 23  for the WURFL. 
 24   
 25  A description of the way the following source is intended to work can be found 
 26  within the source for the original Java API implementation here: 
 27  http://sourceforge.net/projects/wurfl/files/WURFL Java API/ 
 28   
 29  The original Java code is GPLd and Copyright (c) WURFL-Pro srl 
 30  """ 
 31   
 32  __author__ = "Armand Lynch <lyncha@users.sourceforge.net>" 
 33  __copyright__ = "Copyright 2011, Armand Lynch" 
 34  __license__ = "LGPL" 
 35  __url__ = "http://celljam.net/" 
 36  __version__ = "1.2.1" 
 37   
 38  import re 
 39   
 40  #from pywurfl.algorithms.wurfl import utils 
 41   
 42   
 43  # generic user agent normalizers 
 44   
# Matches the "(via babelfish.yahoo.com)" marker plus any whitespace on either
# side of it.  The dots in the host name are unescaped, so each one matches
# any single character.
babel_fish_re = re.compile(ur"\s*\(via babelfish.yahoo.com\)\s*", re.UNICODE)
# Matches optional whitespace, the literal "UP.Link" and at least one more
# character, anchored at the end of the string.
uplink_re = re.compile(ur"\s*UP\.Link.+$", re.UNICODE)
# Matches optional whitespace followed by the literal token
# "Mozilla/4.0 (YesWAP mobile phone proxy)".
yeswap_re = re.compile(ur"\s*Mozilla/4\.0 \(YesWAP mobile phone proxy\)",
                       re.UNICODE)
# Three groups: (1) "Mozilla/5.0" and whatever follows it (greedy), (2) a
# "; U;" section consumed lazily, (3) "Safari/" plus at most three digits.
safari_re = re.compile(ur"(Mozilla\/5\.0.*)(\;\s*U\;.*?)(Safari\/\d{0,3})",
                       re.UNICODE)
# Matches "; " followed by two lowercase letters and, optionally, a hyphen
# with zero to two more letters (e.g. "; fr", "; en-us").
locale_re = re.compile(ur"(; [a-z]{2}(-[a-zA-Z]{0,2})?)", re.UNICODE)
# Matches either a bracketed "[TF...]", "[NT...]" or "[ST...]" token or a
# "/SN..." token.  Inside the character class "|" is a literal, so the payload
# may consist of digits, "X" and "|".
serial_number_re = re.compile(ur"(\[(TF|NT|ST)[\d|X]+\])|(\/SN[\d|X]+)",
                              re.UNICODE)
# Group 1: "Android", a whitespace character or "/", a digit, any single
# character (the dot is unescaped) and another digit.  Group 2: the shortest
# run of characters up to and including the next ";".
android_re = re.compile(ur"(Android[\s/]\d.\d)(.*?;)", re.UNICODE)
# Matches "Konqueror/" followed by exactly one digit.
konqueror_re = re.compile(ur"(Konqueror\/\d)", re.UNICODE)
#ibm_wbi_re = re.compile(ur"\(via IBM WBI \d+\.\d+\)", re.UNICODE)
#novarra_google_re = re.compile(ur"(\sNovarra-Vision.*)|(,gzip\(gfe\)\s+\(via translate.google.com\))", re.UNICODE)
#gmcc_re = re.compile(ur"GMCC/\d\.\d", re.UNICODE)
#lguplus_re = re.compile(ur"Mozilla.*lgtelecom;.*;(.*);.*", re.UNICODE)


def babelfish(user_agent):
    """Return user_agent with every babel_fish_re match removed.

    That is, each "(via babelfish.yahoo.com)" marker and the whitespace
    around it is deleted.
    """
    cleaned = babel_fish_re.sub('', user_agent)
    return cleaned
66
def blackberry(user_agent):
    """Drop everything that precedes the first "BlackBerry" token.

    The user agent is returned untouched when it contains no "BlackBerry"
    token, or when it also mentions "AppleWebKit".
    """
    position = user_agent.find(u"BlackBerry")
    if position == -1 or u"AppleWebKit" in user_agent:
        return user_agent
    return user_agent[position:]
78 84
def yeswap(user_agent):
    """Return user_agent with every yeswap_re match removed.

    That is, each "Mozilla/4.0 (YesWAP mobile phone proxy)" token and the
    whitespace in front of it is deleted.
    """
    cleaned = yeswap_re.sub('', user_agent)
    return cleaned
90
def locale_remover(user_agent):
    """Delete the first locale_re match (e.g. "; fr") from user_agent."""
    # count=1: only the first locale-looking token is removed.
    stripped = locale_re.sub('', user_agent, count=1)
    return stripped
94
def serial_no(user_agent):
    """Delete the first serial_number_re match from user_agent."""
    # count=1: only the first serial-number token is removed.
    stripped = serial_number_re.sub("", user_agent, count=1)
    return stripped
98
def _combine_funcs(*funcs):
    """Build a normalizer that chains the given normalizer functions.

    The returned callable takes a user agent string, passes it through each
    function in ``funcs`` from left to right, folds double spaces into single
    ones and strips leading/trailing whitespace from the result.
    """
    def normalizer(user_agent):
        for step in funcs:
            user_agent = step(user_agent)
        # Removing a token from the middle of the string can leave two spaces
        # side by side; fold each such pair back into one space.  (Replacing
        # a single space with a single space, as before, did nothing.)
        return user_agent.replace('  ', ' ').strip()
    return normalizer


# Normalizer shared by all user agents; the functions run in the order listed.
generic = _combine_funcs(serial_no, blackberry, uplink, yeswap, babelfish,
                         locale_remover)
def prenormalized(normalizer_func):
    """Decorator that runs the generic normalizer before normalizer_func.

    The returned callable takes a user agent string, applies ``generic`` to
    it and hands the result to ``normalizer_func``, returning whatever that
    function returns.  The wrapper carries the docstring and the name of
    ``normalizer_func``.
    """
    def combined_normalizer(user_agent):
        user_agent = generic(user_agent)
        return normalizer_func(user_agent)

    combined_normalizer.__doc__ = normalizer_func.__doc__
    # Keep the wrapped function's name as well, so decorated normalizers can
    # be told apart in tracebacks and debug output.  getattr() keeps callables
    # that have no __name__ working exactly as before.
    combined_normalizer.__name__ = getattr(normalizer_func, "__name__",
                                           combined_normalizer.__name__)
    return combined_normalizer
120 121 # specific user agent normalizers 122 123 -def _specific_normalizer(user_agent, search_string, vsn_size):
124 if search_string in user_agent: 125 start = user_agent.index(search_string) 126 user_agent = user_agent[start:start + vsn_size] 127 return user_agent
128
@prenormalized
def chrome(user_agent):
    """Reduce the user agent to the 8 characters starting at "Chrome".

    A user agent without "Chrome" is returned as is.
    """
    return _specific_normalizer(user_agent, search_string=u"Chrome",
                                vsn_size=8)
134
@prenormalized
def firefox(user_agent):
    """Reduce the user agent to the 11 characters starting at "Firefox".

    A user agent without "Firefox" is returned as is.
    """
    return _specific_normalizer(user_agent, search_string=u"Firefox",
                                vsn_size=11)
140
@prenormalized
def konqueror(user_agent):
    """Reduce the user agent to the first konqueror_re match.

    That is the "Konqueror/<digit>" token; a user agent without one is
    returned as is.
    """
    found = konqueror_re.search(user_agent)
    return found.group(1) if found else user_agent
149
@prenormalized
def msie(user_agent):
    """Truncate the user agent 9 characters after the start of "MSIE".

    Everything before "MSIE" is kept; a user agent without "MSIE" is
    returned as is.
    """
    marker = u"MSIE"
    if marker not in user_agent:
        return user_agent
    cut = user_agent.index(marker) + 9
    return user_agent[:cut]
157
@prenormalized
def safari(user_agent):
    """
    Return the safari user agent without the characters between
    "; U;" and "Safari/xxx".

    e.g. Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; fr) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.1 Safari/525.18
    becomes
    Mozilla/5.0 (Macintosh Safari/525

    A user agent that safari_re does not match is returned as is.
    """
    found = safari_re.search(user_agent)
    if not found or len(found.groups()) < 3:
        return user_agent
    head = found.group(1).strip()
    tail = found.group(3).strip()
    return " ".join([head, tail])
174
@prenormalized
def lg(user_agent):
    """Drop everything that precedes the first "LG" in the user agent.

    A user agent without "LG" is returned as is.
    """
    offset = user_agent.find(u"LG")
    if offset == -1:
        return user_agent
    return user_agent[offset:]
183
@prenormalized
def maemo(user_agent):
    """Drop everything that precedes the first "Maemo" in the user agent.

    A user agent without "Maemo" is returned as is.
    """
    _, marker, rest = user_agent.partition(u"Maemo")
    if not marker:
        return user_agent
    return marker + rest
192
@prenormalized
def android(user_agent):
    """Trim the text between an Android version token and the next ";".

    Every android_re match ("Android", a separator, a version such as
    "2.1", then anything up to and including the following ";") is replaced
    by its version token plus ";".  A user agent that does not match is
    returned as is.
    """
    # A backreference keeps each match's own version token and avoids feeding
    # matched text to sub() as a replacement template, where a backslash in
    # the match would be interpreted as an escape.
    return android_re.sub(r"\1;", user_agent)
#def novarra_google_translator_remover(user_agent):
#    return novarra_google_re.sub('', user_agent, 1)

#def opera(user_agent):
#    #print "opera normalizer"
#    return _specific_normalizer(user_agent, u"Opera", 7)


#def lguplus(user_agent):
#    match = lguplus_re.search(user_agent)
#    if match:
#        user_agent = match.group(1)
#    return user_agent

#def ibm_wbi(user_agent):
#    #print "normalizer ibm_wbi"
#    return ibm_wbi_re.sub('', user_agent)

#def gmcc(user_agent):
#    #print "normalizer gmcc"
#    return gmcc_re.sub('', user_agent)