Package pywurfl :: Package algorithms
[hide private]
[frames] | no frames]

Source Code for Package pywurfl.algorithms

  1  # pywurfl Algorithms - Wireless Universal Resource File UA search algorithms 
  2  # Copyright (C) 2006-2011 Armand Lynch 
  3  # 
  4  # This library is free software; you can redistribute it and/or modify it 
  5  # under the terms of the GNU Lesser General Public License as published by the 
  6  # Free Software Foundation; either version 2.1 of the License, or (at your 
  7  # option) any later version. 
  8  # 
  9  # This library is distributed in the hope that it will be useful, but WITHOUT 
 10  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
 11  # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 
 12  # details. 
 13  # 
 14  # You should have received a copy of the GNU Lesser General Public License 
 15  # along with this library; if not, write to the Free Software Foundation, Inc., 
 16  # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 17  # 
 18  # Armand Lynch <lyncha@users.sourceforge.net> 
 19   
 20  __doc__ = \ 
 21  """ 
 22  pywurfl search algorithms 
 23  """ 
 24   
 25  import re 
 26   
 27  from pywurfl.exceptions import DeviceNotFound 
 28  from pywurfl.algorithms.wurfl.handlers import handlers 
 29   
 30  import Levenshtein 
 31   
 32   
 33  __author__ = "Armand Lynch <lyncha@users.sourceforge.net>" 
 34  __copyright__ = "Copyright 2006-2011, Armand Lynch" 
 35  __license__ = "LGPL" 
 36  __url__ = "http://celljam.net/" 
 37   
 38   
class Algorithm(object):
    """
    Abstract parent of every pywurfl search algorithm.

    Concrete algorithms subclass this and override L{__call__}.
    """

    def __call__(self, ua, devices):
        """
        Search hook that each pywurfl algorithm class has to provide.

        @param ua: The user agent
        @type ua: string
        @param devices: The devices object to search
        @type devices: Devices
        @rtype: Device
        @raises NotImplementedError: Always; the base class performs no
                                     search of its own.
        """
        raise NotImplementedError()
54 55
class JaroWinkler(Algorithm):
    """
    Jaro-Winkler Search Algorithm
    """

    def __init__(self, accuracy=1.0, weight=0.05, use_normalized_ua=True):
        """
        @param accuracy: The tolerance that the Jaro-Winkler algorithm will
                         use to determine if a user agent matches
                         0.0 <= accuracy <= 1.0
        @type accuracy: float
        @param weight: The prefix weight is inverse value of common prefix
                       length sufficient to consider the strings
                       'identical' (excerpt from the Levenshtein module
                       documentation).
        @type weight: float
        @param use_normalized_ua: Specify whether or not this algorithm
                                  requires a normalized user agent for search.
        @type use_normalized_ua: bool
        """
        self.accuracy = accuracy
        self.weight = weight
        self.use_normalized_ua = use_normalized_ua

    def __call__(self, ua, devices):
        """
        Return the device whose user agent scores highest against C{ua},
        provided that score reaches C{self.accuracy}.

        @param ua: The user agent
        @type ua: string
        @param devices: The devices object to search
        @type devices: Devices
        @rtype: Device
        @raises pywurfl.DeviceNotFound: If there are no user agents to
                                        compare against, or the best score
                                        is below C{self.accuracy}.
        """
        # (score, user agent) pairs: max() picks the highest score and, on
        # equal scores, the greatest user agent string.
        scored = [(Levenshtein.jaro_winkler(known_ua, ua, self.weight),
                   known_ua)
                  for known_ua in devices.devuas]

        # max() on an empty sequence raises ValueError; report the documented
        # DeviceNotFound instead when there is nothing to search.
        if not scored:
            raise DeviceNotFound(ua)

        best_score, best_ua = max(scored)
        if best_score >= self.accuracy:
            return devices.devuas[best_ua]
        raise DeviceNotFound(ua)
96 97
class LevenshteinDistance(Algorithm):
    """
    Search algorithm that picks the device with the smallest Levenshtein
    distance to the requested user agent.
    """

    def __init__(self, use_normalized_ua=True):
        """
        @param use_normalized_ua: Specify whether or not this algorithm
                                  requires a normalized user agent for search.
        @type use_normalized_ua: bool
        """
        self.use_normalized_ua = use_normalized_ua

    def __call__(self, ua, devices):
        """
        Return the device whose user agent is closest to C{ua}.

        @param ua: The user agent
        @type ua: string
        @param devices: The devices object to search
        @type devices: Devices
        @rtype: Device
        """
        # Lazily pair each known user agent with its distance; min() then
        # selects the smallest distance, falling back to string order on ties.
        distances = ((Levenshtein.distance(ua, known_ua), known_ua)
                     for known_ua in devices.devuas)
        _, nearest_ua = min(distances)
        return devices.devuas[nearest_ua]
124 125
class Tokenizer(Algorithm):
    """
    Tokenizer Search Algorithm

    The user agent is split into tokens and a regular expression is grown one
    token at a time, narrowing the set of matching device user agents.
    """
    # Characters that separate tokens in a user agent string.
    tokenize_chars = ('/', '.', ',', ';', '-', '_', ' ', '(', ')')
    # Character class of all separators, repeated zero or more times; it is
    # inserted between consecutive tokens of the search expression.
    base_regex = '[\\' + '\\'.join(tokenize_chars) + ']*'

    def __init__(self, devwindow=30, use_normalized_ua=True):
        """
        @param devwindow: If more than devwindow user agents match,
                          return the generic device.
        @type devwindow: integer
        @param use_normalized_ua: Specify whether or not this algorithm
                                  requires a normalized user agent for search.
        @type use_normalized_ua: bool
        """
        self.devwindow = devwindow
        self.use_normalized_ua = use_normalized_ua

    def _tokenize(self, s):
        """
        Split a user agent on C{tokenize_chars} and whitespace.

        @param s: The user agent to tokenize
        @type s: string
        @return: The tokens, each escaped for use inside a regular expression
        @rtype: list
        """
        for delimiter in self.tokenize_chars:
            s = s.replace(delimiter, ' ')
        return [re.escape(token) for token in s.split()]

    def __call__(self, ua, devices):
        """
        @param ua: The user agent
        @type ua: string
        @param devices: The devices object to search
        @type devices: Devices
        @rtype: Device
        """
        # list() so the candidates can be indexed even when keys() returns a
        # view (Python 3) rather than a list (Python 2).
        candidates = list(devices.devuas.keys())
        # Before any token is applied every user agent still matches. Starting
        # from here keeps the post-loop check well defined when the user
        # agent produces no tokens at all.
        matched = candidates
        pattern = ''

        for token in self._tokenize(ua):
            if pattern:
                pattern += self.base_regex + token
            else:
                pattern += token

            matcher = re.compile(pattern + '.*', re.I)
            matched = [known_ua for known_ua in candidates
                       if matcher.match(known_ua)]

            # If the last regex didn't produce any matches and more than
            # devwindow devices were matched before, return a generic device.
            # Else, there is a device that "looks" like some others so return
            # the first one.
            if not matched:
                if len(candidates) > self.devwindow:
                    return devices.devids['generic']
                return devices.devuas[candidates[0]]

            # We found one good looking match
            if len(matched) == 1:
                return devices.devuas[matched[0]]

            # We've got matches so search some more
            candidates = matched

        # We've got some matches but we ran out of tokens.
        # If we matched more than devwindow, return a generic device.
        # Else we've got some devices within the devwindow so return the first
        # one.
        if len(matched) > self.devwindow:
            return devices.devids['generic']
        return devices.devuas[matched[0]]
203 204
class TwoStepAnalysis(Algorithm):
    """
    WURFL Two Step Analysis algorithm based on the Java API implementation
    """

    def __init__(self, devices, use_normalized_ua=False):
        """
        Register every user agent of C{devices} with the first handler that
        accepts it.

        The handlers come from the module-level C{handlers} import, so the
        registrations are made on those shared handler objects.

        @param devices: The devices object whose user agents are registered;
                        it is also kept as the default search target for
                        L{__call__}.
        @type devices: Devices
        @param use_normalized_ua: Specify whether or not this algorithm
                                  requires a normalized user agent for search.
        @type use_normalized_ua: bool
        """
        self.use_normalized_ua = use_normalized_ua
        # Remembered so that __call__ has a usable target when it is invoked
        # without an explicit devices argument.
        self._devices = devices
        for ua in devices.uas:
            for h in handlers:
                if h.can_handle(ua):
                    h.add(ua, devices.devuas[ua].devid)
                    # Only the first accepting handler gets the user agent.
                    break

    def _determine_handler(self, ua):
        """
        Return the first handler that accepts C{ua}.

        @param ua: The user agent
        @type ua: string
        @return: The matching handler, or None if no handler accepts C{ua}
        """
        for handler in handlers:
            if handler.can_handle(ua):
                return handler
        return None

    def __call__(self, ua, devices=None):
        """
        @param ua: The user agent
        @type ua: string
        @param devices: The devices object to search; when omitted, the
                        devices object given to the constructor is used.
        @type devices: Devices
        @rtype: Device
        """
        if devices is None:
            devices = self._devices

        handler = self._determine_handler(ua)
        # Without a handler there is no match; None is not expected to be a
        # device id, so the lookup below falls through to the generic device.
        match = handler(ua) if handler is not None else None
        try:
            return devices.devids[match]
        except KeyError:
            # AJL Tue Dec 14 16:08:13 EST 2010
            # Execution here should not happen unless there is a bug within
            # the handlers or the WURFL has not been sufficiently patched
            # (ie. the desktop patch has not been applied).
            return devices.devids[u"generic"]
239