Package parsedatetime :: Module pdt_locales
[hide private]
[frames | no frames]

Source Code for Module parsedatetime.pdt_locales

  1  # -*- coding: utf-8 -*- 
  2  """ 
  3  pdt_locales 
  4   
  5  All of the included locale classes shipped with pdt. 
  6  """ 
  7   
  8  import datetime 
  9   
 10  try: 
 11     import PyICU as pyicu 
 12  except: 
 13     pyicu = None 
 14   
 15   
def lcase(x):
    """Return ``x`` converted to lowercase.

    Exists as a named function so it can be handed to ``map``.
    """
    lowered = x.lower()
    return lowered
18 19
class pdtLocale_base(object):
    """
    Default values for Locales.

    The defaults set here are English; the other locale classes in this
    module call this ``__init__`` and then override individual attributes.
    ``locale_keys`` holds the names of the attributes populated by
    ``__init__``.
    """
    locale_keys = [
        'MonthOffsets', 'Months', 'WeekdayOffsets', 'Weekdays',
        'dateFormats', 'dateSep', 'dayOffsets', 'dp_order',
        'localeID', 'meridian', 'Modifiers', 're_sources', 're_values',
        'shortMonths', 'shortWeekdays', 'timeFormats', 'timeSep', 'units',
        'uses24', 'usesMeridian', 'numbers',
    ]

    def __init__(self):
        """Populate every attribute named in ``locale_keys`` with its default."""
        self.localeID = None  # don't use a unicode string
        self.dateSep = ['/', '.']
        self.timeSep = [':']
        self.meridian = ['AM', 'PM']
        self.usesMeridian = True
        self.uses24 = True

        # left empty here; nothing in this class fills them in
        self.WeekdayOffsets = {}
        self.MonthOffsets = {}

        # always lowercase any lookup values - helper code expects that
        self.Weekdays = [
            'monday', 'tuesday', 'wednesday',
            'thursday', 'friday', 'saturday', 'sunday',
        ]
        # 'thu' (not 'th'): keeps Thursday a real abbreviation and avoids
        # colliding with the 'th' alternative of re_values['daysuffix']
        self.shortWeekdays = [
            'mon', 'tues', 'wed',
            'thu', 'fri', 'sat', 'sun',
        ]
        self.Months = [
            'january', 'february', 'march',
            'april', 'may', 'june',
            'july', 'august', 'september',
            'october', 'november', 'december',
        ]
        self.shortMonths = [
            'jan', 'feb', 'mar',
            'apr', 'may', 'jun',
            'jul', 'aug', 'sep',
            'oct', 'nov', 'dec',
        ]

        # use the same formats as ICU by default
        self.dateFormats = {
            'full': 'EEEE, MMMM d, yyyy',
            'long': 'MMMM d, yyyy',
            'medium': 'MMM d, yyyy',
            'short': 'M/d/yy',
        }
        self.timeFormats = {
            'full': 'h:mm:ss a z',
            'long': 'h:mm:ss a z',
            'medium': 'h:mm:ss a',
            'short': 'h:mm a',
        }

        # order of the date parts: month, day, year
        self.dp_order = ['m', 'd', 'y']

        # Used to parse expressions like "in 5 hours"
        self.numbers = {
            'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4,
            'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9,
            'ten': 10, 'eleven': 11, 'twelve': 12, 'thirteen': 13,
            'fourteen': 14, 'fifteen': 15, 'sixteen': 16,
            'seventeen': 17, 'eighteen': 18, 'nineteen': 19,
            'twenty': 20,
        }

        # this will be added to re_values later
        self.units = {
            'seconds': ['second', 'seconds', 'sec', 's'],
            'minutes': ['minute', 'minutes', 'min', 'm'],
            'hours': ['hour', 'hours', 'hr', 'h'],
            'days': ['day', 'days', 'dy', 'd'],
            'weeks': ['week', 'weeks', 'wk', 'w'],
            'months': ['month', 'months', 'mth'],
            'years': ['year', 'years', 'yr', 'y'],
        }

        # text constants to be used by later regular expressions
        self.re_values = {
            'specials': 'in|on|of|at',
            'timeseperator': ':',
            'rangeseperator': '-',
            'daysuffix': 'rd|st|nd|th',
            'meridian': 'am|pm|a.m.|p.m.|a|p',
            'qunits': 'h|m|s|d|w|y',
            'now': ['now'],
        }

        # Used to adjust the returned date before/after the source
        self.Modifiers = {
            'from': 1,
            'before': -1,
            'after': 1,
            'ago': -1,
            'prior': -1,
            'prev': -1,
            'last': -1,
            'next': 1,
            'previous': -1,
            'in a': 2,
            'end of': 0,
            'eod': 1,
            'eom': 1,
            'eoy': 1,
        }

        self.dayOffsets = {
            'tomorrow': 1,
            'today': 0,
            'yesterday': -1,
        }

        # special day and/or times, i.e. lunch, noon, evening
        # each element in the dictionary is a dictionary that is used
        # to fill in any value to be replaced - the current date/time will
        # already have been populated by the method buildSources
        self.re_sources = {
            'noon': {'hr': 12, 'mn': 0, 'sec': 0},
            'lunch': {'hr': 12, 'mn': 0, 'sec': 0},
            'morning': {'hr': 6, 'mn': 0, 'sec': 0},
            'breakfast': {'hr': 8, 'mn': 0, 'sec': 0},
            'dinner': {'hr': 19, 'mn': 0, 'sec': 0},
            'evening': {'hr': 18, 'mn': 0, 'sec': 0},
            'midnight': {'hr': 0, 'mn': 0, 'sec': 0},
            'night': {'hr': 21, 'mn': 0, 'sec': 0},
            'tonight': {'hr': 21, 'mn': 0, 'sec': 0},
            'eod': {'hr': 17, 'mn': 0, 'sec': 0},
        }
138 139
class pdtLocale_icu(pdtLocale_base):
    """
    Create a locale from pyICU.

    When the module-level ``pyicu`` is ``None``, ``self.icu`` stays ``None``
    and the instance keeps the defaults set by ``pdtLocale_base``.
    """

    def __init__(self, localeID):
        """
        Build the locale data for ``localeID`` from ICU.

        ``localeID`` may be ``None``, in which case ``'en_US'`` is used.
        """
        super(pdtLocale_icu, self).__init__()

        self.icu = None

        if pyicu is not None:
            if localeID is None:
                localeID = 'en_US'
            self.icu = pyicu.Locale(localeID)

        if self.icu is None:
            return

        # grab spelled out format of all numbers from 0 through 99
        rbnf = pyicu.RuleBasedNumberFormat(pyicu.URBNFRuleSetTag.SPELLOUT, self.icu)
        self.numbers = dict((rbnf.format(i), i) for i in range(100))

        self.symbols = pyicu.DateFormatSymbols(self.icu)

        # grab ICU list of weekdays, skipping first entry which
        # is always blank
        wd = list(map(lcase, self.symbols.getWeekdays()[1:]))
        swd = list(map(lcase, self.symbols.getShortWeekdays()[1:]))

        # store them in our list with Monday first (ICU puts Sunday first)
        self.Weekdays = wd[1:] + wd[0:1]
        self.shortWeekdays = swd[1:] + swd[0:1]
        self.Months = list(map(lcase, self.symbols.getMonths()))
        self.shortMonths = list(map(lcase, self.symbols.getShortMonths()))

        self.icu_df = {
            'full': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kFull, self.icu),
            'long': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kLong, self.icu),
            'medium': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kMedium, self.icu),
            'short': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kShort, self.icu),
        }
        self.icu_tf = {
            'full': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kFull, self.icu),
            'long': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kLong, self.icu),
            'medium': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kMedium, self.icu),
            'short': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kShort, self.icu),
        }
        self.dateFormats = {
            'full': self.icu_df['full'].toPattern(),
            'long': self.icu_df['long'].toPattern(),
            'medium': self.icu_df['medium'].toPattern(),
            'short': self.icu_df['short'].toPattern(),
        }
        self.timeFormats = {
            'full': self.icu_tf['full'].toPattern(),
            'long': self.icu_tf['long'].toPattern(),
            'medium': self.icu_tf['medium'].toPattern(),
            'short': self.icu_tf['short'].toPattern(),
        }

        am = ''
        pm = ''
        ts = ''

        # ICU doesn't seem to provide directly the date or time separator
        # so we have to figure it out
        o = self.icu_tf['short']
        s = self.timeFormats['short']

        self.usesMeridian = 'a' in s
        self.uses24 = 'H' in s

        # '11:45 AM' or '11:45'
        s = o.format(datetime.datetime(2003, 10, 30, 11, 45))

        # ': AM' or ':'
        s = s.replace('11', '').replace('45', '')

        if len(s) > 0:
            ts = s[0]

        if self.usesMeridian:
            # whatever follows the separator is the AM marker
            am = s[1:].strip()

            # '11:45 PM' or '23:45'
            s = o.format(datetime.datetime(2003, 10, 30, 23, 45))

            if self.uses24:
                s = s.replace('23', '')
            else:
                s = s.replace('11', '')

            # 'PM' or ''
            pm = s.replace('45', '').replace(ts, '').strip()

        self.timeSep = [ts]
        self.meridian = [am, pm]

        o = self.icu_df['short']
        s = o.format(datetime.datetime(2003, 10, 30, 11, 45))
        # Strip the 4-digit year BEFORE the 2-digit one: removing '03' first
        # turns '2003' into '20', which would then be mistaken for the
        # separator in year-first formats such as '2003-10-30'.
        s = s.replace('10', '').replace('30', '').replace('2003', '').replace('03', '')

        if len(s) > 0:
            ds = s[0]
        else:
            ds = '/'

        self.dateSep = [ds]

        # the first letter of each pattern field gives the date-part order
        pattern_fields = self.dateFormats['short'].lower().split(ds)
        self.dp_order = [field[:1] for field in pattern_fields if len(field) > 0]
252 253
class pdtLocale_en(pdtLocale_base):
    """
    Locale for en_US.

    Uses the base class defaults, changing only the locale identifier and
    the 24-hour flag.
    """

    def __init__(self):
        """Apply the base defaults, then the en_US overrides."""
        super(pdtLocale_en, self).__init__()

        self.uses24 = False
        self.localeID = 'en_US'  # don't use a unicode string
263 264
class pdtLocale_au(pdtLocale_base):
    """
    en_AU Locale.

    Starts from the base defaults and switches to day-month-year ordering
    and the matching date formats.
    """

    def __init__(self):
        """Apply the base defaults, then the en_AU overrides."""
        super(pdtLocale_au, self).__init__()

        # 'en_AU', matching the locale named in the class docstring
        # (the previous value 'en_A' is not a valid locale identifier)
        self.localeID = 'en_AU'  # don't use a unicode string
        self.dateSep = ['-', '/']
        self.uses24 = False

        self.dateFormats['full'] = 'EEEE, d MMMM yyyy'
        self.dateFormats['long'] = 'd MMMM yyyy'
        self.dateFormats['medium'] = 'dd/MM/yyyy'
        self.dateFormats['short'] = 'd/MM/yy'

        self.timeFormats['long'] = self.timeFormats['full']

        # day comes before month
        self.dp_order = ['d', 'm', 'y']
284 285
class pdtLocale_es(pdtLocale_base):
    """
    Locale for es.

    Note from the original author: they do not speak Spanish, so many of
    the items inherited from the base class are still in English.
    """

    def __init__(self):
        """Apply the base defaults, then the Spanish overrides."""
        super(pdtLocale_es, self).__init__()

        self.localeID = 'es'  # don't use a unicode string
        self.dateSep = ['/']
        self.usesMeridian = False
        self.uses24 = True

        # day and month names, full and abbreviated
        self.Weekdays = [
            'lunes', 'martes', 'mi\xe9rcoles',
            'jueves', 'viernes', 's\xe1bado', 'domingo',
        ]
        self.shortWeekdays = [
            'lun', 'mar', 'mi\xe9',
            'jue', 'vie', 's\xe1b', 'dom',
        ]
        self.Months = [
            'enero', 'febrero', 'marzo',
            'abril', 'mayo', 'junio',
            'julio', 'agosto', 'septiembre',
            'octubre', 'noviembre', 'diciembre',
        ]
        self.shortMonths = [
            'ene', 'feb', 'mar',
            'abr', 'may', 'jun',
            'jul', 'ago', 'sep',
            'oct', 'nov', 'dic',
        ]

        # replace all four entries of the inherited format dictionaries in place
        self.dateFormats.update({
            'full': "EEEE d' de 'MMMM' de 'yyyy",
            'long': "d' de 'MMMM' de 'yyyy",
            'medium': "dd-MMM-yy",
            'short': "d/MM/yy",
        })
        self.timeFormats.update({
            'full': "HH'H'mm' 'ss z",
            'long': "HH:mm:ss z",
            'medium': "HH:mm:ss",
            'short': "HH:mm",
        })

        self.dp_order = ['d', 'm', 'y']
327 328
class pdtLocale_de(pdtLocale_base):
    """
    de_DE Locale constants.

    Entries are added on top of the base class dictionaries, so the English
    keys of ``Modifiers``, ``dayOffsets`` and ``re_sources`` remain
    available alongside the German ones.

    Contributed by Debian parsedatetime package maintainer Bernd Zeimetz <bzed@debian.org>
    """

    def __init__(self):
        """Apply the base defaults, then the German overrides and additions."""
        super(pdtLocale_de, self).__init__()

        self.localeID = 'de_DE'  # don't use a unicode string
        self.dateSep = ['.']
        self.timeSep = [':']
        self.meridian = []
        self.usesMeridian = False
        self.uses24 = True

        self.Weekdays = [
            'montag', 'dienstag', 'mittwoch',
            'donnerstag', 'freitag', 'samstag', 'sonntag',
        ]
        self.shortWeekdays = [
            'mo', 'di', 'mi',
            'do', 'fr', 'sa', 'so',
        ]
        self.Months = [
            'januar', 'februar', 'm\xe4rz',
            'april', 'mai', 'juni',
            'juli', 'august', 'september',
            'oktober', 'november', 'dezember',
        ]
        self.shortMonths = [
            'jan', 'feb', 'mrz',
            'apr', 'mai', 'jun',
            'jul', 'aug', 'sep',
            'okt', 'nov', 'dez',
        ]

        self.dateFormats['full'] = 'EEEE, d. MMMM yyyy'
        self.dateFormats['long'] = 'd. MMMM yyyy'
        self.dateFormats['medium'] = 'dd.MM.yyyy'
        self.dateFormats['short'] = 'dd.MM.yy'

        self.timeFormats['full'] = 'HH:mm:ss v'
        self.timeFormats['long'] = 'HH:mm:ss z'
        self.timeFormats['medium'] = 'HH:mm:ss'
        self.timeFormats['short'] = 'HH:mm'

        self.dp_order = ['d', 'm', 'y']

        self.units['seconds'] = ['sekunden', 'sek', 's']
        self.units['minutes'] = ['minuten', 'min', 'm']
        self.units['hours'] = ['stunden', 'std', 'h']
        self.units['days'] = ['tag', 'tage', 't']
        self.units['weeks'] = ['wochen', 'w']
        # the short version would be a capital M,
        # as I understand it we can't distinguish
        # between m for minutes and M for months.
        self.units['months'] = ['monat', 'monate']
        self.units['years'] = ['jahr', 'jahre', 'j']

        self.re_values['specials'] = 'am|dem|der|im|in|den|zum'
        self.re_values['timeseperator'] = ':'
        self.re_values['rangeseperator'] = '-'
        self.re_values['daysuffix'] = ''
        self.re_values['qunits'] = 'h|m|s|t|w|m|j'
        self.re_values['now'] = ['jetzt']

        # Used to adjust the returned date before/after the source
        # still looking for insight on how to translate all of them to german.
        self.Modifiers['from'] = 1
        self.Modifiers['before'] = -1
        self.Modifiers['after'] = 1
        self.Modifiers['vergangener'] = -1
        self.Modifiers['vorheriger'] = -1
        self.Modifiers['prev'] = -1
        self.Modifiers['letzter'] = -1
        self.Modifiers['n\xe4chster'] = 1
        self.Modifiers['dieser'] = 0
        self.Modifiers['previous'] = -1
        self.Modifiers['in a'] = 2
        self.Modifiers['end of'] = 0
        self.Modifiers['eod'] = 0
        self.Modifiers['eo'] = 0

        # morgen/abermorgen does not work, see http://code.google.com/p/parsedatetime/issues/detail?id=19
        # (note that 'morgen' is registered both here and in re_sources below)
        self.dayOffsets['morgen'] = 1
        self.dayOffsets['heute'] = 0
        self.dayOffsets['gestern'] = -1
        self.dayOffsets['vorgestern'] = -2
        self.dayOffsets['\xfcbermorgen'] = 2

        # special day and/or times, i.e. lunch, noon, evening
        # each element in the dictionary is a dictionary that is used
        # to fill in any value to be replaced - the current date/time will
        # already have been populated by the method buildSources
        self.re_sources['mittag'] = {'hr': 12, 'mn': 0, 'sec': 0}
        self.re_sources['mittags'] = {'hr': 12, 'mn': 0, 'sec': 0}
        self.re_sources['mittagessen'] = {'hr': 12, 'mn': 0, 'sec': 0}
        self.re_sources['morgen'] = {'hr': 6, 'mn': 0, 'sec': 0}
        self.re_sources['morgens'] = {'hr': 6, 'mn': 0, 'sec': 0}
        # "fruehstueck" with u-umlauts (\xfc); the earlier key used the
        # invalid escape '\e' and a-umlauts, so it could never match
        self.re_sources['fr\xfchst\xfcck'] = {'hr': 8, 'mn': 0, 'sec': 0}
        self.re_sources['abendessen'] = {'hr': 19, 'mn': 0, 'sec': 0}
        self.re_sources['abend'] = {'hr': 18, 'mn': 0, 'sec': 0}
        self.re_sources['abends'] = {'hr': 18, 'mn': 0, 'sec': 0}
        self.re_sources['mitternacht'] = {'hr': 0, 'mn': 0, 'sec': 0}
        self.re_sources['nacht'] = {'hr': 21, 'mn': 0, 'sec': 0}
        self.re_sources['nachts'] = {'hr': 21, 'mn': 0, 'sec': 0}
        self.re_sources['heute abend'] = {'hr': 21, 'mn': 0, 'sec': 0}
        self.re_sources['heute nacht'] = {'hr': 21, 'mn': 0, 'sec': 0}
        self.re_sources['feierabend'] = {'hr': 17, 'mn': 0, 'sec': 0}
433