Package parsedatetime
[hide private]
[frames] | no frames]

Source Code for Package parsedatetime

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  # 
   4  # vim: sw=2 ts=2 sts=2 
   5  # 
   6  # Copyright 2004-2014 Mike Taylor 
   7  # 
   8  # Licensed under the Apache License, Version 2.0 (the "License"); 
   9  # you may not use this file except in compliance with the License. 
  10  # You may obtain a copy of the License at 
  11  # 
  12  #     http://www.apache.org/licenses/LICENSE-2.0 
  13  # 
  14  # Unless required by applicable law or agreed to in writing, software 
  15  # distributed under the License is distributed on an "AS IS" BASIS, 
  16  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
  17  # See the License for the specific language governing permissions and 
  18  # limitations under the License. 
  19   
  20  """parsedatetime 
  21   
  22  Parse human-readable date/time text. 
  23   
  24  Requires Python 2.6 or later 
  25  """ 
  26   
  27  __author__       = 'Mike Taylor (bear@bear.im)' 
  28  __copyright__    = 'Copyright (c) 2004 Mike Taylor' 
  29  __license__      = 'Apache v2.0' 
  30  __version__      = '1.5' 
  31  __contributors__ = [ 'Darshana Chhajed', 
  32                       'Michael Lim (lim.ck.michael@gmail.com)', 
  33                       'Bernd Zeimetz (bzed@debian.org)', 
  34                     ] 
  35   
  36  import re 
  37  import time 
  38  import datetime 
  39  import calendar 
  40  import logging 
  41  import email.utils 
  42   
  43  try: 
  44      from itertools import imap 
  45  except ImportError: 
  46      imap = map 
  47  from itertools import chain 
  48   
  49  from . import pdt_locales 
  50   
  51  # as a library, do *not* setup logging 
  52  # see http://docs.python.org/2/howto/logging.html#configuring-logging-for-a-library 
  53  # Set default logging handler to avoid "No handler found" warnings. 
  54  import logging 
  55   
  56  try:  # Python 2.7+ 
  57      from logging import NullHandler 
  58  except ImportError: 
59 - class NullHandler(logging.Handler):
60 - def emit(self, record):
61 pass
62 63 log = logging.getLogger(__name__) 64 log.addHandler(NullHandler()) 65 66 pdtLocales = { 'icu': pdt_locales.pdtLocale_icu, 67 'en_US': pdt_locales.pdtLocale_en, 68 'en_AU': pdt_locales.pdtLocale_au, 69 'es_ES': pdt_locales.pdtLocale_es, 70 'de_DE': pdt_locales.pdtLocale_de, 71 } 72 73 # Copied from feedparser.py 74 # Universal Feedparser 75 # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. 76 # Originally a def inside of _parse_date_w3dtf()
def _extract_date(m):
    """
    Convert a date regex match into a C{(year, month, day)} tuple.

    @type  m: re match object
    @param m: match exposing the named groups C{year}, C{julian},
              C{month} and C{day}

    @rtype:  tuple
    @return: C{(year, month, day)}; C{(0, 0, 0)} when the year is
             below 1000.  A missing month or day defaults to 1.
    """
    year = int(m.group('year'))
    if year < 100:
        # two-digit year: place it in the current century
        year = 100 * (time.gmtime()[0] // 100) + year
    if year < 1000:
        return 0, 0, 0

    julian = m.group('julian')
    if julian:
        julian = int(julian)
        # Floor division: under Python 3 a plain "/" yields a float,
        # which time.mktime() refuses as a month value.
        month = julian // 30 + 1
        day = julian % 30 + 1
        jday = None
        # Start from a rough month/day guess and nudge it until the
        # day-of-year reported for that date equals the requested one.
        while jday != julian:
            t = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0))
            jday = time.gmtime(t)[-2]  # tm_yday
            diff = abs(jday - julian)
            if jday > julian:
                if diff < day:
                    day = day - diff
                else:
                    month = month - 1
                    day = 31
            elif jday < julian:
                if day + diff < 28:
                    day = day + diff
                else:
                    month = month + 1
        return year, month, day

    month = m.group('month')
    day = 1
    if month is None:
        month = 1
    else:
        month = int(month)
        day = m.group('day')
        if day:
            day = int(day)
        else:
            day = 1
    return year, month, day
117 118 # Copied from feedparser.py 119 # Universal Feedparser 120 # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. 121 # Originally a def inside of _parse_date_w3dtf()
def _extract_time(m):
    """
    Convert a time regex match into an C{(hours, minutes, seconds)} tuple.

    @type  m: re match object or None
    @param m: match exposing the named groups C{hours}, C{minutes}
              and C{seconds}

    @rtype:  tuple
    @return: C{(hours, minutes, seconds)} as integers; C{(0, 0, 0)} when
             there is no match or no hours group.  Missing seconds
             default to 0 and a fractional part is discarded.
    """
    if not m:
        return 0, 0, 0
    hours = m.group('hours')
    if not hours:
        return 0, 0, 0
    hours = int(hours)
    minutes = int(m.group('minutes'))
    seconds = m.group('seconds')
    if seconds:
        # The W3DTF pattern in this module lets the seconds group carry
        # a fraction ("45.5" or "45,5"); int() on that raises ValueError,
        # so keep only the whole-second part.
        seconds = int(seconds.replace(',', '.').split('.')[0])
    else:
        seconds = 0
    return hours, minutes, seconds
136 137 138 # Copied from feedparser.py 139 # Universal Feedparser 140 # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. 141 # Modified to return a tuple instead of mktime 142 # 143 # Original comment: 144 # W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by 145 # Drake and licensed under the Python license. Removed all range checking 146 # for month, day, hour, minute, and second, since mktime will normalize 147 # these later
def _parse_date_w3dtf(dateString):
    """
    Parse a W3DTF-style date/time string into a 9-item tuple.

    The whole of C{dateString} has to match.  When a time part is
    present it must end in a time zone designator (C{Z} or a
    C{+hh[:]mm} / C{-hh[:]mm} offset); the designator is only matched,
    no offset is applied to the returned values.

    @type  dateString: string
    @param dateString: text to parse

    @rtype:  tuple
    @return: C{(year, month, day, hours, minutes, seconds, 0, 0, 0)},
             or C{None} when C{dateString} does not match completely
    """
    # the __extract_date and __extract_time methods were
    # copied-out so they could be used by my code --bear

    # Raw strings: "\d" inside a plain literal is an invalid escape
    # sequence and triggers a warning on current Python 3 releases.
    date_re = (r'(?P<year>\d\d\d\d)'
               r'(?:(?P<dsep>-|)'
               r'(?:(?P<julian>\d\d\d)'
               r'|(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?))?')
    tzd_re = r'(?P<tzd>[-+](?P<tzdhours>\d\d)(?::?(?P<tzdminutes>\d\d))|Z)'
    time_re = (r'(?P<hours>\d\d)(?P<tsep>:|)(?P<minutes>\d\d)'
               r'(?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?'
               + tzd_re)
    datetime_rx = re.compile('%s(?:T%s)?' % (date_re, time_re))

    m = datetime_rx.match(dateString)
    if (m is None) or (m.group() != dateString):
        return None
    return _extract_date(m) + _extract_time(m) + (0, 0, 0)


_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# Copied from feedparser.py
# Universal Feedparser
# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
# Modified to return a tuple instead of mktime
#
def _parse_date_rfc822(dateString):
    '''
    Parse an RFC822, RFC1123, RFC2822, or asctime-style date

    @type  dateString: string
    @param dateString: text to parse

    @rtype:  tuple or None
    @return: whatever C{email.utils.parsedate_tz} returns for the
             normalized text; C{None} for empty or whitespace-only input
    '''
    data = dateString.split()
    if not data:
        # Nothing to look at; indexing data[0] below would raise IndexError.
        return None

    # drop a leading day name ("Mon," / "mon") -- it carries no information
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        del data[0]

    if len(data) == 4:
        # time and "+zone" may be glued together: split them apart
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('')
        dateString = " ".join(data)

    if len(data) < 5:
        # no time part present: assume midnight GMT
        dateString += ' 00:00:00 GMT'
    return email.utils.parsedate_tz(dateString)
214 215 # # rfc822.py defines several time zones, but we define some extra ones. 216 # # 'ET' is equivalent to 'EST', etc. 217 # _additional_timezones = {'AT': -400, 'ET': -500, 218 # 'CT': -600, 'MT': -700, 219 # 'PT': -800} 220 # email.utils._timezones.update(_additional_timezones) 221 222
223 -class Calendar:
224 """ 225 A collection of routines to input, parse and manipulate date and times. 226 The text can either be 'normal' date values or it can be human readable. 227 """ 228
def __init__(self, constants=None):
    """
    Set up a L{Calendar} with its constants and cleared parser flags.

    @type  constants: object
    @param constants: Instance of the class L{Constants}; a default
                      L{Constants} is created when omitted

    @rtype:  object
    @return: L{Calendar} instance
    """
    # fall back to a default Constants when the caller gives none
    self.ptc = Constants() if constants is None else constants

    # one flag per kind of text recognised while parsing
    self.weekdyFlag = False     # monday/tuesday/...
    self.dateStdFlag = False    # 07/21/06
    self.dateStrFlag = False    # July 21st, 2006
    self.timeStdFlag = False    # 5:50
    self.meridianFlag = False   # am/pm
    self.dayStrFlag = False     # tomorrow/yesterday/today/..
    self.timeStrFlag = False    # lunch/noon/breakfast/...
    self.modifierFlag = False   # after/before/prev/next/..
    self.modifier2Flag = False  # after/before/prev/next/..
    self.unitsFlag = False      # hrs/weeks/yrs/min/..
    self.qunitsFlag = False     # h/m/t/d..

    # result markers, both start out as "nothing parsed"
    self.timeFlag = 0
    self.dateFlag = 0
259 260
def _convertUnitAsWords(self, unitText):
    """
    Placeholder for turning spelled-out numbers into their value.

    Intended conversions::

        Five                         = 5
        Twenty Five                  = 25
        Two hundred twenty five      = 225
        Two thousand and twenty five = 2025
        Two thousand twenty five     = 2025

    Not implemented yet: C{None} is returned for every input.

    @type  unitText: string
    @param unitText: number text to convert

    @rtype:  integer
    @return: numerical value of unitText (currently always C{None})
    """
    # TODO: implement this
    return None
279 280
def _buildTime(self, source, quantity, modifier, units):
    """
    Apply a C{quantity} of C{units} (scaled by C{modifier}) to C{source}.

    The quantity text is converted with L{_quantityToInt}, multiplied by
    the modifier's value when the modifier is a known one, and then added
    to the source time.  C{dateFlag} is set to 1 for year, month, week and
    day units, C{timeFlag} to 2 for hour, minute and second units.

    @type  source:   time
    @param source:   time to use as the base (or source); the current
                     local time when C{None}
    @type  quantity: string
    @param quantity: quantity string
    @type  modifier: string
    @param modifier: how quantity and units modify the source time
    @type  units:    string
    @param units:    unit of the quantity (i.e. hours, days, months, etc);
                     C{'dy'} is used when empty or C{None}

    @rtype:  struct_time
    @return: C{struct_time} of the calculated time
    """
    log.debug('_buildTime: [%s][%s][%s]' % (quantity, modifier, units))

    if source is None:
        source = time.localtime()

    quantity = '' if quantity is None else quantity.strip()
    qty = self._quantityToInt(quantity)

    if modifier in self.ptc.Modifiers:
        qty = qty * self.ptc.Modifiers[modifier]

    if units is None or units == '':
        units = 'dy'

    # plurals are handled by regex's (could be a bug tho)

    (yr, mth, dy, hr, mn, sec, _wd, _yd, _isdst) = source
    start = datetime.datetime(yr, mth, dy, hr, mn, sec)

    # map the unit text onto its canonical name; unknown text is kept as is
    realunit = next((name for name, aliases in self.ptc.units.items()
                     if units in aliases), units)

    log.debug('units %s --> realunit %s' % (units, realunit))

    if realunit in ('years', 'months'):
        # calendar-aware increments are delegated to self.inc()
        if realunit == 'years':
            target = self.inc(start, year=qty)
        else:
            target = self.inc(start, month=qty)
        self.dateFlag = 1
    elif realunit in ('days', 'weeks'):
        target = start + datetime.timedelta(**{str(realunit): qty})
        self.dateFlag = 1
    elif realunit in ('hours', 'minutes', 'seconds'):
        target = start + datetime.timedelta(**{str(realunit): qty})
        self.timeFlag = 2
    else:
        # unrecognised unit: leave the source time untouched
        target = start

    return target.timetuple()
355 356
def parseDate(self, dateString, sourceTime=None):
    """
    Parse short-form date strings::

        '05/28/2006' or '04.21'

    The numbers are split at the separators matched by C{CRE_DATE2} and
    assigned to month, day and year following the locale's C{dp_order}.
    When no year is given and the date lies before the source date, the
    year is advanced by C{YearParseStyle} (the same rule
    L{parseDateText} applies).

    @type  dateString: string
    @param dateString: text to convert to a C{datetime}
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base; the
                       current local time when C{None}

    @rtype:  struct_time
    @return: calculated C{struct_time} value of dateString, or the
             current local time (with C{dateFlag} and C{timeFlag} reset
             to 0) when the values do not form a valid date
    """
    if sourceTime is None:
        yr, mth, dy, hr, mn, sec, wd, yd, isdst = time.localtime()
    else:
        yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

    # values pulled from regex's will be stored here and later
    # assigned to mth, dy, yr based on information from the locale
    # -1 is used as the marker value because we want zero values
    # to be passed thru so they can be flagged as errors later
    v1 = -1
    v2 = -1
    v3 = -1

    s = dateString
    m = self.ptc.CRE_DATE2.search(s)
    if m is not None:
        index = m.start()
        v1 = int(s[:index])
        s = s[index + 1:]

    m = self.ptc.CRE_DATE2.search(s)
    if m is not None:
        index = m.start()
        v2 = int(s[:index])
        v3 = int(s[index + 1:])
    else:
        v2 = int(s.strip())

    d = {'m': mth, 'd': dy, 'y': yr}

    # dp_order says which of m/d/y each position stands for
    for i, n in enumerate((v1, v2, v3)):
        if n >= 0:
            d[self.ptc.dp_order[i]] = n

    # if the year is not specified and the date has already
    # passed, move the year forward by the configured amount
    if v3 == -1 and ((mth > d['m']) or (mth == d['m'] and dy > d['d'])):
        yr = d['y'] + self.ptc.YearParseStyle
    else:
        yr = d['y']

    mth = d['m']
    dy = d['d']

    # birthday epoch constraint
    if yr < self.ptc.BirthdayEpoch:
        yr += 2000
    elif yr < 100:
        yr += 1900

    log.debug('parseDate: %s %s %s %s' % (yr, mth, dy, self.ptc.daysInMonth(mth, yr)))

    if (mth > 0 and mth <= 12) and \
       (dy > 0 and dy <= self.ptc.daysInMonth(mth, yr)):
        sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
    else:
        # return current time if date string is invalid
        self.dateFlag = 0
        self.timeFlag = 0
        sourceTime = time.localtime()

    return sourceTime
434 435
def parseDateText(self, dateString, sourceTime=None):
    """
    Parse long-form date strings::

        'May 31st, 2006'
        'Jan 1st'
        'July 2006'

    @type  dateString: string
    @param dateString: text to convert to a datetime
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base; the
                       current local time when C{None}

    @rtype:  struct_time
    @return: calculated C{struct_time} value of dateString, or the
             current local time (with C{dateFlag} and C{timeFlag} reset
             to 0) when the text holds no valid date
    """
    if sourceTime is None:
        yr, mth, dy, hr, mn, sec, wd, yd, isdst = time.localtime()
    else:
        yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

    currentMth = mth
    currentDy = dy

    m = self.ptc.CRE_DATE3.search(dateString.lower())
    if m is None:
        # No month name found: treat it like any other invalid date
        # instead of failing with AttributeError on m.group().
        self.dateFlag = 0
        self.timeFlag = 0
        return time.localtime()

    mth = self.ptc.MonthOffsets[m.group('mthname')]

    if m.group('day') is not None:
        dy = int(m.group('day'))
    else:
        dy = 1

    if m.group('year') is not None:
        yr = int(m.group('year'))

        # birthday epoch constraint
        if yr < self.ptc.BirthdayEpoch:
            yr += 2000
        elif yr < 100:
            yr += 1900

    elif (mth < currentMth) or (mth == currentMth and dy < currentDy):
        # if that day and month have already passed in this year,
        # then move the year forward by the configured amount
        yr += self.ptc.YearParseStyle

    if dy > 0 and dy <= self.ptc.daysInMonth(mth, yr):
        sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
    else:
        # Return current time if date string is invalid
        self.dateFlag = 0
        self.timeFlag = 0
        sourceTime = time.localtime()

    return sourceTime
491 492
def evalRanges(self, datetimeString, sourceTime=None):
    """
    Decide whether C{datetimeString} describes a date or time range
    and, if so, parse both ends of it.

    @type  datetimeString: string
    @param datetimeString: datetime text to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of: start datetime, end datetime and a flag that is
             1 for a date range, 2 for a time range and 0 when no range
             could be parsed
    """
    rangeFlag = 0

    s = datetimeString.strip().lower()

    # make sure the separator stands on its own, then squeeze the
    # doubled blanks this may have introduced
    if self.ptc.rangeSep in s:
        s = s.replace(self.ptc.rangeSep, ' %s ' % self.ptc.rangeSep)
        s = s.replace('  ', ' ')

    # patterns in priority order; the first one that hits decides rangeFlag
    candidates = (('CRE_TIMERNG1', 1),
                  ('CRE_TIMERNG2', 2),
                  ('CRE_TIMERNG4', 7),
                  ('CRE_TIMERNG3', 3),
                  ('CRE_DATERNG1', 4),
                  ('CRE_DATERNG2', 5),
                  ('CRE_DATERNG3', 6))
    m = None
    for reName, flagValue in candidates:
        m = getattr(self.ptc, reName).search(s)
        if m is not None:
            rangeFlag = flagValue
            break

    log.debug('evalRanges: rangeFlag = %s [%s]' % (rangeFlag, s))

    if m is None:
        # if range is not found
        now = time.localtime()
        return (now, now, 0)

    if m.group() != s:
        # text surrounds the range: parse it first so that it can
        # serve as the base time for both ends
        parseStr = m.group()
        s = '%s %s' % (s[:m.start()], s[m.end():])

        sourceTime, flag = self.parse(s, sourceTime)

        if flag == 0:
            sourceTime = None
    else:
        parseStr = s

    if rangeFlag in (1, 2, 4):
        # plain "start <sep> end"; flag 4 is a date range, 1 and 2 are times
        sep = re.search(self.ptc.rangeSep, parseStr)
        first, sflag = self.parse(parseStr[:sep.start()], sourceTime)
        last, eflag = self.parse(parseStr[(sep.start() + 1):], sourceTime)

        if (eflag != 0) and (sflag != 0):
            return (first, last, 1 if rangeFlag == 4 else 2)

    elif rangeFlag in (3, 7):
        sep = re.search(self.ptc.rangeSep, parseStr)
        head = parseStr[:sep.start()]

        # only the end time carries a meridian: copy it onto the start time
        if self.ptc.usesMeridian:
            ampm = re.search(self.ptc.am[0], parseStr)
            if ampm is not None:
                head = head + self.ptc.meridian[0]
            else:
                head = head + self.ptc.meridian[1]

        first, sflag = self.parse(head, sourceTime)
        last, eflag = self.parse(parseStr[(sep.start() + 1):], sourceTime)

        if (eflag != 0) and (sflag != 0):
            return (first, last, 2)

    elif rangeFlag == 5:
        sep = re.search(self.ptc.rangeSep, parseStr)
        endText = parseStr[(sep.start() + 1):]

        # capturing the year from the end date
        endYear = self.ptc.CRE_DATE3.search(endText).group('year')

        # appending the year to the start date if the start date
        # does not have year information and the end date does.
        # eg : "Aug 21 - Sep 4, 2007"
        if endYear is not None:
            startText = (parseStr[:sep.start()]).strip()
            startYear = self.ptc.CRE_DATE3.search(startText).group('year')

            if startYear is None:
                startText = startText + ', ' + endYear
        else:
            startText = parseStr[:sep.start()]

        first, sflag = self.parse(startText, sourceTime)
        last, eflag = self.parse(endText, sourceTime)

        if (eflag != 0) and (sflag != 0):
            return (first, last, 1)

    elif rangeFlag == 6:
        sep = re.search(self.ptc.rangeSep, parseStr)
        startText = parseStr[:sep.start()]

        # the end date has no month of its own: borrow the start's
        mthName = self.ptc.CRE_DATE3.search(startText).group('mthname')
        endText = mthName + parseStr[(sep.start() + 1):]

        first, sflag = self.parse(startText, sourceTime)
        last, eflag = self.parse(endText, sourceTime)

        if (eflag != 0) and (sflag != 0):
            return (first, last, 1)

    # a range pattern matched but one of its ends failed to parse
    return (sourceTime, sourceTime, 0)
656 657
def _CalculateDOWDelta(self, wd, wkdy, offset, style, currentDayStyle):
    """
    Work out how many days separate the current weekday from the
    requested one, honouring C{offset}, C{style} and C{currentDayStyle}.

    @type  wd:              integer
    @param wd:              day-of-week value for the current day
    @type  wkdy:            integer
    @param wkdy:            day-of-week value for the parsed day
    @type  offset:          integer
    @param offset:          offset direction for any modifiers (-1, 0, 1),
                            or 2 when no modifier is present
    @type  style:           integer
    @param style:           normally the value set in C{Constants.DOWParseStyle}
    @type  currentDayStyle: integer
    @param currentDayStyle: normally the value set in C{Constants.CurrentDOWParseStyle}

    @rtype:  integer
    @return: number of days to add to the current day (may be negative)
    """
    # distance inside the current week; the branches shift it by a week
    delta = wkdy - wd

    if offset == 1:
        # "next": same weekday, one week ahead
        diff = delta + 7

    elif offset == -1:
        # "last" / "previous": same weekday, one week back
        diff = delta - 7

    elif offset == 0:
        # "this": weekday of the current week
        diff = delta

    elif offset == 2:
        # no modifier at all, the text is just a weekday name
        if style == 1:
            # next occurrence; today only counts when currentDayStyle is set
            if currentDayStyle == True:
                stillAhead = wkdy >= wd
            else:
                stillAhead = wkdy > wd
            diff = delta if stillAhead else delta + 7

        elif style == -1:
            # last occurrence; today only counts when currentDayStyle is set
            if currentDayStyle == True:
                alreadyPast = wkdy <= wd
            else:
                alreadyPast = wkdy < wd
            diff = delta if alreadyPast else delta - 7

        else:
            # occurrence within the current week
            diff = delta

    log.debug("wd %s, wkdy %s, offset %d, style %d" % (wd, wkdy, offset, style))

    return diff
727
def _quantityToInt(self, quantity):
    """
    Turn a quantity, given as digits or as a spelled-out number, into
    an integer.

    @type  quantity: string
    @param quantity: quantity to parse to int

    @rtype:  int
    @return: the quantity as an integer; 1 for an empty string and 0
             when the text is neither numeric nor a key of
             C{self.ptc.numbers}
    """
    if len(quantity) == 0:
        return 1

    try:
        value = int(quantity)
    except ValueError:
        # not plain digits: try the spelled-out numbers table
        try:
            value = self.ptc.numbers[quantity]
        except KeyError:
            value = 0

    return value
751
def _evalModifier(self, modifier, chunk1, chunk2, sourceTime):
    """
    Evaluate the C{modifier} string and following text (passed in
    as C{chunk1} and C{chunk2}) and if they match any known modifiers
    calculate the delta and apply it to C{sourceTime}.

    The leading part of C{chunk2}, up to the first C{CRE_REMAINING}
    match, is taken as the unit the modifier applies to.  The unit is
    tried in turn as a calendar unit (month, week, day, hour, year), as
    an end-of-period modifier (C{eom}, C{eoy}), as a weekday name and
    finally as a time expression.  If none of these apply, the unit
    (prefixed with C{-} for negative offsets) is put back in front of
    the remaining text so the caller can parse it.

    @type  modifier:   string
    @param modifier:   modifier text to apply to sourceTime; must be a
                       key of C{self.ptc.Modifiers}
    @type  chunk1:     string
    @param chunk1:     first text chunk; only used, joined with the unit,
                       as extra text to parse in the final fallback
    @type  chunk2:     string
    @param chunk2:     second text chunk; supplies the unit and the
                       remaining text
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base; the
                       current local time when C{None}

    @rtype:  tuple
    @return: tuple of: remaining text and the modified sourceTime
    """

    # numeric meaning of the modifier; compared against 0, 2 and < 0 below
    offset = self.ptc.Modifiers[modifier]

    if sourceTime is not None:
        (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime
    else:
        (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = time.localtime()

    # capture the units after the modifier and the remaining
    # string after the unit
    m = self.ptc.CRE_REMAINING.search(chunk2)
    if m is not None:
        index = m.start() + 1
        unit = chunk2[:m.start()]
        chunk2 = chunk2[index:]
    else:
        unit = chunk2
        chunk2 = ''

    # becomes True as soon as one of the sections below handled the unit
    flag = False

    log.debug("modifier [%s] chunk1 [%s] chunk2 [%s] unit [%s] flag %s" % (modifier, chunk1, chunk2, unit, flag))

    if unit == 'month' or \
       unit == 'mth' or \
       unit == 'm':
        if offset == 0:
            # last day of the source month, 09:00
            dy = self.ptc.daysInMonth(mth, yr)
            sourceTime = (yr, mth, dy, 9, 0, 0, wd, yd, isdst)
        elif offset == 2:
            # if day is the last day of the month, calculate the last day
            # of the next month
            # NOTE(review): mth + 1 is 13 for December, and dy is swapped
            # for next month's length before datetime() is built for the
            # *current* month -- if daysInMonth returns calendar lengths
            # this can exceed the current month (e.g. Feb 28 -> 31) and
            # make datetime() raise ValueError.  Confirm against
            # Constants.daysInMonth and self.inc.
            if dy == self.ptc.daysInMonth(mth, yr):
                dy = self.ptc.daysInMonth(mth + 1, yr)

            start = datetime.datetime(yr, mth, dy, 9, 0, 0)
            target = self.inc(start, month=1)
            sourceTime = target.timetuple()
        else:
            # first of the month at 09:00, moved by offset months
            start = datetime.datetime(yr, mth, 1, 9, 0, 0)
            target = self.inc(start, month=offset)
            sourceTime = target.timetuple()

        flag = True
        self.dateFlag = 1

    if unit == 'week' or \
       unit == 'wk' or \
       unit == 'w':
        if offset == 0:
            # day with weekday value 4 of the source week, 17:00
            start = datetime.datetime(yr, mth, dy, 17, 0, 0)
            target = start + datetime.timedelta(days=(4 - wd))
            sourceTime = target.timetuple()
        elif offset == 2:
            start = datetime.datetime(yr, mth, dy, 9, 0, 0)
            target = start + datetime.timedelta(days=7)
            sourceTime = target.timetuple()
        else:
            # any other offset is re-evaluated as "<modifier> monday"
            return self._evalModifier(modifier, chunk1, "monday " + chunk2, sourceTime)

        flag = True
        self.dateFlag = 1

    if unit == 'day' or \
       unit == 'dy' or \
       unit == 'd':
        if offset == 0:
            # source day at 17:00
            sourceTime = (yr, mth, dy, 17, 0, 0, wd, yd, isdst)
            self.timeFlag = 2
        elif offset == 2:
            # one day later, time of day kept
            start = datetime.datetime(yr, mth, dy, hr, mn, sec)
            target = start + datetime.timedelta(days=1)
            sourceTime = target.timetuple()
        else:
            # offset days from the source day, at 09:00
            start = datetime.datetime(yr, mth, dy, 9, 0, 0)
            target = start + datetime.timedelta(days=offset)
            sourceTime = target.timetuple()

        flag = True
        self.dateFlag = 1

    if unit == 'hour' or \
       unit == 'hr':
        if offset == 0:
            # top of the source hour
            sourceTime = (yr, mth, dy, hr, 0, 0, wd, yd, isdst)
        else:
            start = datetime.datetime(yr, mth, dy, hr, 0, 0)
            target = start + datetime.timedelta(hours=offset)
            sourceTime = target.timetuple()

        flag = True
        self.timeFlag = 2

    if unit == 'year' or \
       unit == 'yr' or \
       unit == 'y':
        if offset == 0:
            # December 31st of the source year, time of day kept
            sourceTime = (yr, 12, 31, hr, mn, sec, wd, yd, isdst)
        elif offset == 2:
            sourceTime = (yr + 1, mth, dy, hr, mn, sec, wd, yd, isdst)
        else:
            # January 1st, 09:00 of the year offset years away
            sourceTime = (yr + offset, 1, 1, 9, 0, 0, wd, yd, isdst)

        flag = True
        self.dateFlag = 1

    # the unit was not a calendar unit: look at the modifier itself
    if not flag:
        if modifier == 'eom':
            # last day of the source month, 09:00
            self.modifierFlag = False
            dy = self.ptc.daysInMonth(mth, yr)
            sourceTime = (yr, mth, dy, 9, 0, 0, wd, yd, isdst)
            self.dateFlag = 2
            flag = True
        elif modifier == 'eoy':
            # last day of December of the source year, 09:00
            self.modifierFlag = False
            mth = 12
            dy = self.ptc.daysInMonth(mth, yr)
            sourceTime = (yr, mth, dy, 9, 0, 0, wd, yd, isdst)
            self.dateFlag = 2
            flag = True

    # still unhandled: the unit may be a weekday name
    if not flag:
        m = self.ptc.CRE_WEEKDAY.match(unit)
        if m is not None:
            log.debug('CRE_WEEKDAY matched')
            wkdy = m.group()
            self.dateFlag = 1

            if modifier == 'eod':
                # Calculate the upcoming weekday
                self.modifierFlag = False
                (sourceTime, _) = self.parse(wkdy, sourceTime)
                sources = self.ptc.buildSources(sourceTime)
                self.timeFlag = 2

                if modifier in sources:
                    sourceTime = sources[modifier]
            else:
                wkdy = self.ptc.WeekdayOffsets[wkdy]
                diff = self._CalculateDOWDelta(wd, wkdy, offset,
                                               self.ptc.DOWParseStyle,
                                               self.ptc.CurrentDOWParseStyle)
                start = datetime.datetime(yr, mth, dy, 9, 0, 0)
                target = start + datetime.timedelta(days=diff)
                sourceTime = target.timetuple()

            flag = True
            self.dateFlag = 1

    # still unhandled: the unit may be a time expression or "now"
    if not flag:
        m = self.ptc.CRE_TIME.match(unit)
        if m is not None or unit in self.ptc.re_values['now']:
            log.debug('CRE_TIME matched')
            self.modifierFlag = False
            # NOTE(review): the unit is parsed without passing
            # sourceTime here -- confirm this is intended.
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst), _ = self.parse(unit)

            start = datetime.datetime(yr, mth, dy, hr, mn, sec)
            target = start + datetime.timedelta(days=offset)
            sourceTime = target.timetuple()
            flag = True
        else:
            self.modifierFlag = False

            log.debug('check for modifications to source time')

            # check if the remaining text is parsable and if so,
            # use it as the base time for the modifier source time
            t, flag2 = self.parse('%s %s' % (chunk1, unit), sourceTime)

            log.debug('flag2 = %s t = %s' % (flag2, t))

            if flag2 != 0:
                sourceTime = t

            sources = self.ptc.buildSources(sourceTime)

            log.debug('looking for %s in %s' % (modifier, sources))

            if modifier in sources:
                log.debug('modifier found in sources')
                sourceTime = sources[modifier]
                flag = True
                self.timeFlag = 2

    # if the word after next is a number, the string is more than likely
    # to be "next 4 hrs" which we will have to combine the units with the
    # rest of the string
    if not flag:
        if offset < 0:
            # if offset is negative, the unit has to be made negative
            unit = '-%s' % unit

        chunk2 = '%s %s' % (unit, chunk2)

    self.modifierFlag = False

    log.debug('returning chunk = "%s" and sourceTime = %s' % (chunk2, sourceTime))

    #return '%s %s' % (chunk1, chunk2), sourceTime
    return '%s' % chunk2, sourceTime
970
def _evalModifier2(self, modifier, chunk1 , chunk2, sourceTime):
    """
    Evaluate the C{modifier} string and the text around it (passed in
    as C{chunk1} and C{chunk2}) and apply both chunks to C{sourceTime}.

    C{chunk2} is parsed first and its result becomes the base time for
    parsing C{chunk1}.  For negative modifiers a leading quantity in
    either chunk is negated before parsing, so that text such as
    ' before 3 days' subtracts the three days.

    @type  modifier:   string
    @param modifier:   modifier text to apply to C{sourceTime}; must be a
                       key of C{self.ptc.Modifiers}
    @type  chunk1:     string
    @param chunk1:     first text chunk (if any)
    @type  chunk2:     string
    @param chunk2:     second text chunk (if any)
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of: the modified sourceTime and a flag; the flag is
             the parse flag of C{chunk1} when C{chunk2} was given but
             did not parse, C{False} otherwise
    """

    offset = self.ptc.Modifiers[modifier]

    self.modifier2Flag = False
    log.debug("modifier2 [%s] chunk1 [%s] chunk2 [%s] sourceTime %s" % (modifier, chunk1, chunk2, sourceTime))

    # flag1: chunk2 was given but could not be parsed
    # flag2: parse flag of chunk1
    flag1 = False
    flag2 = False

    # If the string after the negative modifier starts with digits,
    # then it is likely that the string is similar to ' before 3 days'
    # or 'evening prior to 3 days'.
    # In this case, the total time is calculated by subtracting '3 days'
    # from the current date.
    # So, we have to identify the quantity and negate it before parsing
    # the string.
    # This is not required for strings not starting with digits since the
    # string is enough to calculate the sourceTime
    if chunk2 != '':
        currDOWParseStyle = self.ptc.DOWParseStyle
        try:
            if offset < 0:
                # The pattern runs on the stripped text, so its match
                # positions must be shifted by the leading whitespace
                # before slicing the original chunk.
                lead = len(chunk2) - len(chunk2.lstrip())
                m = self.ptc.CRE_NUMBER.match(chunk2.strip())
                if m is not None:
                    qty = self._quantityToInt(m.group()) * -1
                    chunk2 = '%d%s' % (qty, chunk2[lead + m.end():])
                else:
                    # enforce selection of the previous period
                    # driven by DOWParseStyle and CurrentDOWParseStyle
                    # FIXME: this is not threadsafe!
                    self.ptc.DOWParseStyle = -1

            sourceTime, flag1 = self.parse(chunk2, sourceTime)
        finally:
            # Restore the setting on the constants object it was taken
            # from (it used to be written to self, leaving
            # self.ptc.DOWParseStyle at -1), even if parse() raises.
            self.ptc.DOWParseStyle = currDOWParseStyle

        flag1 = (flag1 == 0)

    if chunk1 == '':
        return sourceTime, (flag1 and flag2)

    if offset < 0:
        lead = len(chunk1) - len(chunk1.lstrip())
        m = self.ptc.CRE_NUMBER.search(chunk1.strip())
        if m is not None:
            qty = self._quantityToInt(m.group()) * -1
            chunk1 = '%d%s' % (qty, chunk1[lead + m.end():])

    # parse() may change the flags; remember them in case the result
    # of parsing chunk1 is thrown away below
    tempDateFlag = self.dateFlag
    tempTimeFlag = self.timeFlag
    sourceTime2, flag2 = self.parse(chunk1, sourceTime)

    # if chunk1 is not a datetime and chunk2 is then do not use datetime
    # value returned by parsing chunk1
    if flag1 or flag2 != 0:
        sourceTime = sourceTime2
    else:
        self.timeFlag = tempTimeFlag
        self.dateFlag = tempDateFlag

    return sourceTime, (flag1 and flag2)
1053 1054
def _evalString(self, datetimeString, sourceTime=None):
    """
    Calculate the datetime based on flags set by the L{parse()} routine

    Examples handled::
        RFC822, W3CDTF formatted dates
        HH:MM[:SS][ am/pm]
        MM/DD/YYYY
        DD MMMM YYYY

    Every C{self.*Flag} that is handled here is reset to C{False} before
    returning.  C{self.dateFlag} / C{self.timeFlag} are cleared to 0 when
    the text turns out to be invalid or matches nothing.

    @type  datetimeString: string
    @param datetimeString: text to try and parse as more "traditional"
                           date/time text
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  struct_time (or equivalent 9-item tuple)
    @return: calculated C{struct_time} value or current C{struct_time}
             if not parsed
    """
    s = datetimeString.strip()
    now = time.localtime()

    log.debug('_evalString(%s, %s)' % (datetimeString, sourceTime))

    # Given string date is a RFC822 date
    if sourceTime is None:
        sourceTime = _parse_date_rfc822(s)
        log.debug('attempt to parse as rfc822 - %s' % str(sourceTime))

        if sourceTime is not None:
            # the rfc822 helper yields a 10-item tuple; the last item is
            # dropped to get back to the 9-item time tuple layout
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst, _) = sourceTime
            self.dateFlag = 1

            # NOTE(review): the time flag is only set when hour, minute
            # AND second are all non-zero - confirm 'and' (not 'or') is
            # really what is wanted here
            if (hr != 0) and (mn != 0) and (sec != 0):
                self.timeFlag = 2

            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)

    # Given string date is a W3CDTF date
    if sourceTime is None:
        sourceTime = _parse_date_w3dtf(s)

        if sourceTime is not None:
            self.dateFlag = 1
            self.timeFlag = 2

    if sourceTime is None:
        s = s.lower()

    # Given string is in the format HH:MM(:SS)(am/pm)
    if self.meridianFlag:
        if sourceTime is None:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now
        else:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

        m = self.ptc.CRE_TIMEHMS2.search(s)
        if m is not None:
            # text in front of the meridian; one or two characters means
            # a bare hour such as "5 pm"
            dt = s[:m.start('meridian')].strip()
            if len(dt) <= 2:
                hr = int(dt)
                mn = 0
                sec = 0
            else:
                hr, mn, sec = _extract_time(m)

            if hr == 24:
                hr = 0

            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
            meridian = m.group('meridian').lower()

            # if 'am' found and hour is 12 - force hour to 0 (midnight)
            if (meridian in self.ptc.am) and hr == 12:
                sourceTime = (yr, mth, dy, 0, mn, sec, wd, yd, isdst)

            # if 'pm' found and hour < 12, add 12 to shift to evening
            if (meridian in self.ptc.pm) and hr < 12:
                sourceTime = (yr, mth, dy, hr + 12, mn, sec, wd, yd, isdst)

        # invalid time
        if hr > 24 or mn > 59 or sec > 59:
            sourceTime = now
            self.dateFlag = 0
            self.timeFlag = 0

        self.meridianFlag = False

    # Given string is in the format HH:MM(:SS)
    if self.timeStdFlag:
        if sourceTime is None:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now
        else:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

        m = self.ptc.CRE_TIMEHMS.search(s)
        if m is not None:
            hr, mn, sec = _extract_time(m)
        if hr == 24:
            hr = 0

        if hr > 24 or mn > 59 or sec > 59:
            # invalid time
            sourceTime = now
            self.dateFlag = 0
            self.timeFlag = 0
        else:
            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)

        self.timeStdFlag = False

    # Given string is in the format 07/21/2006
    if self.dateStdFlag:
        sourceTime = self.parseDate(s, sourceTime)
        self.dateStdFlag = False

    # Given string is in the format "May 23rd, 2005"
    if self.dateStrFlag:
        log.debug('checking for MMM DD YYYY')
        sourceTime = self.parseDateText(s, sourceTime)
        log.debug('parseDateText(%s) returned %s' % (s, sourceTime))
        self.dateStrFlag = False

    # Given string is a weekday
    if self.weekdyFlag:
        if sourceTime is None:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now
        else:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

        start = datetime.datetime(yr, mth, dy, hr, mn, sec)
        wkdy = self.ptc.WeekdayOffsets[s]

        # the original code had two identical branches for
        # "wkdy > wd" and its opposite; one call covers both
        qty = self._CalculateDOWDelta(wd, wkdy, 2,
                                      self.ptc.DOWParseStyle,
                                      self.ptc.CurrentDOWParseStyle)

        target = start + datetime.timedelta(days=qty)

        sourceTime = target.timetuple()
        self.weekdyFlag = False

    # Given string is a natural language time string like
    # lunch, midnight, etc
    if self.timeStrFlag:
        if s in self.ptc.re_values['now']:
            sourceTime = now
        else:
            sources = self.ptc.buildSources(sourceTime)

            if s in sources:
                sourceTime = sources[s]
            else:
                sourceTime = now
                self.dateFlag = 0
                self.timeFlag = 0

        self.timeStrFlag = False

    # Given string is a natural language date string like today, tomorrow..
    if self.dayStrFlag:
        if sourceTime is None:
            sourceTime = now

        (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

        if s in self.ptc.dayOffsets:
            offset = self.ptc.dayOffsets[s]
        else:
            offset = 0

        # day strings are anchored at 09:00:00 of the source day
        start = datetime.datetime(yr, mth, dy, 9, 0, 0)
        target = start + datetime.timedelta(days=offset)
        sourceTime = target.timetuple()

        self.dayStrFlag = False

    # Given string is a time string with units like "5 hrs 30 min"
    if self.unitsFlag:
        modifier = ''  # TODO

        if sourceTime is None:
            sourceTime = now

        m = self.ptc.CRE_UNITS.search(s)
        if m is not None:
            units = m.group('units')
            quantity = s[:m.start('units')]

            # only build the time when the pattern matched; otherwise
            # units/quantity would be unbound
            sourceTime = self._buildTime(sourceTime, quantity, modifier, units)
        self.unitsFlag = False

    # Given string is a time string with single char units like "5 h 30 m"
    if self.qunitsFlag:
        modifier = ''  # TODO

        if sourceTime is None:
            sourceTime = now

        m = self.ptc.CRE_QUNITS.search(s)
        if m is not None:
            units = m.group('qunits')
            quantity = s[:m.start('qunits')]

            # same guard as for the long unit form above
            sourceTime = self._buildTime(sourceTime, quantity, modifier, units)
        self.qunitsFlag = False

    # Given string does not match anything
    if sourceTime is None:
        sourceTime = now
        self.dateFlag = 0
        self.timeFlag = 0

    return sourceTime
1276
1277 - def _UnitsTrapped(self, s, m, key):
1278 # check if a day suffix got trapped by a unit match 1279 # for example Dec 31st would match for 31s (aka 31 seconds) 1280 # Dec 31st 1281 # ^ ^ 1282 # | +-- m.start('units') 1283 # | and also m2.start('suffix') 1284 # +---- m.start('qty') 1285 # and also m2.start('day') 1286 m2 = self.ptc.CRE_DAY2.search(s) 1287 if m2 is not None: 1288 t = '%s%s' % (m2.group('day'), m.group(key)) 1289 if ((m.start(key) == m2.start('suffix')) and 1290 (m.start('qty') == m2.start('day')) and 1291 (m.group('qty') == t)): 1292 return True 1293 else: 1294 return False 1295 else: 1296 return False
1297
def parseDT(self, datetimeString, sourceTime=None, tzinfo=None):
    """
    Same as C{.parse} but returns a datetime object instead of a
    C{struct_time}.

    C{datetimeString} and C{sourceTime} have the same meaning as for
    C{.parse}; in addition any C{sourceTime} object that offers a
    C{timetuple()} method is accepted.  C{tzinfo} accepts a tzinfo
    object.  It is advisable to use pytz.

    @type  datetimeString: string
    @param datetimeString: date/time text to evaluate
    @type  sourceTime:     struct_time, datetime, date, time
    @param sourceTime:     time value to use as the base
    @type  tzinfo:         tzinfo
    @param tzinfo:         Timezone to apply to generated datetime objs.

    @rtype:  tuple
    @return: tuple of datetime object and an int of the return code

    see .parse for return code details.
    """
    # Anything that knows how to turn itself into a time tuple is
    # converted; everything else is handed to parse() untouched and it
    # is up to parse() to accept or reject it.
    if hasattr(sourceTime, 'timetuple'):
        sourceTime = sourceTime.timetuple()

    # pytz style timezones offer localize(); for anything else (None
    # included) the tzinfo is simply attached with replace().
    can_localize = hasattr(tzinfo, 'localize')

    parsed, ret_code = self.parse(datetimeString, sourceTime=sourceTime)

    # Same return signature as parse(), with a datetime object in place
    # of the struct_time.
    naive = datetime.datetime(*parsed[:6])
    if can_localize:
        dt = tzinfo.localize(naive)
    else:
        dt = naive.replace(tzinfo=tzinfo)

    return (dt, ret_code)
1341
def parse(self, datetimeString, sourceTime=None):
    """
    Splits the given C{datetimeString} into tokens, finds the regex
    patterns that match and then calculates a C{struct_time} value from
    the chunks.

    If C{sourceTime} is given then the C{struct_time} value will be
    calculated from that value, otherwise from the current date/time.

    If the C{datetimeString} is parsed and date/time value found then
    the second item of the returned tuple will be a flag to let you know
    what kind of C{struct_time} value is being returned::

        0 = not parsed at all
        1 = parsed as a C{date}
        2 = parsed as a C{time}
        3 = parsed as a C{datetime}

    The flag is the sum of C{self.dateFlag} and C{self.timeFlag}.  Both
    are reset to 0 before scanning, except when C{datetimeString} is
    empty and a C{sourceTime} was given: that path returns before the
    reset and reports whatever the flags currently hold.

    @type  datetimeString: string
    @param datetimeString: date/time text to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base; a
                           C{datetime.datetime} is coerced with
                           C{timetuple()} and a plain tuple is accepted

    @rtype:  tuple
    @return: tuple of: modified C{sourceTime} and the result flag

    @raise Exception: if a truthy C{sourceTime} is neither a datetime,
                      a C{struct_time} nor a tuple
    """

    # drop a period that follows a word character and precedes whitespace,
    # and blank out quote characters that open or close a word
    datetimeString = re.sub(r'(\w)(\.)(\s)', r'\1\3', datetimeString)
    datetimeString = re.sub(r'(\w)(\'|")(\s|$)', r'\1 \3', datetimeString)
    datetimeString = re.sub(r'(\s|^)(\'|")(\w)', r'\1 \3', datetimeString)

    if sourceTime:
        if isinstance(sourceTime, datetime.datetime):
            log.debug('coercing datetime to timetuple')
            sourceTime = sourceTime.timetuple()
        else:
            if not isinstance(sourceTime, time.struct_time) and \
               not isinstance(sourceTime, tuple):
                raise Exception('sourceTime is not a struct_time')

    s = datetimeString.strip().lower()
    parseStr = ''
    totalTime = sourceTime

    # nothing to parse: hand back the base time (flags are NOT reset on
    # this path) or the current time with a "not parsed" flag
    if s == '' :
        if sourceTime is not None:
            return (sourceTime, self.dateFlag + self.timeFlag)
        else:
            return (time.localtime(), 0)

    self.timeFlag = 0
    self.dateFlag = 0

    # Each pass tries the patterns below in a fixed priority order; the
    # first one that fills parseStr wins for that pass.  's' keeps the
    # text that still has to be scanned, chunk1/chunk2 the text before and
    # after the matched piece, and 'flag' records that a remainder is left.
    while len(s) > 0:
        flag = False
        chunk1 = ''
        chunk2 = ''

        log.debug('parse (top of loop): [%s][%s]' % (s, parseStr))

        if parseStr == '':
            # Modifier like next\prev..
            m = self.ptc.CRE_MODIFIER.search(s)
            if m is not None:
                self.modifierFlag = True
                if (m.group('modifier') != s):
                    # capture remaining string
                    # (note: 's' itself is left untouched for modifiers)
                    parseStr = m.group('modifier')
                    chunk1 = s[:m.start('modifier')].strip()
                    chunk2 = s[m.end('modifier'):].strip()
                    flag = True
                else:
                    parseStr = s

        if parseStr == '':
            # Modifier like from\after\prior..
            m = self.ptc.CRE_MODIFIER2.search(s)
            if m is not None:
                self.modifier2Flag = True
                if (m.group('modifier') != s):
                    # capture remaining string
                    # (note: 's' itself is left untouched for modifiers)
                    parseStr = m.group('modifier')
                    chunk1 = s[:m.start('modifier')].strip()
                    chunk2 = s[m.end('modifier'):].strip()
                    flag = True
                else:
                    parseStr = s

        if parseStr == '':
            # Quantity + Units
            m = self.ptc.CRE_UNITS.search(s)
            if m is not None:
                log.debug('CRE_UNITS matched')
                if self._UnitsTrapped(s, m, 'units'):
                    log.debug('day suffix trapped by unit match')
                else:
                    self.unitsFlag = True
                    if (m.group('qty') != s):
                        # capture remaining string
                        parseStr = m.group('qty')
                        chunk1 = s[:m.start('qty')].strip()
                        chunk2 = s[m.end('qty'):].strip()

                        # a '-' directly in front of the quantity
                        # belongs to it (negative quantity)
                        if chunk1[-1:] == '-':
                            parseStr = '-%s' % parseStr
                            chunk1 = chunk1[:-1]

                        s = '%s %s' % (chunk1, chunk2)
                        flag = True
                    else:
                        parseStr = s

        if parseStr == '':
            # Quantity + Units (single character unit form)
            m = self.ptc.CRE_QUNITS.search(s)
            if m is not None:
                log.debug('CRE_QUNITS matched')
                if self._UnitsTrapped(s, m, 'qunits'):
                    log.debug('day suffix trapped by qunit match')
                else:
                    self.qunitsFlag = True

                    if (m.group('qty') != s):
                        # capture remaining string
                        parseStr = m.group('qty')
                        chunk1 = s[:m.start('qty')].strip()
                        chunk2 = s[m.end('qty'):].strip()

                        # a '-' directly in front of the quantity
                        # belongs to it (negative quantity)
                        if chunk1[-1:] == '-':
                            parseStr = '-%s' % parseStr
                            chunk1 = chunk1[:-1]

                        s = '%s %s' % (chunk1, chunk2)
                        flag = True
                    else:
                        parseStr = s

        if parseStr == '':
            valid_date = False
            for match in self.ptc.CRE_DATE3.finditer(s):
                # to prevent "HH:MM(:SS) time strings" expressions from
                # triggering this regex, we check if the month field
                # exists in the searched expression; if it doesn't exist,
                # the date field is not valid
                if match.group('mthname'):
                    m = self.ptc.CRE_DATE3.search(s, match.start())
                    valid_date = True
                    break

            # String date format
            if valid_date:
                self.dateStrFlag = True
                self.dateFlag = 1
                if (m.group('date') != s):
                    # capture remaining string
                    parseStr = m.group('date')
                    chunk1 = s[:m.start('date')]
                    chunk2 = s[m.end('date'):]
                    s = '%s %s' % (chunk1, chunk2)
                    flag = True
                else:
                    parseStr = s

        if parseStr == '':
            # Standard date format
            m = self.ptc.CRE_DATE.search(s)
            if m is not None:
                self.dateStdFlag = True
                self.dateFlag = 1
                if (m.group('date') != s):
                    # capture remaining string
                    parseStr = m.group('date')
                    chunk1 = s[:m.start('date')]
                    chunk2 = s[m.end('date'):]
                    s = '%s %s' % (chunk1, chunk2)
                    flag = True
                else:
                    parseStr = s

        if parseStr == '':
            # Natural language day strings
            m = self.ptc.CRE_DAY.search(s)
            if m is not None:
                self.dayStrFlag = True
                self.dateFlag = 1
                if (m.group('day') != s):
                    # capture remaining string
                    parseStr = m.group('day')
                    chunk1 = s[:m.start('day')]
                    chunk2 = s[m.end('day'):]
                    s = '%s %s' % (chunk1, chunk2)
                    flag = True
                else:
                    parseStr = s

        if parseStr == '':
            # Weekday
            m = self.ptc.CRE_WEEKDAY.search(s)
            if m is not None:
                gv = m.group('weekday')
                # skip when the whole string is a known day offset key
                if s not in self.ptc.dayOffsets:
                    self.weekdyFlag = True
                    self.dateFlag = 1
                    if (gv != s):
                        # capture remaining string
                        parseStr = gv
                        chunk1 = s[:m.start('weekday')]
                        chunk2 = s[m.end('weekday'):]
                        s = '%s %s' % (chunk1, chunk2)
                        flag = True
                    else:
                        parseStr = s

        if parseStr == '':
            # Natural language time strings
            m = self.ptc.CRE_TIME.search(s)
            if m is not None or s in self.ptc.re_values['now']:
                self.timeStrFlag = True
                self.timeFlag = 2
                if (m and m.group('time') != s):
                    # capture remaining string
                    parseStr = m.group('time')
                    chunk1 = s[:m.start('time')]
                    chunk2 = s[m.end('time'):]
                    s = '%s %s' % (chunk1, chunk2)
                    flag = True
                else:
                    parseStr = s

        if parseStr == '':
            # HH:MM(:SS) am/pm time strings
            m = self.ptc.CRE_TIMEHMS2.search(s)
            if m is not None:
                self.meridianFlag = True
                self.timeFlag = 2
                # rebuild a normalised "H[:M[:S]] meridian" string from
                # the groups that actually matched
                if m.group('minutes') is not None:
                    if m.group('seconds') is not None:
                        parseStr = '%s:%s:%s %s' % (m.group('hours'),
                                                    m.group('minutes'),
                                                    m.group('seconds'),
                                                    m.group('meridian'))
                    else:
                        parseStr = '%s:%s %s' % (m.group('hours'),
                                                 m.group('minutes'),
                                                 m.group('meridian'))
                else:
                    parseStr = '%s %s' % (m.group('hours'),
                                          m.group('meridian'))

                chunk1 = s[:m.start('hours')]
                chunk2 = s[m.end('meridian'):]

                s = '%s %s' % (chunk1, chunk2)
                flag = True

        if parseStr == '':
            # HH:MM(:SS) time strings
            m = self.ptc.CRE_TIMEHMS.search(s)
            if m is not None:
                self.timeStdFlag = True
                self.timeFlag = 2
                if m.group('seconds') is not None:
                    parseStr = '%s:%s:%s' % (m.group('hours'),
                                             m.group('minutes'),
                                             m.group('seconds'))
                    chunk1 = s[:m.start('hours')]
                    chunk2 = s[m.end('seconds'):]
                else:
                    parseStr = '%s:%s' % (m.group('hours'),
                                          m.group('minutes'))
                    chunk1 = s[:m.start('hours')]
                    chunk2 = s[m.end('minutes'):]

                s = '%s %s' % (chunk1, chunk2)
                flag = True

        # if string does not match any regex, empty string to
        # come out of the while loop
        if not flag:
            s = ''

        log.debug('parse (bottom) [%s][%s][%s][%s]' % (s, parseStr, chunk1, chunk2))
        log.debug('weekday %s, dateStd %s, dateStr %s, time %s, timeStr %s, meridian %s' % \
                   (self.weekdyFlag, self.dateStdFlag, self.dateStrFlag, self.timeStdFlag, self.timeStrFlag, self.meridianFlag))
        log.debug('dayStr %s, modifier %s, modifier2 %s, units %s, qunits %s' % \
                   (self.dayStrFlag, self.modifierFlag, self.modifier2Flag, self.unitsFlag, self.qunitsFlag))

        # evaluate the matched string

        if parseStr != '':
            if self.modifierFlag == True:
                t, totalTime = self._evalModifier(parseStr, chunk1, chunk2, totalTime)
                # t is the unparsed part of the chunks.
                # If it is not date/time, return current
                # totalTime as it is; else return the output
                # after parsing t.
                if (t != '') and (t != None):
                    # remember the flags: the recursive parse() resets
                    # them and they are restored if it finds nothing
                    tempDateFlag = self.dateFlag
                    tempTimeFlag = self.timeFlag
                    (totalTime2, flag) = self.parse(t, totalTime)

                    if flag == 0 and totalTime is not None:
                        self.timeFlag = tempTimeFlag
                        self.dateFlag = tempDateFlag

                        log.debug('return 1')
                        return (totalTime, self.dateFlag + self.timeFlag)
                    else:
                        log.debug('return 2')
                        return (totalTime2, self.dateFlag + self.timeFlag)

            elif self.modifier2Flag == True:
                totalTime, invalidFlag = self._evalModifier2(parseStr, chunk1, chunk2, totalTime)
                if invalidFlag == True:
                    self.dateFlag = 0
                    self.timeFlag = 0

            else:
                totalTime = self._evalString(parseStr, totalTime)
                # clear parseStr so the next pass scans 's' again
                parseStr = ''

    # String is not parsed at all
    # (a result equal to the base time is also treated as "not parsed")
    if totalTime is None or totalTime == sourceTime:
        totalTime = time.localtime()
        self.dateFlag = 0
        self.timeFlag = 0
    log.debug('return')
    return (totalTime, self.dateFlag + self.timeFlag)
1669 1670
def inc(self, source, month=None, year=None):
    """
    Takes the given C{source} date, or current date if none is
    passed, and increments it according to the values passed in
    by month and/or year.

    This routine is needed because Python's C{timedelta()} function
    does not allow for month or year increments.

    If the day of C{source} does not exist in the resulting month
    (e.g. Jan 31 + 1 month) the last day of that month is used.

    @type  source: datetime
    @param source: C{datetime} value to increment (anything offering
                   C{year}, C{month}, C{day} and C{replace()}); C{None}
                   means the current local date/time
    @type  month:  integer
    @param month:  optional number of months to increment; may be
                   negative.  Text that C{int()} rejects counts as 0
    @type  year:   integer
    @param year:   optional number of years to increment; may be
                   negative.  Text that C{int()} rejects counts as 0

    @rtype:  datetime
    @return: C{source} incremented by the number of months and/or years
    """
    if source is None:
        # honour the documented fallback to the current date/time
        source = datetime.datetime.now()

    yr = source.year
    mth = source.month
    dy = source.day

    if year:
        try:
            yi = int(year)
        except ValueError:
            yi = 0

        yr += yi

    if month:
        try:
            mi = int(month)
        except ValueError:
            mi = 0

        # Work with a zero-based month so that divmod() yields the
        # number of whole years crossed (negative when going back past
        # January) together with the resulting month.
        years_crossed, month_index = divmod(mth - 1 + mi, 12)
        yr += years_crossed
        mth = month_index + 1

    # if the day ends up past the last day of
    # the new month, set it to the last day
    last_day = self.ptc.daysInMonth(mth, yr)
    if dy > last_day:
        dy = last_day

    return source.replace(year=yr, month=mth, day=dy)
1734
def nlp(self, inputString, sourceTime=None):
    """
    Utilizes parse() after making judgements about what datetime
    information belongs together.

    It makes logical groupings based on proximity and returns a parsed
    datetime for each matched grouping of datetime text, along with
    location info within the given inputString.

    Matches that are separated only by whitespace are combined and parsed
    as one expression.  A group is parsed when at least one of its
    matches is a date, a time or a quantity + unit; groups made of
    modifiers only are skipped.

    @type  inputString: string
    @param inputString: natural language text to evaluate
    @type  sourceTime:  struct_time
    @param sourceTime:  C{struct_time} value to use as the base

    @rtype:  tuple or None
    @return: tuple of tuples in the format (parsed_datetime as
             datetime.datetime, flags as int, start_pos as int,
             end_pos as int, matched_text as string) or None if there
             were no matches.  The tuple can be empty when several
             matches were found but no group could be parsed.
    """
    orig_inputstring = inputString

    # replace periods at the end of sentences w/ spaces
    # opposed to removing them altogether in order to
    # retain relative positions (identified by alpha, period, space).
    # this is required for some of the regex patterns to match
    inputString = re.sub(r'(\w)(\.)(\s)', r'\1 \3', inputString).lower()
    inputString = re.sub(r'(\w)(\'|")(\s|$)', r'\1 \3', inputString)
    inputString = re.sub(r'(\s|^)(\'|")(\w)', r'\1 \3', inputString)

    startpos = 0  # the start position in the inputString during the loop

    # list of lists in format: [startpos, endpos, matchedstring, flags, type]
    matches = []

    while startpos < len(inputString):
        leftmost_match = self._nlpLeftmostMatch(inputString, startpos)

        # set the start position to the end pos of the leftmost match
        startpos = leftmost_match[1]

        # nothing was detected
        # so break out of the loop
        if startpos == 0:
            startpos = len(inputString)
        else:
            if leftmost_match[3] > 0:
                # let a recognised prefix in front of the match extend it
                # to the left
                m = self.ptc.CRE_NLP_PREFIX.search(
                    inputString[:leftmost_match[0]] + ' ' + str(leftmost_match[3]))
                if m is not None:
                    leftmost_match[0] = m.start('nlp_prefix')
                    leftmost_match[2] = inputString[leftmost_match[0]:leftmost_match[1]]
            matches.append(leftmost_match)

    if len(matches) == 0:
        return None

    if len(matches) == 1:
        start, end, text, flags = matches[0][:4]
        if flags == 0:  # not enough info to parse
            return None
        return (self._nlpResult(text, start, end, sourceTime),)

    # find matches in proximity with one another and return all the
    # parsed values
    proximity_matches = []
    from_match_index = 0

    # flags are 0 (modifier), 1 (date), 2 (time) or 3 (units); a group can
    # be parsed as soon as one member carries a non-zero flag
    parseable = matches[0][3] > 0

    for i in range(1, len(matches)):
        # test proximity (are there characters between matches?)
        endofprevious = matches[i - 1][1]
        begofcurrent = matches[i][0]

        if orig_inputstring[endofprevious:begofcurrent].lower().strip() != '':
            # this one isn't in proximity, but maybe
            # we have enough to make a datetime
            # todo: make sure the combination of formats (modifier,
            # dateStd, etc) makes logical sense before parsing together
            if parseable:
                start = matches[from_match_index][0]
                proximity_matches.append(
                    self._nlpResult(orig_inputstring[start:endofprevious],
                                    start, endofprevious, sourceTime))
            # not in proximity, reset starting from current
            from_match_index = i
            parseable = matches[i][3] > 0
        elif matches[i][3] > 0:
            parseable = True

    # check last
    # we have enough to make a datetime
    if parseable:
        start = matches[from_match_index][0]
        end = matches[-1][1]
        proximity_matches.append(
            self._nlpResult(orig_inputstring[start:end],
                            start, end, sourceTime))

    return tuple(proximity_matches)

def _nlpResult(self, text, start, end, sourceTime):
    # parse one grouped piece of text and build the result tuple for nlp()
    parsed_datetime, flags = self.parse(text, sourceTime)
    return (datetime.datetime(*parsed_datetime[:6]), flags, start, end, text)

def _nlpLeftmostMatch(self, inputString, startpos):
    # find the leftmost date/time pattern match at or after startpos and
    # return it as [start, end, text, flags, type] for nlp(); positions are
    # absolute and an end of 0 means that nothing matched.  On equal start
    # positions the pattern tried first wins.
    ptc = self.ptc
    tail = inputString[startpos:]
    best = [0, 0, None, 0, None]

    def offer(start, end, text, flags, kind):
        # start/end are relative to 'tail'; text=None takes the slice
        start += startpos
        end += startpos
        if best[1] == 0 or best[0] > start:
            if text is None:
                text = inputString[start:end]
            best[:] = [start, end, text, flags, kind]
            return True
        return False

    def offer_group(m, group, flags, kind):
        return offer(m.start(group), m.end(group), m.group(group),
                     flags, kind)

    def offer_units(m, kind):
        if offer_group(m, 'qty', 3, kind):
            rel = m.start('qty')
            # a '-' directly in front of the quantity belongs to it; the
            # match position is relative to 'tail', hence the startpos
            if rel > 0 and inputString[startpos + rel - 1] == '-':
                best[0] = best[0] - 1
                best[2] = '-' + best[2]

    # Modifier like next\prev..
    m = ptc.CRE_MODIFIER.search(tail)
    if m is not None:
        offer_group(m, 'modifier', 0, 'modifier')

    # Modifier like from\after\prior..
    m = ptc.CRE_MODIFIER2.search(tail)
    if m is not None:
        offer_group(m, 'modifier', 0, 'modifier2')

    # Quantity + Units
    m = ptc.CRE_UNITS.search(tail)
    if m is not None:
        log.debug('CRE_UNITS matched')
        if self._UnitsTrapped(tail, m, 'units'):
            log.debug('day suffix trapped by unit match')
        else:
            offer_units(m, 'units')

    # Quantity + Units (single character unit form)
    m = ptc.CRE_QUNITS.search(tail)
    if m is not None:
        log.debug('CRE_QUNITS matched')
        if self._UnitsTrapped(tail, m, 'qunits'):
            log.debug('day suffix trapped by qunit match')
        else:
            offer_units(m, 'qunits')

    # String date format
    for match in ptc.CRE_DATE3.finditer(tail):
        # to prevent "HH:MM(:SS) time strings" expressions from
        # triggering this regex, we check if the month field exists in
        # the searched expression; if it doesn't exist, the date field
        # is not valid
        if match.group('mthname'):
            m = ptc.CRE_DATE3.search(tail, match.start())
            offer_group(m, 'date', 1, 'dateStr')
            break

    # Standard date format
    m = ptc.CRE_DATE.search(tail)
    if m is not None:
        offer_group(m, 'date', 1, 'dateStd')

    # Natural language day strings
    m = ptc.CRE_DAY.search(tail)
    if m is not None:
        offer_group(m, 'day', 1, 'dayStr')

    # Weekday
    m = ptc.CRE_WEEKDAY.search(tail)
    if m is not None:
        if tail not in ptc.dayOffsets:
            offer_group(m, 'weekday', 1, 'weekdy')

    # Natural language time strings
    m = ptc.CRE_TIME.search(tail)
    if m is not None:
        offer_group(m, 'time', 2, 'timeStr')

    # HH:MM(:SS) am/pm time strings
    m = ptc.CRE_TIMEHMS2.search(tail)
    if m is not None:
        offer(m.start('hours'), m.end('meridian'), None, 2, 'meridian')

    # HH:MM(:SS) time strings
    m = ptc.CRE_TIMEHMS.search(tail)
    if m is not None:
        if m.group('seconds') is not None:
            end = m.end('seconds')
        else:
            end = m.end('minutes')
        offer(m.start('hours'), end, None, 2, 'timeStd')

    return best
1989 1990
1991 -def _initSymbols(ptc):
1992 """ 1993 Initialize symbols and single character constants. 1994 """ 1995 # build am and pm lists to contain 1996 # original case, lowercase, first-char and dotted 1997 # versions of the meridian text 1998 1999 if len(ptc.locale.meridian) > 0: 2000 am = ptc.locale.meridian[0] 2001 ptc.am = [ am ] 2002 2003 if len(am) > 0: 2004 ptc.am.append(am[0]) 2005 ptc.am.append('{0}.{1}.'.format(am[0], am[1])) 2006 am = am.lower() 2007 ptc.am.append(am) 2008 ptc.am.append(am[0]) 2009 ptc.am.append('{0}.{1}.'.format(am[0], am[1])) 2010 else: 2011 am = '' 2012 ptc.am = [ '', '' ] 2013 2014 if len(ptc.locale.meridian) > 1: 2015 pm = ptc.locale.meridian[1] 2016 ptc.pm = [ pm ] 2017 2018 if len(pm) > 0: 2019 ptc.pm.append(pm[0]) 2020 ptc.pm.append('{0}.{1}.'.format(pm[0], pm[1])) 2021 pm = pm.lower() 2022 ptc.pm.append(pm) 2023 ptc.pm.append(pm[0]) 2024 ptc.pm.append('{0}.{1}.'.format(pm[0], pm[1])) 2025 else: 2026 pm = '' 2027 ptc.pm = [ '', '' ]
2028 2029
2030 -class Constants(object):
2031 """ 2032 Default set of constants for parsedatetime. 2033 2034 If PyICU is present, then the class will first try to get PyICU 2035 to return a locale specified by C{localeID}. If either C{localeID} is 2036 None or if the locale does not exist within PyICU, then each of the 2037 locales defined in C{fallbackLocales} is tried in order. 2038 2039 If PyICU is not present or none of the specified locales can be used, 2040 then the class will initialize itself to the en_US locale. 2041 2042 if PyICU is not present or not requested, only the locales defined by 2043 C{pdtLocales} will be searched. 2044 """
def __init__(self, localeID=None, usePyICU=True, fallbackLocales=['en_US']):
    """
    Select a locale object and build every constant and regex source
    string that depends on it.

    @type  localeID:        string
    @param localeID:        identifier of the preferred locale, may be C{None}
    @type  usePyICU:        boolean
    @param usePyICU:        if true, C{pdtLocales['icu']} is tried first; the
                            flag is reset to C{False} on the instance when the
                            returned locale object's C{icu} attribute is C{None}
    @type  fallbackLocales: list
    @param fallbackLocales: locale identifiers tried in order when C{localeID}
                            is not a key of C{pdtLocales}; C{'en_US'} is always
                            added as the last resort

    The regex source strings are stored as C{RE_*}, C{DATERNG*} and
    C{TIMERNG*} attributes and are registered in C{self.cre_source} under
    C{CRE_*} keys together with C{self.re_option}, the flags they are
    meant to be compiled with.
    """
    self.localeID = localeID

    # Work on a private copy: appending 'en_US' to the caller's list (or to
    # the shared default list) would leak state between instances.
    self.fallbackLocales = list(fallbackLocales)

    if 'en_US' not in self.fallbackLocales:
        self.fallbackLocales.append('en_US')

    # define non-locale specific constants

    self.locale = None
    self.usePyICU = usePyICU

    # starting cache of leap years
    # daysInMonth will add to this if during
    # runtime it gets a request for a year not found
    self._leapYears = [
        1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936, 1940, 1944,
        1948, 1952, 1956, 1960, 1964, 1968, 1972, 1976, 1980, 1984, 1988,
        1992, 1996, 2000, 2004, 2008, 2012, 2016, 2020, 2024, 2028, 2032,
        2036, 2040, 2044, 2048, 2052, 2056, 2060, 2064, 2068, 2072, 2076,
        2080, 2084, 2088, 2092, 2096,
    ]

    # durations expressed in seconds
    self.Second = 1
    self.Minute = 60 * self.Second
    self.Hour = 60 * self.Minute
    self.Day = 24 * self.Hour
    self.Week = 7 * self.Day
    self.Month = 30 * self.Day
    self.Year = 365 * self.Day

    self._DaysInMonthList = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    self.rangeSep = '-'
    self.BirthdayEpoch = 50

    # YearParseStyle controls how we parse "Jun 12", i.e. dates that do
    # not have a year present.  The default is to compare the date given
    # to the current date, and if prior, then assume the next year.
    # Setting this to 0 will prevent that.

    self.YearParseStyle = 1

    # DOWParseStyle controls how we parse "Tuesday"
    # If the current day was Thursday and the text to parse is "Tuesday"
    # then the following table shows how each style would be returned
    # -1, 0, +1
    #
    # Current day marked as ***
    #
    #          Sun Mon Tue Wed Thu Fri Sat
    # week -1
    # current          -1,0    ***
    # week +1          +1
    #
    # If the current day was Monday and the text to parse is "Tuesday"
    # then the following table shows how each style would be returned
    # -1, 0, +1
    #
    #          Sun Mon Tue Wed Thu Fri Sat
    # week -1          -1
    # current      *** 0,+1
    # week +1

    self.DOWParseStyle = 1

    # CurrentDOWParseStyle controls how we parse "Friday"
    # If the current day was Friday and the text to parse is "Friday"
    # then the following table shows how each style would be returned
    # True/False. This also depends on DOWParseStyle.
    #
    # Current day marked as ***
    #
    # DOWParseStyle = 0
    #          Sun Mon Tue Wed Thu Fri Sat
    # week -1
    # current                      T,F
    # week +1
    #
    # DOWParseStyle = -1
    #          Sun Mon Tue Wed Thu Fri Sat
    # week -1                       F
    # current                       T
    # week +1
    #
    # DOWParseStyle = +1
    #
    #          Sun Mon Tue Wed Thu Fri Sat
    # week -1
    # current                       T
    # week +1                       F

    self.CurrentDOWParseStyle = False

    if self.usePyICU:
        self.locale = pdtLocales['icu'](self.localeID)

        if self.locale.icu is None:
            self.usePyICU = False
            self.locale = None

    if self.locale is None:
        if self.localeID not in pdtLocales:
            # walk the fallbacks in order and stop at the first known one;
            # localeID is left at the last candidate when none is known
            for candidate in self.fallbackLocales:
                self.localeID = candidate
                if candidate in pdtLocales:
                    break

        self.locale = pdtLocales[self.localeID]()

    if self.locale is not None:
        re_values = self.locale.re_values

        # escape any regex special characters that may be found
        wd = tuple(map(re.escape, self.locale.Weekdays))
        swd = tuple(map(re.escape, self.locale.shortWeekdays))
        mth = tuple(map(re.escape, self.locale.Months))
        smth = tuple(map(re.escape, self.locale.shortMonths))

        # the fixed-arity templates double as a check that the locale
        # supplies exactly 12 month names and 7 weekday names
        re_values['months'] = '%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s' % mth
        re_values['shortmonths'] = '%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s' % smth
        re_values['days'] = '%s|%s|%s|%s|%s|%s|%s' % wd
        re_values['shortdays'] = '%s|%s|%s|%s|%s|%s|%s' % swd

        re_values['numbers'] = '|'.join(map(re.escape, self.locale.numbers))

        units = []
        for unit in self.locale.units:
            units.extend(self.locale.units[unit])
        re_values['units'] = '|'.join(map(re.escape, units))

        # todo: analyze all the modifiers to figure out which ones truly
        #       belong where.  while it is obvious looking at the code that
        #       _evalModifier2 should be handling 'after', it remains to be
        #       researched which ones belong where and how to make it
        #       locale-independent
        modifiers = []
        modifiers_before = []
        modifiers_after = []
        modifiers_one = []
        modifiers_two = []
        for modifier in self.locale.Modifiers:
            offset = self.locale.Modifiers[modifier]
            modifiers.append(modifier)

            if offset < 0:
                modifiers_before.append(modifier)
            elif offset > 0:
                modifiers_after.append(modifier)

            if offset < 0 or modifier in ('after', 'from'):
                modifiers_two.append(modifier)
            elif offset > 0:
                modifiers_one.append(modifier)

        re_values['modifiers'] = '|'.join(map(re.escape, modifiers))
        re_values['modifiers-before'] = '|'.join(map(re.escape, modifiers_before))
        re_values['modifiers-after'] = '|'.join(map(re.escape, modifiers_after))
        re_values['modifiers-one'] = '|'.join(map(re.escape, modifiers_one))
        re_values['modifiers-two'] = '|'.join(map(re.escape, modifiers_two))

        re_values['sources'] = '|'.join(map(re.escape, self.locale.re_sources))

        # build weekday offsets - yes, it assumes the Weekday and shortWeekday
        # lists are in the same order and Mon..Sun (Python style)
        for offset, key in enumerate(self.locale.Weekdays):
            self.locale.WeekdayOffsets[key] = offset
        for offset, key in enumerate(self.locale.shortWeekdays):
            self.locale.WeekdayOffsets[key] = offset

        # build month offsets - yes, it assumes the Months and shortMonths
        # lists are in the same order and Jan..Dec
        for offset, key in enumerate(self.locale.Months, 1):
            self.locale.MonthOffsets[key] = offset
        for offset, key in enumerate(self.locale.shortMonths, 1):
            self.locale.MonthOffsets[key] = offset

    _initSymbols(self)

    # TODO add code to parse the date formats and build the regexes up from sub-parts
    # TODO find all hard-coded uses of date/time separators

    # NOTE: every pattern below is registered in cre_source together with
    # re_option (IGNORECASE | VERBOSE), so the line breaks and indentation
    # inside the triple-quoted strings are not part of the pattern.

    # not being used in code, but kept in case others are manually
    # utilizing this regex for their own purposes
    self.RE_DATE4 = r'''(?P<date>(((?P<day>\d\d?)(?P<suffix>%(daysuffix)s)?(,)?(\s)?)
                                  (?P<mthname>(%(months)s|%(shortmonths)s))\s?
                                  (?P<year>\d\d(\d\d)?)?
                                 )
                        )''' % self.locale.re_values

    # DATE3 was refactored to fix Issue 16
    # http://code.google.com/p/parsedatetime/issues/detail?id=16
    #
    # still not completely sure of the behavior of the regex and
    # whether it would be best to consume all possible irrelevant characters
    # before the option groups (but within the {1,3} repetition group
    # or inside of each option group, as it currently does
    # however, right now, all tests are passing that were,
    # including fixing the bug of matching a 4-digit year as ddyy
    # when the day is absent from the string
    self.RE_DATE3 = r'''(?P<date>
                        (
                            (
                                (^|\s)
                                (?P<mthname>(%(months)s|%(shortmonths)s)(?![a-zA-Z_]))
                            ){1}
                            |
                            (
                                (^|\s)
                                (?P<day>([1-9]|[0-2][0-9]|3[0-1])(?!(\d|pm|am)))
                                (?P<suffix>%(daysuffix)s)?
                            ){1}
                            |
                            (
                                ,?\s?
                                (?P<year>\d\d(\d\d)?)
                            ){1}
                        ){1,3}
                    )''' % self.locale.re_values

    # not being used in code, but kept in case others are manually
    # utilizing this regex for their own purposes
    self.RE_MONTH = r'''(\s|^)
                        (?P<month>(
                            (?P<mthname>(%(months)s|%(shortmonths)s))
                            (\s?(?P<year>(\d\d\d\d)))?
                        ))
                        (\s|$|[^0-9a-zA-Z])''' % self.locale.re_values
    self.RE_WEEKDAY = r'''(\s|^)
                          (?P<weekday>(%(days)s|%(shortdays)s))
                          (\s|$|[^0-9a-zA-Z])''' % self.locale.re_values

    self.RE_NUMBER = r'(%(numbers)s|\d+)' % self.locale.re_values

    self.RE_SPECIAL = r'(?P<special>^[%(specials)s]+)\s+' % self.locale.re_values
    self.RE_UNITS = r'''(?P<qty>(-?(\b(%(numbers)s)\b|\d+)\s*
                        (?P<units>((\b%(units)s)s?))
                        ))''' % self.locale.re_values
    self.RE_QUNITS = r'''(?P<qty>(-?(\b(%(numbers)s)\b|\d+)\s?
                         (?P<qunits>\b%(qunits)s)
                         (\s?|,|$)
                         ))''' % self.locale.re_values
    self.RE_MODIFIER = r'''(\s|^)
                           (?P<modifier>
                           (%(modifiers-one)s))''' % self.locale.re_values
    self.RE_MODIFIER2 = r'''(\s|^)
                            (?P<modifier>
                            (%(modifiers-two)s))
                            (\s|$|[^0-9a-zA-Z])''' % self.locale.re_values
    self.RE_TIMEHMS = r'''(\s?|^)
                          (?P<hours>\d\d?)
                          (?P<tsep>%(timeseperator)s|)
                          (?P<minutes>\d\d)
                          (?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?''' % self.locale.re_values
    self.RE_TIMEHMS2 = r'''(?P<hours>(\d\d?))
                           ((?P<tsep>%(timeseperator)s|)
                           (?P<minutes>(\d\d?))
                           (?:(?P=tsep)
                           (?P<seconds>\d\d?
                           (?:[.,]\d+)?))?)?''' % self.locale.re_values
    self.RE_NLP_PREFIX = r'''(?P<nlp_prefix>
                             (on)(\s)+1
                             |
                             (at|in)(\s)+2
                             |
                             (in)(\s)+3
                            )'''

    if 'meridian' in self.locale.re_values:
        self.RE_TIMEHMS2 += r'\s?(?P<meridian>(%(meridian)s))' % self.locale.re_values

    # '.' is always accepted as a date separator in addition to the
    # separators supplied by the locale
    dateSeps = ''.join(self.locale.dateSep) + '.'

    self.RE_DATE = r'''(\s?|^)
                       (?P<date>(\d\d?[%s]\d\d?([%s]\d\d(\d\d)?)?))
                       (\s?|$|[^0-9a-zA-Z])''' % (dateSeps, dateSeps)
    self.RE_DATE2 = r'[%s]' % dateSeps
    self.RE_DAY = r'''(\s|^)
                      (?P<day>(today|tomorrow|yesterday))
                      (\s|$|[^0-9a-zA-Z])''' % self.locale.re_values
    self.RE_DAY2 = r'''(?P<day>\d\d?)(?P<suffix>%(daysuffix)s)?
                    ''' % self.locale.re_values
    self.RE_TIME = r'''(\s?|^)
                       (?P<time>(%(sources)s))
                       (\s?|$|[^0-9a-zA-Z])''' % self.locale.re_values
    self.RE_REMAINING = r'\s+'

    # Regex for date/time ranges
    self.RE_RTIMEHMS = r'''(\s?|^)
                           (\d\d?)%(timeseperator)s
                           (\d\d)
                           (%(timeseperator)s(\d\d))?
                           (\s?|$)''' % self.locale.re_values
    self.RE_RTIMEHMS2 = r'''(\s?|^)
                            (\d\d?)
                            (%(timeseperator)s(\d\d?))?
                            (%(timeseperator)s(\d\d?))?''' % self.locale.re_values

    if 'meridian' in self.locale.re_values:
        self.RE_RTIMEHMS2 += r'\s?(%(meridian)s)' % self.locale.re_values

    self.RE_RDATE = r'(\d+([%s]\d+)+)' % dateSeps
    self.RE_RDATE3 = r'''((((%(months)s))\s?
                         ((\d\d?)
                         (\s?|%(daysuffix)s|$)+)?
                         (,\s?\d\d\d\d)?))''' % self.locale.re_values

    # "06/07/06 - 08/09/06"
    self.DATERNG1 = self.RE_RDATE + r'\s?%(rangeseperator)s\s?' + self.RE_RDATE
    self.DATERNG1 = self.DATERNG1 % self.locale.re_values

    # "march 31 - june 1st, 2006"
    self.DATERNG2 = self.RE_RDATE3 + r'\s?%(rangeseperator)s\s?' + self.RE_RDATE3
    self.DATERNG2 = self.DATERNG2 % self.locale.re_values

    # "march 1rd -13th"
    self.DATERNG3 = self.RE_RDATE3 + r'\s?%(rangeseperator)s\s?(\d\d?)\s?(rd|st|nd|th)?'
    self.DATERNG3 = self.DATERNG3 % self.locale.re_values

    # "4:00:55 pm - 5:90:44 am", '4p-5p'
    self.TIMERNG1 = self.RE_RTIMEHMS2 + r'\s?%(rangeseperator)s\s?' + self.RE_RTIMEHMS2
    self.TIMERNG1 = self.TIMERNG1 % self.locale.re_values

    # "4:00 - 5:90 ", "4:55:55-3:44:55"
    self.TIMERNG2 = self.RE_RTIMEHMS + r'\s?%(rangeseperator)s\s?' + self.RE_RTIMEHMS
    self.TIMERNG2 = self.TIMERNG2 % self.locale.re_values

    # "4-5pm "
    self.TIMERNG3 = r'\d\d?\s?%(rangeseperator)s\s?' + self.RE_RTIMEHMS2
    self.TIMERNG3 = self.TIMERNG3 % self.locale.re_values

    # "4:30-5pm "
    self.TIMERNG4 = self.RE_RTIMEHMS + r'\s?%(rangeseperator)s\s?' + self.RE_RTIMEHMS2
    self.TIMERNG4 = self.TIMERNG4 % self.locale.re_values

    # flags and pattern sources keyed by the CRE_* attribute names under
    # which they are registered
    self.re_option = re.IGNORECASE | re.VERBOSE
    self.cre_source = {
        'CRE_SPECIAL': self.RE_SPECIAL,
        'CRE_NUMBER': self.RE_NUMBER,
        'CRE_UNITS': self.RE_UNITS,
        'CRE_QUNITS': self.RE_QUNITS,
        'CRE_MODIFIER': self.RE_MODIFIER,
        'CRE_MODIFIER2': self.RE_MODIFIER2,
        'CRE_TIMEHMS': self.RE_TIMEHMS,
        'CRE_TIMEHMS2': self.RE_TIMEHMS2,
        'CRE_DATE': self.RE_DATE,
        'CRE_DATE2': self.RE_DATE2,
        'CRE_DATE3': self.RE_DATE3,
        'CRE_DATE4': self.RE_DATE4,
        'CRE_MONTH': self.RE_MONTH,
        'CRE_WEEKDAY': self.RE_WEEKDAY,
        'CRE_DAY': self.RE_DAY,
        'CRE_DAY2': self.RE_DAY2,
        'CRE_TIME': self.RE_TIME,
        'CRE_REMAINING': self.RE_REMAINING,
        'CRE_RTIMEHMS': self.RE_RTIMEHMS,
        'CRE_RTIMEHMS2': self.RE_RTIMEHMS2,
        'CRE_RDATE': self.RE_RDATE,
        'CRE_RDATE3': self.RE_RDATE3,
        'CRE_TIMERNG1': self.TIMERNG1,
        'CRE_TIMERNG2': self.TIMERNG2,
        'CRE_TIMERNG3': self.TIMERNG3,
        'CRE_TIMERNG4': self.TIMERNG4,
        'CRE_DATERNG1': self.DATERNG1,
        'CRE_DATERNG2': self.DATERNG2,
        'CRE_DATERNG3': self.DATERNG3,
        'CRE_NLP_PREFIX': self.RE_NLP_PREFIX,
    }
    self.cre_keys = list(self.cre_source.keys())
2435
def __getattr__(self, name):
    """
    Resolve attributes that are not found by normal lookup.

    A name listed in C{self.cre_keys} is compiled from
    C{self.cre_source[name]} with C{self.re_option}; the compiled pattern
    is stored on the instance, so later accesses no longer reach this
    method.  A name listed in C{self.locale.locale_keys} is read from the
    locale object.  Any other name raises C{AttributeError}.
    """
    # This hook only runs when normal lookup has already failed.  If one of
    # the attributes it depends on is itself missing (a bare instance made
    # by copy/pickle, or a lookup before __init__ has finished) reading it
    # below would re-enter this method without end, so fail right away.
    if name in ('cre_keys', 'cre_source', 're_option', 'locale'):
        raise AttributeError(name)

    if name in self.cre_keys:
        value = re.compile(self.cre_source[name], self.re_option)
        setattr(self, name, value)
        return value

    if name in self.locale.locale_keys:
        return getattr(self.locale, name)

    raise AttributeError(name)
2445
def daysInMonth(self, month, year):
    """
    Take the given month (1-12) and a given year (4 digit) return
    the number of days in the month adjusting for leap year as needed

    @type  month: integer
    @param month: month number, 1 (January) through 12 (December)
    @type  year:  integer
    @param year:  year used to decide whether February has 29 days

    @rtype:  integer
    @return: number of days in the month, or C{None} when C{month} is
             outside the range 1-12
    """
    # pass the values as logging arguments so the message is only
    # formatted when debug output is actually enabled
    log.debug('daysInMonth(%s, %s)', month, year)

    if not 0 < month <= 12:
        return None

    days = self._DaysInMonthList[month - 1]

    if month == 2:
        if year in self._leapYears:
            days += 1
        elif calendar.isleap(year):
            # remember leap years that were not in the starting cache
            self._leapYears.append(year)
            days += 1

    return days
2465
def buildSources(self, sourceTime=None):
    """
    Return a dictionary of date/time tuples based on the keys
    found in self.re_sources.

    The current time is used as the default and any specified
    item found in self.re_sources is inserted into the value
    and the generated dictionary is returned.

    @type  sourceTime: 9-item sequence (e.g. C{time.struct_time})
    @param sourceTime: time supplying the default values; the current
                       local time is used when it is C{None}

    @rtype:  dictionary
    @return: maps each key of self.re_sources to a 9-item tuple
             (yr, mth, dy, hr, mn, sec, wd, yd, isdst); wd, yd and isdst
             are always copied unchanged from the default time
    """
    if sourceTime is None:
        sourceTime = time.localtime()
    (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

    # (key, default) pairs in the order they appear in the result tuple
    defaults = (('yr', yr), ('mth', mth), ('dy', dy),
                ('hr', hr), ('mn', mn), ('sec', sec))

    # look the mapping up once instead of on every loop iteration
    re_sources = self.re_sources

    sources = {}
    for item in re_sources:
        source = re_sources[item]
        overridden = [source.get(key, default) for key, default in defaults]
        sources[item] = tuple(overridden) + (wd, yd, isdst)

    return sources
2498