Groups keyboard shortcuts have been updated
Dismiss
See shortcuts

Patch to optionally use Babel instead of PyICU for source data.

27 views
Skip to first unread message

Chris Lambacher

unread,
Sep 26, 2008, 8:13:42 AM (9/26/08)
to parsedatetime-dev
Hi,

I was having trouble getting a build of PyICU going for Windows since
I don't have Visual Studio and ICU4C's support of MinGW seems to be
spotty at best. I was already using Babel in my project, and Babel
provides the same base information from the same source that ICU gets
it from, so I added the option to use Babel as the data source. This
should significantly reduce the barrier to entry for people wanting to
use this project. A patch is included below. I have not included any
tests yet, because I was having trouble figuring out what was going
on there. It should pass all the PyICU tests using Babel. I'll see if
I can get some time this weekend to update the test suite.






Index: parsedatetime_consts.py
===================================================================
--- parsedatetime_consts.py (revision 84)
+++ parsedatetime_consts.py (working copy)
@@ -32,6 +32,10 @@
except:
pyicu = None

+try:
+ import babel
+except ImportError:
+ babel = None

import datetime
import calendar
@@ -487,6 +491,8 @@
return x.lower()

if pyicu and ptc.usePyICU:
+ ptc.useBabel = False
+
ptc.icuLocale = None

if ptc.localeID is not None:
@@ -536,6 +542,59 @@
'medium':
ptc.icu_tf['medium'].toPattern(),
'short':
ptc.icu_tf['short'].toPattern(),
}
+ elif babel and ptc.useBabel:
+ ptc.usePyICU = False
+
+ ptc.babelLocale = None
+
+ if ptc.localeID is not None:
+ ptc.babelLocale = babel.core.Locale.parse(ptc.localeID)
+
+ if ptc.babelLocale is None:
+ for id in range(0, len(ptc.fallbackLocales)):
+ ptc.localeID = ptc.fallbackLocales[id]
+ ptc.babelLocale =
babel.core.Locale.parse(ptc.localeID)
+
+ if ptc.babelLocale is not None:
+ break
+
+ # list of weekdays with monday first
+ wd = ptc.babelLocale.days['format']['wide']
+ swd = ptc.babelLocale.days['format']['abbreviated']
+ ptc.Weekdays = [lcase(wd[x]) for x in range(7)]
+ ptc.shortWeekdays = [lcase(swd[x]) for x in range(7)]
+
+ # list of months
+ mnth = ptc.babelLocale.months['format']['wide']
+ smnth = ptc.babelLocale.months['format']['abbreviated']
+ ptc.Months = [lcase(mnth[x]) for x in range(1,13)]
+ ptc.shortMonths = [lcase(smnth[x]) for x in range(1,13)]
+
+ ptc.re_consts = None
+ ptc.babel_df = { 'full':
ptc.babelLocale.date_formats['full'],
+ 'long':
ptc.babelLocale.date_formats['long'],
+ 'medium':
ptc.babelLocale.date_formats['medium'],
+ 'short':
ptc.babelLocale.date_formats['short']
+ }
+
+ ptc.babel_tf = { 'full':
ptc.babelLocale.time_formats['full'],
+ 'long':
ptc.babelLocale.time_formats['long'],
+ 'medium':
ptc.babelLocale.time_formats['medium'],
+ 'short':
ptc.babelLocale.time_formats['short']
+ }
+
+ ptc.dateFormats = { 'full': ptc.babel_df['full'].pattern,
+ 'long': ptc.babel_df['long'].pattern,
+ 'medium': ptc.babel_df['medium'].pattern,
+ 'short': ptc.babel_df['short'].pattern
+ }
+
+ ptc.timeFormats = { 'full': ptc.babel_tf['full'].pattern,
+ 'long': ptc.babel_tf['long'].pattern,
+ 'medium': ptc.babel_tf['medium'].pattern,
+ 'short': ptc.babel_tf['short'].pattern
+ }
+
else:
if not ptc.localeID in pdtLocales:
for id in range(0, len(ptc.fallbackLocales)):
@@ -546,6 +605,7 @@

ptc.locale = pdtLocales[ptc.localeID]
ptc.usePyICU = False
+ ptc.useBabel = False

ptc.Weekdays = ptc.locale.Weekdays
ptc.shortWeekdays = ptc.locale.shortWeekdays
@@ -668,6 +728,64 @@
dp_order.append(s[:1])

ptc.dp_order = dp_order
+
+ elif babel and ptc.useBabel:
+ am = u''
+ pm = u''
+ ts = ''
+
+ # As in the PyICU branch, the date and time
+ # separators are not read directly from the
+ # locale data - so we have to figure them out
+ s = ptc.timeFormats['short']
+
+ ptc.usesMeridian = u'a' in s
+ ptc.uses24 = u'H' in s
+
+ # '11:45 AM' or '11:45'
+ s = babel.dates.format_time(datetime.datetime(2003, 10, 30,
11, 45), format='short', locale=ptc.localeID)
+
+ # ': AM' or ':'
+ s = s.replace('11', '').replace('45', '')
+
+ if len(s) > 0:
+ ts = s[0]
+
+ if ptc.usesMeridian:
+ # '23:45 AM' or '23:45'
+ am = s[1:].strip()
+ s = babel.dates.format_time(datetime.datetime(2003, 10,
30, 23, 45), format='short', locale=ptc.localeID)
+
+ if ptc.uses24:
+ s = s.replace('23', '')
+ else:
+ s = s.replace('11', '')
+
+ # 'PM' or ''
+ pm = s.replace('45', '').replace(ts, '').strip()
+
+ ptc.timeSep = [ ts ]
+ ptc.meridian = [ am, pm ]
+
+ s = babel.dates.format_date(datetime.datetime(2003, 10, 30,
11, 45), format='short', locale=ptc.localeID)
+ s = s.replace('10', '').replace('30', '').replace('03',
'').replace('2003', '')
+
+ if len(s) > 0:
+ ds = s[0]
+ else:
+ ds = '/'
+
+ ptc.dateSep = [ ds ]
+ s = ptc.dateFormats['short']
+ l = s.lower().split(ds)
+ dp_order = []
+
+ for s in l:
+ if len(s) > 0:
+ dp_order.append(s[:1])
+
+ ptc.dp_order = dp_order
+
else:
ptc.timeSep = ptc.locale.timeSep
ptc.dateSep = ptc.locale.dateSep
@@ -875,7 +993,7 @@
if PyICU is not present or not requested, only the locales
defined by
C{pdtLocales} will be searched.
"""
- def __init__(self, localeID=None, usePyICU=True,
fallbackLocales=['en_US']):
+ def __init__(self, localeID=None, usePyICU=True,
fallbackLocales=['en_US'], useBabel=True):
self.localeID = localeID
self.fallbackLocales = fallbackLocales

@@ -886,6 +1004,7 @@

self.locale = None
self.usePyICU = usePyICU
+ self.useBabel = useBabel

# starting cache of leap years
# daysInMonth will add to this if during

bear

unread,
Sep 26, 2008, 5:43:36 PM (9/26/08)
to parsedat...@googlegroups.com
nice!

I'll try to apply this patch this weekend. I do have a tight
deadline at my workplace, but I'll give it a go.

thanks!

--
---
Bear

be...@seesmic.com (work)
be...@code-bear.com (jabber & email)
http://code-bear.com/bearlog (weblog)

PGP Fingerprint = 9996 719F 973D B11B E111 D770 9331 E822 40B3 CD29

Chris Lambacher

unread,
Sep 27, 2008, 12:49:55 PM (9/27/08)
to parsedat...@googlegroups.com
No hurry,

It is still not fully working for me -- not because I think there is
anything wrong with getting the parse data from Babel, but because I can't
round-trip the en_CA, fr_FR and fr_CA date formats.

They come out as:
22 Sep 2008 (en_CA)
22 sept. 2008 (fr_FR, fr_CA)

And get parsed as:
datetime(2008, 9, 20, 22, current_minutes, current_seconds)

It looks like you don't expect the day of the month to show up before the month,
and that 2008 is being parsed as 20/08, which I think is kind of weird.

Interestingly, I can round trip a datetime format:
22 Sep 2008 12:45:11 (en_CA)
22 sept. 2008 12:45:11 (fr)

Any suggestions on where to start to get this sorted? Obviously I should be
expanding the fr_FR test case.

-Chris

Reply all
Reply to author
Forward
0 new messages