[rig3] r452 committed - Double unicode rainbow all the way....

13 views
Skip to first unread message

ri...@googlecode.com

unread,
Aug 30, 2010, 2:30:45 AM (8/30/10)
to rig3-d...@googlegroups.com
Revision: 452
Author: ralfoide
Date: Sun Aug 29 23:29:40 2010
Log: Double unicode rainbow all the way.
å is a pesky little bugger.
http://code.google.com/p/rig3/source/detail?r=452

Modified:
/trunk/rig3serv/misc/TaskList.txt
/trunk/rig3serv/src/rig/cache.py
/trunk/rig3serv/src/rig/hashable.py
/trunk/rig3serv/src/rig/site/site_default.py
/trunk/rig3serv/src/rig/source_item.py
/trunk/rig3serv/src/rig/template/tag.py
/trunk/rig3serv/src/rig/version.py
/trunk/rig3serv/testdata/album/blog1/file_items/2007-09-09 Izu File Item.izu

=======================================
--- /trunk/rig3serv/misc/TaskList.txt Sun Aug 29 22:27:47 2010
+++ /trunk/rig3serv/misc/TaskList.txt Sun Aug 29 23:29:40 2010
@@ -38,6 +38,7 @@

---- Done Version 0.4 ----

+20100829 Engine: Generate UTF-8 files by default. Full unicode workflow support.
20100829 Engine: Switch Hashable and Cache from MD5 to SHA1 keys
20100829 Engine: Add support for encoding. Switch to explicit UTF-8 by default.
20100828 Engine: Experimental old izu blog reader, continued
=======================================
--- /trunk/rig3serv/src/rig/cache.py Sun Aug 29 22:22:31 2010
+++ /trunk/rig3serv/src/rig/cache.py Sun Aug 29 23:29:40 2010
@@ -245,11 +245,19 @@
for k, v in obj.iteritems():
self._ShaHash(md, k)
self._ShaHash(md, v)
+ elif isinstance(obj, unicode):
+ # Transforms the unicode string into a python string
representation
+ # of the unicode string, thus removing encodings.
+ md.update(obj.encode("unicode_escape"))
else:
- r = repr(obj)
- if "object at 0x" in r:
- raise AssertionError("Object %s does not override __repr__
for cache hash" % type(obj))
- md.update(r)
+ try:
+ r = repr(obj)
+ if "object at 0x" in r:
+ raise AssertionError("Object %s does not override
__repr__ for cache hash" % type(obj))
+ md.update(r)
+ except Exception, e:
+ self._log.Debug("Invalid cache object: %s", type(obj))
+ raise e

def _RemoveDir(self, dir_path):
"""
=======================================
--- /trunk/rig3serv/src/rig/hashable.py Sun Aug 29 22:26:43 2010
+++ /trunk/rig3serv/src/rig/hashable.py Sun Aug 29 23:29:40 2010
@@ -57,7 +57,9 @@
md.update(str(obj))

elif isinstance(obj, unicode):
- md.update(unicode(obj))
+ # Transforms the unicode string into a python string
representation
+ # of the unicode string, thus removing encodings.
+ md.update(obj.encode("unicode_escape"))

else:
md.update(repr(obj))
=======================================
--- /trunk/rig3serv/src/rig/site/site_default.py Sun Aug 29 21:19:47 2010
+++ /trunk/rig3serv/src/rig/site/site_default.py Sun Aug 29 23:29:40 2010
@@ -24,12 +24,13 @@
"""
__author__ = "ralfoide at gmail com"

-import re
-import os
import cgi
-import zlib
+import codecs
+import os
+import re
import time
import urllib
+import zlib
from datetime import date, datetime

from rig.parser.izu_parser import IzuParser
@@ -51,8 +52,11 @@
self.permalink = permalink

def __repr__(self):
+ content = self.content
+ if isinstance(content, unicode):
+ content = content.encode("unicode_escape")
return "<%s: title %s, date %s, link %s, content %s>" % (
- self.__class__.__name__, self.title, self.date,
self.permalink, self.content)
+ self.__class__.__name__, self.title, self.date,
self.permalink, content)

#------------------------
class MonthPageItem(object):
@@ -1174,7 +1178,8 @@
self._log.Info("[%s] Write %s",
self._site_settings.public_name,
dest_file)
- f = file(dest_file, mode="wb")
+
+ f = codecs.open(dest_file, mode="wb", encoding="utf-8")
f.write(data)
f.close()
return dest_file
=======================================
--- /trunk/rig3serv/src/rig/source_item.py Sun Aug 29 21:19:47 2010
+++ /trunk/rig3serv/src/rig/source_item.py Sun Aug 29 23:29:40 2010
@@ -263,9 +263,8 @@

def RigHash(self, md=None):
"""
- Computes a hash that depends on the real path of the file (like
- ContentHash) but *also* depends on the items date, source settings
- and categories.
+ Computes a hash that depends on the content (like ContentHash)
+ but *also* depends on the source settings and categories.
"""
md = super(SourceContent, self).RigHash(md)
md = self.ContentHash(md)
@@ -273,7 +272,7 @@

def ContentHash(self, md=None):
"""
- Computes a hash that only depends on the real path of the file.
+ Computes a hash that only depends on the content.
"""
md = super(SourceContent, self).ContentHash(md)
md = self.UpdateHash(md, self.tags)
@@ -283,11 +282,10 @@
return md

def __repr__(self):
- return "<%s (%s) %s, %s, %s, %s>" % (self.__class__.__name__,
+ return "<%s (%s) %s, %s, %s>" % (self.__class__.__name__,
self.date,
self.title,
self.tags,
- self.content,
self.source_settings)

def PrettyRepr(self):
=======================================
--- /trunk/rig3serv/src/rig/template/tag.py Wed Sep 2 21:57:04 2009
+++ /trunk/rig3serv/src/rig/template/tag.py Sun Aug 29 23:29:40 2010
@@ -82,9 +82,12 @@
def Generate(self, log, tag_node, context):
try:
result = eval(tag_node.Parameters(), dict(context))
- result = str(result)
+ if not isinstance(result, (str, unicode)):
+ result = str(result)
+ return result
except Exception, e:
- raise e.__class__("%s\nTag: %s\nContext: %s" % (e, tag_node,
context))
+ raise Exception("%s: %s\nTag: %s\nContext: %s\n" %
+ (type(e), e, tag_node, context))
return result


@@ -101,10 +104,11 @@
def Generate(self, log, tag_node, context):
try:
result = eval(tag_node.Parameters(), dict(context))
- result = cgi.escape(str(result))
+ if not isinstance(result, (str, unicode)):
+ result = str(result)
+ return cgi.escape(result)
except Exception, e:
raise e.__class__("%s\nTag: %s\nContext: %s" % (e, tag_node,
context))
- return result


#------------------------
@@ -123,7 +127,9 @@
def Generate(self, log, tag_node, context):
try:
result = eval(tag_node.Parameters(), dict(context))
- result = cgi.escape(str(result))
+ if not isinstance(result, (str, unicode)):
+ result = str(result)
+ return cgi.escape(result)
except Exception, e:
raise e.__class__("%s\nTag: %s\nContext: %s" % (e, tag_node,
context))
return result
@@ -142,7 +148,9 @@
def Generate(self, log, tag_node, context):
try:
result = eval(tag_node.Parameters(), dict(context))
- result = _RE_URL.sub(_UrlEncode, str(result))
+ if not isinstance(result, (str, unicode)):
+ result = str(result)
+ result = _RE_URL.sub(_UrlEncode, result)
except Exception, e:
raise e.__class__("%s\nTag: %s\nContext: %s" % (e, tag_node,
context))
return result
=======================================
--- /trunk/rig3serv/src/rig/version.py Sun Aug 29 22:27:47 2010
+++ /trunk/rig3serv/src/rig/version.py Sun Aug 29 23:29:40 2010
@@ -13,7 +13,7 @@
To enable substitutions, do something like this:
$ svn propset svn:keywords "Date Author Revision HeadURL Id" version.py

-----
+-----

Part of Rig3.
Copyright (C) 2007-2009 ralfoide gmail com
=======================================
--- /trunk/rig3serv/testdata/album/blog1/file_items/2007-09-09 Izu File Item.izu Sun Aug 29 21:19:47 2010
+++ /trunk/rig3serv/testdata/album/blog1/file_items/2007-09-09 Izu File Item.izu Sun Aug 29 23:29:40 2010
@@ -11,6 +11,12 @@
when using the directory-based items.
Rig link: [This is a rig link|riglink:T12896*.jpg]

+This little accent here causes a __lot__ of trouble because it encodes to 0xE5
+and the encoding behavior is different whether I run this using Windows' WPython
+or cygwin's python (sigh, no kidding):
+ "å" from
+ "Deepzooming at the top of the Mandelbrot set has already been done, for example in this page from Bengt Månsson."
+
[s:fr]
Un mot ou deux en français, avec des accents en UTF-8:
ça, où est le pré près du prêt?
Reply all
Reply to author
Forward
0 new messages