bendikro : Changed decode_string to always return unicode.

1 view
Skip to first unread message

g...@deluge-torrent.org

unread,
Dec 10, 2012, 8:38:50 PM (12/10/12)
to deluge...@googlegroups.com
Module: deluge
Branch: master
Commit: 60f196ff933795980e62d579da6713df7e76dc1f

Author: bendikro <bend...@gmail.com>
Date: Sun Nov 25 13:01:12 2012 +0100

Changed decode_string to always return unicode.

---

deluge/common.py | 33 ++++++++++++++++++++++++---------
deluge/ui/common.py | 10 +++++-----
2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/deluge/common.py b/deluge/common.py
index ea615df..9a91531 100644
--- a/deluge/common.py
+++ b/deluge/common.py
@@ -608,22 +608,34 @@ def xml_encode(string):

def decode_string(s, encoding="utf8"):
"""
- Decodes a string and re-encodes it in utf8. If it cannot decode using
- `:param:encoding` then it will try to detect the string encoding and
- decode it.
+ Decodes a string and return unicode. If it cannot decode using
+ `:param:encoding` then it will try latin1, and if that fails,
+ try to detect the string encoding. If that fails, decode with
+ ignore.

:param s: string to decode
:type s: string
:keyword encoding: the encoding to use in the decoding
:type encoding: string
+ :returns: s converted to unicode
+ :rtype: unicode

"""
+ if not s:
+ return u''
+ elif isinstance(s, unicode):
+ return s

- try:
- s = s.decode(encoding).encode("utf8", "ignore")
- except UnicodeDecodeError:
- s = s.decode(chardet.detect(s)["encoding"], "ignore").encode("utf8", "ignore")
- return s
+ encodings = [(encoding, 'strict'), ("utf8", 'strict'),
+ ("iso-8859-1", 'strict'),
+ (chardet.detect(s)["encoding"], 'strict'),
+ (chardet.detect(s)["encoding"], 'ignore')]
+ for i in range(len(encodings)):
+ try:
+ return s.decode(encodings[i][0], encodings[i][1])
+ except UnicodeDecodeError:
+ pass
+ return u''

def utf8_encoded(s):
"""
@@ -636,7 +648,10 @@ def utf8_encoded(s):

"""
if isinstance(s, str):
- s = decode_string(s)
+ try:
+ s = decode_string(s).encode("utf8")
+ except UnicodeEncodeError:
+ log.warn("Error when encoding to utf8: %s" % s)
elif isinstance(s, unicode):
s = s.encode("utf8", "ignore")
return s
diff --git a/deluge/ui/common.py b/deluge/ui/common.py
index 3b754ce..000a850 100644
--- a/deluge/ui/common.py
+++ b/deluge/ui/common.py
@@ -51,7 +51,7 @@ except ImportError:
from sha import sha

from deluge import bencode
-from deluge.common import decode_string, path_join
+from deluge.common import utf8_encoded, path_join
import deluge.configmanager

log = logging.getLogger(__name__)
@@ -88,9 +88,9 @@ class TorrentInfo(object):
# Check if 'name.utf-8' is in the torrent and if not try to decode the string
# using the encoding found.
if "name.utf-8" in self.__m_metadata["info"]:
- self.__m_name = decode_string(self.__m_metadata["info"]["name.utf-8"])
+ self.__m_name = utf8_encoded(self.__m_metadata["info"]["name.utf-8"])
else:
- self.__m_name = decode_string(self.__m_metadata["info"]["name"], self.encoding)
+ self.__m_name = utf8_encoded(self.__m_metadata["info"]["name"], self.encoding)

# Get list of files from torrent info
paths = {}
@@ -104,7 +104,7 @@ class TorrentInfo(object):
if "path.utf-8" in f:
path = os.path.join(prefix, *f["path.utf-8"])
else:
- path = decode_string(os.path.join(prefix, decode_string(os.path.join(*f["path"]), self.encoding)), self.encoding)
+ path = utf8_encoded(os.path.join(prefix, utf8_encoded(os.path.join(*f["path"]), self.encoding)), self.encoding)
f["index"] = index
paths[path] = f

@@ -160,7 +160,7 @@ class TorrentInfo(object):
if "path.utf-8" in f:
path = os.path.join(prefix, *f["path.utf-8"])
else:
- path = decode_string(os.path.join(prefix, decode_string(os.path.join(*f["path"]), self.encoding)), self.encoding)
+ path = utf8_encoded(os.path.join(prefix, utf8_encoded(os.path.join(*f["path"]), self.encoding)), self.encoding)
self.__m_files.append({
'path': path,
'size': f["length"],

Reply all
Reply to author
Forward
0 new messages