bendikro : Avoid running chardet in decode_string if not needed

0 views
Skip to first unread message

g...@deluge-torrent.org

unread,
Dec 10, 2012, 8:38:50 PM (12/10/12)
to deluge...@googlegroups.com
Module: deluge
Branch: master
Commit: d5e340354e46362126341020931df4606b421f20

Author: bendikro <bend...@gmail.com>
Date: Mon Nov 26 02:15:10 2012 +0100

Avoid running chardet in decode_string if not needed

---

deluge/common.py | 21 +++++++++++----------
1 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/deluge/common.py b/deluge/common.py
index 9a91531..ed11741 100644
--- a/deluge/common.py
+++ b/deluge/common.py
@@ -626,13 +626,17 @@ def decode_string(s, encoding="utf8"):
elif isinstance(s, unicode):
return s

- encodings = [(encoding, 'strict'), ("utf8", 'strict'),
- ("iso-8859-1", 'strict'),
- (chardet.detect(s)["encoding"], 'strict'),
- (chardet.detect(s)["encoding"], 'ignore')]
- for i in range(len(encodings)):
+ encodings = [lambda: ("utf8", 'strict'),
+ lambda: ("iso-8859-1", 'strict'),
+ lambda: (chardet.detect(s)["encoding"], 'strict'),
+ lambda: (chardet.detect(s)["encoding"], 'ignore')]
+
+ if not encoding is "utf8":
+ encodings.insert(0, lambda: (encoding, 'strict'))
+
+ for l in encodings:
try:
- return s.decode(encodings[i][0], encodings[i][1])
+ return s.decode(*l())
except UnicodeDecodeError:
pass
return u''
@@ -648,10 +652,7 @@ def utf8_encoded(s):

"""
if isinstance(s, str):
- try:
- s = decode_string(s).encode("utf8")
- except UnicodeEncodeError:
- log.warn("Error when encoding to utf8: %s" % s)
+ s = decode_string(s).encode("utf8")
elif isinstance(s, unicode):
s = s.encode("utf8", "ignore")
return s

Reply all
Reply to author
Forward
0 new messages