From: Terry Yiu <
g...@tyiu.xyz>
Signed-off-by: Terry Yiu <
g...@tyiu.xyz>
---
lnbc1pje4nu9pp5zf4jzxm2vs34es4k4z7h3gr9emsga8kgsvkvzlc08lgsmj64clqsdp4g3sk6atnypsh2ar0yp68yctwwdkxzarfdah8xgrxd9uzqurpw33kscqzzsxqyz5vqsp5r67njr9n46t9sv8c290ejuj9j5tsq7e9mqzusg574ezngmam2khs9qyyssqecqve2vymzyeaekchcmyh87ua2jj8jgn9s2usp4pxcv6z2w40yl4u0uwpadyq57ckvvwxkf84ts0se4clesvkep49w5skr7cd5h2wlgplahewv
damus/Util/Translator.swift | 7 ++++++-
nostrdb/NdbNote.swift | 25 +++++++++++++++++++++----
2 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/damus/Util/Translator.swift b/damus/Util/Translator.swift
index 1681e665132e..ce8231adfd4b 100644
--- a/damus/Util/Translator.swift
+++ b/damus/Util/Translator.swift
@@ -23,6 +23,11 @@ public struct Translator {
}
public func translate(_ text: String, from sourceLanguage: String, to targetLanguage: String) async throws -> String? {
+ // Do not attempt to translate if the source and target languages are the same.
+ guard sourceLanguage != targetLanguage else {
+ return nil
+ }
+
switch userSettingsStore.translation_service {
case .purple:
return try await translateWithPurple(text, from: sourceLanguage, to: targetLanguage)
@@ -35,7 +40,7 @@ public struct Translator {
case .deepl:
return try await translateWithDeepL(text, from: sourceLanguage, to: targetLanguage)
case .none:
- return text
+ return nil
}
}
diff --git a/nostrdb/NdbNote.swift b/nostrdb/NdbNote.swift
index d489ecbe17c0..09a3152f8109 100644
--- a/nostrdb/NdbNote.swift
+++ b/nostrdb/NdbNote.swift
@@ -411,7 +411,25 @@ extension NdbNote {
let originalBlocks = self.blocks(keypair).blocks
let originalOnlyText = originalBlocks.compactMap {
if case .text(let txt) = $0 {
- return txt
+ // Replacing right single quotation marks (’) with "typewriter or ASCII apostrophes" (')
+ // as a workaround to get Apple's language recognizer to predict the language correctly.
+ // Getting the language right matters because a misdetected language triggers unnecessary translation requests, which waste users' money.
+ // Until Apple fixes their language model, this workaround will be kept in place.
+ // See https://en.wikipedia.org/wiki/Apostrophe#Unicode for an explanation of the differences between the two characters.
+ //
+ // For example,
+ // "nevent1qqs0wsknetaju06xk39cv8sttd064amkykqalvfue7ydtg3p0lyfksqzyrhxagf6h8l9cjngatumrg60uq22v66qz979pm32v985ek54ndh8gj42wtp"
+ // has the note content "It’s a meme".
+ // Without the character replacement, it is 61% confident that the text is in Turkish (tr) and 8% confident that the text is in English (en),
+ // which is a wildly incorrect hypothesis.
+ // With the character replacement, it is 65% confident that the text is in English (en) and 24% confident that the text is in Turkish (tr), which is more accurate.
+ //
+ // Similarly,
+ // "nevent1qqspjqlln6wvxrqg6kzl2p7gk0rgr5stc7zz5sstl34cxlw55gvtylgpp4mhxue69uhkummn9ekx7mqpr4mhxue69uhkummnw3ez6ur4vgh8wetvd3hhyer9wghxuet5qy28wumn8ghj7un9d3shjtnwdaehgu3wvfnsygpx6655ve67vqlcme9ld7ww73pqx7msclhwzu8lqmkhvuluxnyc7yhf3xut"
+ // has the note content "You’re funner".
+ // Without the character replacement, it is 52% confident that the text is in Norwegian Bokmål (nb) and 41% confident that the text is in English (en).
+ // With the character replacement, it is 93% confident that the text is in English (en) and 4% confident that the text is in Norwegian Bokmål (nb).
+ return txt.replacingOccurrences(of: "’", with: "'")
}
else {
return nil
@@ -419,13 +437,12 @@ extension NdbNote {
}
.joined(separator: " ")
- // Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
let languageRecognizer = NLLanguageRecognizer()
languageRecognizer.processString(originalOnlyText)
+ // Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
guard let locale = languageRecognizer.languageHypotheses(withMaximum: 1).first(where: { $0.value >= 0.5 })?.key.rawValue else {
- let nstr: String? = nil
- return nstr
+ return nil
}
// Remove the variant component and just take the language part as translation services typically only supports the variant-less language.
--
2.39.0