Repository :
https://github.com/FarGroup/FarManager
On branch : master
Link :
https://github.com/FarGroup/FarManager/commit/879449f76b8f83a64d961667b4910ca785df648d
>---------------------------------------------------------------
commit 879449f76b8f83a64d961667b4910ca785df648d
Author: w17 <
vladimir....@gmail.com>
Date: Sun Jan 4 21:09:47 2026 +0300
far:config Codepages.NoAutoDetectCJK
>---------------------------------------------------------------
879449f76b8f83a64d961667b4910ca785df648d
far/config.cpp | 1 +
far/config.hpp | 1 +
far/filestr.cpp | 11 ++++++-----
far/thirdparty/uchardet/nsMBCSGroupProber.cpp | 6 +++---
far/thirdparty/uchardet/nsUniversalDetector.cpp | 5 +++--
far/uchardet.cpp | 8 +++++---
6 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/far/config.cpp b/far/config.cpp
index 01b73cdb9..b4457b1e6 100644
--- a/far/config.cpp
+++ b/far/config.cpp
@@ -1946,6 +1946,7 @@ void Options::InitConfigsData()
{FSSF_PRIVATE, NKeyCmdline, L"PromptFormat"sv, CmdLine.strPromptFormat, L"$p$g"sv},
{FSSF_PRIVATE, NKeyCmdline, L"UsePromptFormat"sv, CmdLine.UsePromptFormat, false},
{FSSF_PRIVATE, NKeyCodePages, L"CPMenuMode"sv, CPMenuMode, false},
+ {FSSF_PRIVATE, NKeyCodePages, L"NoAutoDetectCJK"sv, NoAutoDetectCJK, true},
{FSSF_PRIVATE, NKeyCodePages, L"NoAutoDetectCP"sv, strNoAutoDetectCP, L""sv},
{FSSF_PRIVATE, NKeyConfirmations, L"AllowReedit"sv, Confirm.AllowReedit, true},
{FSSF_CONFIRMATIONS, NKeyConfirmations, L"Copy"sv, Confirm.Copy, true},
diff --git a/far/config.hpp b/far/config.hpp
index a0b52f7ea..dc2e5b9e9 100644
--- a/far/config.hpp
+++ b/far/config.hpp
@@ -1012,6 +1012,7 @@ public:
InfoPanelOptions InfoPanel;
BoolOption CPMenuMode;
+ BoolOption NoAutoDetectCJK;
StringOption strNoAutoDetectCP;
// Перечисленные здесь кодовые страницы будут исключены из детектирования nsUniversalDetectorEx.
// Автодетект юникодных страниц от этого не зависит, поэтому UTF-8 будет определяться даже если
diff --git a/far/filestr.cpp b/far/filestr.cpp
index 83c63f8f5..e9bb5ad0f 100644
--- a/far/filestr.cpp
+++ b/far/filestr.cpp
@@ -434,7 +434,11 @@ static bool GetCpUsingML(std::string_view Str, uintptr_t& Codepage, function_ref
std::span const Scores(Info, InfoCount);
std::ranges::sort(Scores, [](DetectEncodingInfo const& a, DetectEncodingInfo const& b) { return a.nDocPercent > b.nDocPercent; });
- const auto It = std::ranges::find_if(Scores, [&](DetectEncodingInfo const& i) { return i.nLangID != 0xffffffff && IsCodepageAcceptable(i.nCodePage); });
+ const auto no_cjk = !Global || Global->Opt->NoAutoDetectCJK; // Global == nullptr in TEST_CASE("GetCpUsingML_M4000")
+ const auto is_cp_acceptable = [no_cjk, IsCodepageAcceptable](const UINT cp) { // cp is a code page id, never a language id
+ return !(no_cjk && cp >= 932 && cp <= 950) && IsCodepageAcceptable(cp); // with no_cjk set, code pages 932..950 are rejected before the caller's filter runs
+ };
+ const auto It = std::ranges::find_if(Scores, [&](DetectEncodingInfo const& i) { return i.nLangID != 0xffffffff && is_cp_acceptable(i.nCodePage); }); // the 0xffffffff sentinel is checked on nLangID; all filtering is done on nCodePage
if (It == Scores.end())
return false;
@@ -465,9 +469,6 @@ static bool GetCpUsingHeuristicsWithExceptions(std::string_view const Str, uintp
if (IsNotCodepage(Cp))
return false;
- if (!Global->Opt->CPMenuMode)
- return true;
-
if (IsStandardCodePage(Cp))
return true;
@@ -476,7 +477,7 @@ static bool GetCpUsingHeuristicsWithExceptions(std::string_view const Str, uintp
};
const auto IsCodepageAcceptable =
- Global->Opt->strNoAutoDetectCP == L"-1"sv?
+ Global->Opt->strNoAutoDetectCP == L"-2"sv || (Global->Opt->strNoAutoDetectCP == L"-1"sv && Global->Opt->CPMenuMode) ?
function_ref(IsCodepageWhitelisted) :
function_ref(IsCodepageNotBlacklisted);
diff --git a/far/thirdparty/uchardet/nsMBCSGroupProber.cpp b/far/thirdparty/uchardet/nsMBCSGroupProber.cpp
index 79cacd460..7754a9c2e 100644
--- a/far/thirdparty/uchardet/nsMBCSGroupProber.cpp
+++ b/far/thirdparty/uchardet/nsMBCSGroupProber.cpp
@@ -1,4 +1,4 @@
-/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
@@ -67,7 +67,7 @@ nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter)
codePointBufferIdx[i] = 0;
}
- mProbers[0] = new nsUTF8Prober();
+ //mProbers[0] = new nsUTF8Prober(); // windows detector does that detection better
if (aLanguageFilter & NS_FILTER_JAPANESE)
{
mProbers[1] = new nsSJISProber(aLanguageFilter == NS_FILTER_JAPANESE);
@@ -88,7 +88,7 @@ nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter)
for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
{
- if (mProbers[i]->DecodeToUnicode())
+ if (mProbers[i] && mProbers[i]->DecodeToUnicode())
{
int j = 0;
diff --git a/far/thirdparty/uchardet/nsUniversalDetector.cpp b/far/thirdparty/uchardet/nsUniversalDetector.cpp
index 06c2d9a23..f4ed07d66 100644
--- a/far/thirdparty/uchardet/nsUniversalDetector.cpp
+++ b/far/thirdparty/uchardet/nsUniversalDetector.cpp
@@ -1,4 +1,4 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
@@ -200,7 +200,8 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
}
//start multibyte and singlebyte charset prober
- if (nsnull == mCharSetProbers[0])
+ if (nsnull == mCharSetProbers[0] &&
+ (mLanguageFilter & NS_FILTER_CJK))
{
mCharSetProbers[0] = new nsMBCSGroupProber(mLanguageFilter);
if (nsnull == mCharSetProbers[0])
diff --git a/far/uchardet.cpp b/far/uchardet.cpp
index eabb30c1f..9703dc0d1 100644
--- a/far/uchardet.cpp
+++ b/far/uchardet.cpp
@@ -39,7 +39,9 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Internal:
#include "components.hpp"
+#include "config.hpp"
#include "encoding.hpp"
+#include "global.hpp"
#include "log.hpp"
// Platform:
@@ -90,14 +92,14 @@ WARNING_DISABLE_CLANG("-Wold-style-cast")
#include "thirdparty/uchardet/nsHebrewProber.cpp"
#include "thirdparty/uchardet/nsJohabProber.cpp"
#include "thirdparty/uchardet/nsLanguageDetector.cpp"
-#include "thirdparty/uchardet/nsLatin1Prober.cpp"
+//#include "thirdparty/uchardet/nsLatin1Prober.cpp"
#include "thirdparty/uchardet/nsMBCSGroupProber.cpp"
#include "thirdparty/uchardet/nsMBCSSM.cpp"
#include "thirdparty/uchardet/nsSBCharSetProber.cpp"
#include "thirdparty/uchardet/nsSBCSGroupProber.cpp"
#include "thirdparty/uchardet/nsSJISProber.cpp"
#include "thirdparty/uchardet/nsUniversalDetector.cpp"
-#include "thirdparty/uchardet/nsUTF8Prober.cpp"
+//#include "thirdparty/uchardet/nsUTF8Prober.cpp"
#define UCHARDET_LANGUAGE Arabic
#include "uchardet_model.hpp"
@@ -256,7 +258,7 @@ class UniversalDetector final: public uchardet::nsUniversalDetector
{
public:
explicit UniversalDetector(function_ref<bool(uintptr_t)> const IsCodepageAcceptable):
- nsUniversalDetector(NS_FILTER_ALL),
+ nsUniversalDetector(Global->Opt->NoAutoDetectCJK ? NS_FILTER_NON_CJK : NS_FILTER_ALL),
m_IsCodepageAcceptable(IsCodepageAcceptable)
{
}