Remove flattened fields from AcroForm [pdfium : main]

0 views
Skip to first unread message

Kuka (Gerrit)

unread,
Jun 3, 2026, 11:24:24 AM (yesterday) Jun 3
to pdfium-...@googlegroups.com

Kuka has uploaded the change for review

Commit message

Remove flattened fields from AcroForm

Remove widget annotations from AcroForm fields when flattening a page, and keep fields that are still referenced by other pages.

Add coverage for shared annotation arrays and shared widget annotations across pages.
Bug: 498010830
Change-Id: I001c9780375eb297e40f5b1f87698ac9a6a47fe7

Change diff

diff --git a/fpdfsdk/fpdf_flatten.cpp b/fpdfsdk/fpdf_flatten.cpp
index eb89264..00ed3ae 100644
--- a/fpdfsdk/fpdf_flatten.cpp
+++ b/fpdfsdk/fpdf_flatten.cpp
@@ -9,6 +9,7 @@
#include <limits.h>

#include <algorithm>
+#include <set>
#include <sstream>
#include <utility>
#include <vector>
@@ -16,6 +17,7 @@
#include "constants/annotation_common.h"
#include "constants/annotation_flags.h"
#include "constants/font_encodings.h"
+#include "constants/form_fields.h"
#include "constants/page_object.h"
#include "core/fpdfapi/edit/cpdf_contentstream_write_utils.h"
#include "core/fpdfapi/page/cpdf_page.h"
@@ -30,6 +32,7 @@
#include "core/fpdfapi/parser/cpdf_stream_acc.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
#include "core/fpdfdoc/cpdf_annot.h"
+#include "core/fxcrt/containers/contains.h"
#include "core/fxcrt/fx_string_wrappers.h"
#include "fpdfsdk/cpdfsdk_helpers.h"

@@ -38,6 +41,12 @@

namespace {

+constexpr char kAcroForm[] = "AcroForm";
+constexpr char kAnnots[] = "Annots";
+constexpr char kFields[] = "Fields";
+constexpr char kXFA[] = "XFA";
+constexpr int kMaxRecursion = 32;
+
bool IsValidRect(const CFX_FloatRect& rect, const CFX_FloatRect& rcPage) {
static constexpr float kMinSize = 0.000001f;
if (rect.IsEmpty() || rect.Width() < kMinSize || rect.Height() < kMinSize) {
@@ -314,6 +323,125 @@
SanitizeFontResources(resources_dict->GetMutableDictFor("Font"));
}

+bool IsWidgetAnnot(const CPDF_Dictionary* annot) {
+ return annot && annot->GetNameFor(pdfium::annotation::kSubtype) == "Widget";
+}
+
+void CollectPageWidgetAnnots(const CPDF_Dictionary* page_dict,
+ std::set<const CPDF_Dictionary*>* widget_annots) {
+ RetainPtr<const CPDF_Array> annots = page_dict->GetArrayFor(kAnnots);
+ if (!annots) {
+ return;
+ }
+
+ for (size_t i = 0; i < annots->size(); ++i) {
+ RetainPtr<const CPDF_Dictionary> annot = annots->GetDictAt(i);
+ if (IsWidgetAnnot(annot.Get())) {
+ widget_annots->insert(annot.Get());
+ }
+ }
+}
+
+void RemoveWidgetsReferencedByOtherPages(
+ CPDF_Document* document,
+ const CPDF_Dictionary* current_page_dict,
+ std::set<const CPDF_Dictionary*>* widget_annots) {
+ for (int i = 0, page_count = document->GetPageCount();
+ i < page_count && !widget_annots->empty(); ++i) {
+ RetainPtr<const CPDF_Dictionary> page_dict = document->GetPageDictionary(i);
+ if (!page_dict || page_dict.Get() == current_page_dict) {
+ continue;
+ }
+
+ std::set<const CPDF_Dictionary*> other_page_widget_annots;
+ CollectPageWidgetAnnots(page_dict.Get(), &other_page_widget_annots);
+ for (const CPDF_Dictionary* annot : other_page_widget_annots) {
+ widget_annots->erase(annot);
+ }
+ }
+}
+
+bool PruneFieldArray(CPDF_Array* fields,
+ const std::set<const CPDF_Dictionary*>& widget_annots,
+ std::set<const CPDF_Dictionary*>* visited_fields,
+ int level);
+
+bool ShouldPruneField(CPDF_Dictionary* field,
+ const std::set<const CPDF_Dictionary*>& widget_annots,
+ std::set<const CPDF_Dictionary*>* visited_fields,
+ int level) {
+ if (level > kMaxRecursion) {
+ return false;
+ }
+
+ if (pdfium::Contains(widget_annots, field)) {
+ return true;
+ }
+
+ RetainPtr<CPDF_Array> kids =
+ field->GetMutableArrayFor(pdfium::form_fields::kKids);
+ if (!kids || !visited_fields->insert(field).second) {
+ return false;
+ }
+
+ return PruneFieldArray(kids.Get(), widget_annots, visited_fields,
+ level + 1) &&
+ kids->IsEmpty();
+}
+
+bool PruneFieldArray(CPDF_Array* fields,
+ const std::set<const CPDF_Dictionary*>& widget_annots,
+ std::set<const CPDF_Dictionary*>* visited_fields,
+ int level) {
+ bool pruned = false;
+ for (size_t i = fields->size(); i > 0; --i) {
+ const size_t field_index = i - 1;
+ RetainPtr<CPDF_Dictionary> field = fields->GetMutableDictAt(field_index);
+ if (field &&
+ ShouldPruneField(field.Get(), widget_annots, visited_fields, level)) {
+ fields->RemoveAt(field_index);
+ pruned = true;
+ }
+ }
+ return pruned;
+}
+
+void RemoveFlattenedFields(CPDF_Document* document,
+ const CPDF_Dictionary* page_dict,
+ int level) {
+ std::set<const CPDF_Dictionary*> widget_annots;
+ CollectPageWidgetAnnots(page_dict, &widget_annots);
+ if (widget_annots.empty()) {
+ return;
+ }
+
+ RemoveWidgetsReferencedByOtherPages(document, page_dict, &widget_annots);
+ if (widget_annots.empty()) {
+ return;
+ }
+
+ RetainPtr<CPDF_Dictionary> root = document->GetMutableRoot();
+ if (!root) {
+ return;
+ }
+
+ RetainPtr<CPDF_Dictionary> acro_form = root->GetMutableDictFor(kAcroForm);
+ if (!acro_form) {
+ return;
+ }
+
+ RetainPtr<CPDF_Array> fields = acro_form->GetMutableArrayFor(kFields);
+ if (!fields) {
+ return;
+ }
+
+ std::set<const CPDF_Dictionary*> visited_fields;
+ PruneFieldArray(fields.Get(), widget_annots, &visited_fields, level);
+ if (fields->IsEmpty() && !acro_form->KeyExist(kXFA)) {
+ root->RemoveFor(kAcroForm);
+ }
+}
+
} // namespace

FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag) {
@@ -496,6 +624,7 @@
sFormName.c_str());
pNewXObject->SetDataAndRemoveFilter(sStream.unsigned_span());
}
- pPageDict->RemoveFor("Annots");
+ RemoveFlattenedFields(document, pPageDict.Get(), /*level=*/0);
+ pPageDict->RemoveFor(kAnnots);
return FLATTEN_SUCCESS;
}
diff --git a/fpdfsdk/fpdf_flatten_embeddertest.cpp b/fpdfsdk/fpdf_flatten_embeddertest.cpp
index ec1073f..e11a99b 100644
--- a/fpdfsdk/fpdf_flatten_embeddertest.cpp
+++ b/fpdfsdk/fpdf_flatten_embeddertest.cpp
@@ -134,6 +134,29 @@
EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
ScopedSavedDoc saved_doc = OpenScopedSavedDocument();
ASSERT_TRUE(saved_doc);
- // TODO(crbug.com/498010830): this should be FORMTYPE_NONE
+ EXPECT_EQ(FORMTYPE_NONE, FPDF_GetFormType(saved_doc.get()));
+}
+
+TEST_F(FPDFFlattenEmbedderTest, FlattenSharedAnnotArrayKeepsAcroForm) {
+ ASSERT_TRUE(OpenDocument("bug_498010830_shared_annots.pdf"));
+ ScopedPage page = LoadScopedPage(0);
+ ASSERT_TRUE(page);
+ EXPECT_EQ(FPDFPage_Flatten(page.get(), FLAT_NORMALDISPLAY), FLATTEN_SUCCESS);
+
+ EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+ ScopedSavedDoc saved_doc = OpenScopedSavedDocument();
+ ASSERT_TRUE(saved_doc);
+ EXPECT_EQ(FORMTYPE_ACRO_FORM, FPDF_GetFormType(saved_doc.get()));
+}
+
+TEST_F(FPDFFlattenEmbedderTest, FlattenSharedWidgetAnnotKeepsAcroForm) {
+ ASSERT_TRUE(OpenDocument("bug_498010830_shared_widget.pdf"));
+ ScopedPage page = LoadScopedPage(0);
+ ASSERT_TRUE(page);
+ EXPECT_EQ(FPDFPage_Flatten(page.get(), FLAT_NORMALDISPLAY), FLATTEN_SUCCESS);
+
+ EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+ ScopedSavedDoc saved_doc = OpenScopedSavedDocument();
+ ASSERT_TRUE(saved_doc);
EXPECT_EQ(FORMTYPE_ACRO_FORM, FPDF_GetFormType(saved_doc.get()));
}
diff --git a/testing/resources/bug_498010830_shared_annots.in b/testing/resources/bug_498010830_shared_annots.in
new file mode 100644
index 0000000..88bab06
--- /dev/null
+++ b/testing/resources/bug_498010830_shared_annots.in
@@ -0,0 +1,43 @@
+{{header}}
+{{object 1 0}}
+<<
+ /Type /Catalog
+ /Pages 2 0 R
+ /AcroForm << /Fields [ 6 0 R ] >>
+>>
+endobj
+{{object 2 0}}
+<< /Count 2 /Kids [ 3 0 R 4 0 R ] /Type /Pages >>
+endobj
+{{object 3 0}}
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots 5 0 R
+>>
+endobj
+{{object 4 0}}
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots 5 0 R
+>>
+endobj
+{{object 5 0}}
+[ 6 0 R ]
+endobj
+{{object 6 0}}
+<<
+ /Type /Annot
+ /FT /Tx
+ /T (Shared Annot Array Widget)
+ /Rect [ 100 100 200 130 ]
+ /Subtype /Widget
+>>
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/bug_498010830_shared_annots.pdf b/testing/resources/bug_498010830_shared_annots.pdf
new file mode 100644
index 0000000..c42010a
--- /dev/null
+++ b/testing/resources/bug_498010830_shared_annots.pdf
@@ -0,0 +1,56 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj
+<<
+ /Type /Catalog
+ /Pages 2 0 R
+ /AcroForm << /Fields [ 6 0 R ] >>
+>>
+endobj
+2 0 obj
+<< /Count 2 /Kids [ 3 0 R 4 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots 5 0 R
+>>
+endobj
+4 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots 5 0 R
+>>
+endobj
+5 0 obj
+[ 6 0 R ]
+endobj
+6 0 obj
+<<
+ /Type /Annot
+ /FT /Tx
+ /T (Shared Annot Array Widget)
+ /Rect [ 100 100 200 130 ]
+ /Subtype /Widget
+>>
+endobj
+xref
+0 7
+0000000000 65535 f
+0000000015 00000 n
+0000000104 00000 n
+0000000169 00000 n
+0000000264 00000 n
+0000000359 00000 n
+0000000384 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 7
+>>
+startxref
+510
+%%EOF
diff --git a/testing/resources/bug_498010830_shared_widget.in b/testing/resources/bug_498010830_shared_widget.in
new file mode 100644
index 0000000..4c32bcc
--- /dev/null
+++ b/testing/resources/bug_498010830_shared_widget.in
@@ -0,0 +1,40 @@
+{{header}}
+{{object 1 0}}
+<<
+ /Type /Catalog
+ /Pages 2 0 R
+ /AcroForm << /Fields [ 5 0 R ] >>
+>>
+endobj
+{{object 2 0}}
+<< /Count 2 /Kids [ 3 0 R 4 0 R ] /Type /Pages >>
+endobj
+{{object 3 0}}
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots [ 5 0 R ]
+>>
+endobj
+{{object 4 0}}
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots [ 5 0 R ]
+>>
+endobj
+{{object 5 0}}
+<<
+ /Type /Annot
+ /FT /Tx
+ /T (Shared Widget)
+ /Rect [ 100 100 200 130 ]
+ /Subtype /Widget
+>>
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/bug_498010830_shared_widget.pdf b/testing/resources/bug_498010830_shared_widget.pdf
new file mode 100644
index 0000000..6a17075
--- /dev/null
+++ b/testing/resources/bug_498010830_shared_widget.pdf
@@ -0,0 +1,52 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj
+<<
+ /Type /Catalog
+ /Pages 2 0 R
+ /AcroForm << /Fields [ 5 0 R ] >>
+>>
+endobj
+2 0 obj
+<< /Count 2 /Kids [ 3 0 R 4 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots [ 5 0 R ]
+>>
+endobj
+4 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots [ 5 0 R ]
+>>
+endobj
+5 0 obj
+<<
+ /Type /Annot
+ /FT /Tx
+ /T (Shared Widget)
+ /Rect [ 100 100 200 130 ]
+ /Subtype /Widget
+>>
+endobj
+xref
+0 6
+0000000000 65535 f
+0000000015 00000 n
+0000000104 00000 n
+0000000169 00000 n
+0000000268 00000 n
+0000000367 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 6
+>>
+startxref
+481
+%%EOF

Change information

Files:
Change size: L
Delta: 6 files changed, 345 insertions(+), 2 deletions(-)
Open in Gerrit

Related details

Attention set is empty
Submit Requirements:
  • requirement is not satisfied: Code-Owners
  • requirement is not satisfied: Code-Review
  • requirement is not satisfied: Review-Enforcement
Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
Gerrit-MessageType: newchange
Gerrit-Project: pdfium
Gerrit-Branch: main
Gerrit-Change-Id: I001c9780375eb297e40f5b1f87698ac9a6a47fe7
Gerrit-Change-Number: 149070
Gerrit-PatchSet: 1
Gerrit-Owner: Kuka <tyck...@gmail.com>
unsatisfied_requirement
open
diffy

Lei Zhang (Gerrit)

unread,
Jun 3, 2026, 10:42:59 PM (18 hours ago) Jun 3
to Kuka, Lei Zhang, Tom Sepez, pdfium-...@googlegroups.com
Attention needed from Kuka and Tom Sepez

Lei Zhang added 6 comments

Commit Message
Line 9, Patchset 3 (Latest):Remove widget annotations from AcroForm fields when flattening a page, and keep fields that are still referenced by other pages.
Lei Zhang . unresolved

Please wrap at 72 columns.

File fpdfsdk/fpdf_flatten.cpp
Line 331, Patchset 3 (Latest): std::set<const CPDF_Dictionary*>* widget_annots) {
Lei Zhang . unresolved

Just return this, instead of using an out-parameter.

Line 345, Patchset 3 (Latest):void RemoveWidgetsReferencedByOtherPages(
Lei Zhang . unresolved

It wasn't obvious if this is removing the widgets from a dictionary, or from the set. Since this is a set operation, how about "RemoveSharedWidgetsFromSet"?

Line 359, Patchset 3 (Latest): widget_annots->erase(annot);
Lei Zhang . unresolved

Use std::set_difference() instead?

Line 369, Patchset 3 (Latest):bool ShouldPruneField(CPDF_Dictionary* field,
Lei Zhang . unresolved

Intuitively, this name sounds like it is trying to answer a question, so it should be logically const. But it is not. Is there a way to do the decision making, and then prune?

File testing/resources/bug_498010830_shared_annots.in
Line 3, Patchset 3 (Latest):<<
Lei Zhang . unresolved

Please use testing/resources/344775293.in as an example and format the .in file in the same manner. Then regenerate the .pdf.

Open in Gerrit

Related details

Attention is currently required from:
  • Kuka
  • Tom Sepez
Submit Requirements:
    • requirement is not satisfied: Code-Owners
    • requirement is not satisfied: Code-Review
    • requirement is not satisfied: No-Unresolved-Comments
    • requirement is not satisfied: Review-Enforcement
    Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
    Gerrit-MessageType: comment
    Gerrit-Project: pdfium
    Gerrit-Branch: main
    Gerrit-Change-Id: I001c9780375eb297e40f5b1f87698ac9a6a47fe7
    Gerrit-Change-Number: 149070
    Gerrit-PatchSet: 3
    Gerrit-Owner: Kuka <tyck...@gmail.com>
    Gerrit-Reviewer: Lei Zhang <the...@chromium.org>
    Gerrit-Reviewer: Tom Sepez <tse...@chromium.org>
    Gerrit-Attention: Tom Sepez <tse...@chromium.org>
    Gerrit-Attention: Kuka <tyck...@gmail.com>
    Gerrit-Comment-Date: Thu, 04 Jun 2026 02:42:56 +0000
    Gerrit-HasComments: Yes
    Gerrit-Has-Labels: No
    unsatisfied_requirement
    open
    diffy

    Kuka (Gerrit)

    unread,
    11:57 AM (4 hours ago) 11:57 AM
    to Lei Zhang, Tom Sepez, pdfium-...@googlegroups.com
    Attention needed from Lei Zhang and Tom Sepez

    Kuka added 7 comments

    Patchset-level comments
    File-level comment, Patchset 8 (Latest):
    Kuka . unresolved

    Thanks for the review. I addressed the comments and uploaded a new patch set.
    Could you please take another look when you have time?

    Commit Message
    Line 9, Patchset 3:Remove widget annotations from AcroForm fields when flattening a page, and keep fields that are still referenced by other pages.
    Lei Zhang . resolved

    Please wrap at 72 columns.

    Kuka

    Done

    File fpdfsdk/fpdf_flatten.cpp
    Line 331, Patchset 3: std::set<const CPDF_Dictionary*>* widget_annots) {
    Lei Zhang . resolved

    Just return this, instead of using an out-parameter.

    Kuka

    Done

    Line 345, Patchset 3:void RemoveWidgetsReferencedByOtherPages(
    Lei Zhang . resolved

    It wasn't obvious if this is removing the widgets from a dictionary, or from the set. Since this is a set operation, how about "RemoveSharedWidgetsFromSet"?

    Kuka

    Done. Renamed it to RemoveSharedWidgetsFromSet().

    Line 359, Patchset 3: widget_annots->erase(annot);
    Lei Zhang . resolved

    Use std::set_difference() instead?

    Kuka

    Done

    Line 369, Patchset 3:bool ShouldPruneField(CPDF_Dictionary* field,
    Lei Zhang . resolved

    Intuitively, this name sounds like it is trying to answer a question, so it should be logically const. But it is not. Is there a way to do the decision making, and then prune?

    Kuka

    Done. I split the decision from the pruning side effect. The old
    ShouldPruneField() helper is gone now. PruneFieldArray() first decides whether
    the current field should be removed, and only then removes it from the array.

    File testing/resources/bug_498010830_shared_annots.in
    Line 3, Patchset 3:<<
    Lei Zhang . resolved

    Please use testing/resources/344775293.in as an example and format the .in file in the same manner. Then regenerate the .pdf.

    Kuka

    Done

    Open in Gerrit

    Related details

    Attention is currently required from:
    • Lei Zhang
    • Tom Sepez
    Submit Requirements:
    • requirement is not satisfied: Code-Owners
    • requirement is not satisfied: Code-Review
    • requirement is not satisfied: No-Unresolved-Comments
    • requirement is not satisfied: Review-Enforcement
    Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
    Gerrit-MessageType: comment
    Gerrit-Project: pdfium
    Gerrit-Branch: main
    Gerrit-Change-Id: I001c9780375eb297e40f5b1f87698ac9a6a47fe7
    Gerrit-Change-Number: 149070
    Gerrit-PatchSet: 8
    Gerrit-Owner: Kuka <tyck...@gmail.com>
    Gerrit-Reviewer: Lei Zhang <the...@chromium.org>
    Gerrit-Reviewer: Tom Sepez <tse...@chromium.org>
    Gerrit-Attention: Lei Zhang <the...@chromium.org>
    Gerrit-Attention: Tom Sepez <tse...@chromium.org>
    Gerrit-Comment-Date: Thu, 04 Jun 2026 15:57:29 +0000
    Gerrit-HasComments: Yes
    Gerrit-Has-Labels: No
    Comment-In-Reply-To: Lei Zhang <the...@chromium.org>
    unsatisfied_requirement
    open
    diffy
    Reply all
    Reply to author
    Forward
    0 new messages