Remove flattened fields from AcroForm
Remove widget annotations from AcroForm fields when flattening a page, and keep fields that are still referenced by other pages.
Add coverage for shared annotation arrays and shared widget annotations across pages.
diff --git a/fpdfsdk/fpdf_flatten.cpp b/fpdfsdk/fpdf_flatten.cpp
index eb89264..00ed3ae 100644
--- a/fpdfsdk/fpdf_flatten.cpp
+++ b/fpdfsdk/fpdf_flatten.cpp
@@ -9,6 +9,7 @@
#include <limits.h>
#include <algorithm>
+#include <set>
#include <sstream>
#include <utility>
#include <vector>
@@ -16,6 +17,7 @@
#include "constants/annotation_common.h"
#include "constants/annotation_flags.h"
#include "constants/font_encodings.h"
+#include "constants/form_fields.h"
#include "constants/page_object.h"
#include "core/fpdfapi/edit/cpdf_contentstream_write_utils.h"
#include "core/fpdfapi/page/cpdf_page.h"
@@ -30,6 +32,7 @@
#include "core/fpdfapi/parser/cpdf_stream_acc.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
#include "core/fpdfdoc/cpdf_annot.h"
+#include "core/fxcrt/containers/contains.h"
#include "core/fxcrt/fx_string_wrappers.h"
#include "fpdfsdk/cpdfsdk_helpers.h"
@@ -38,6 +41,12 @@
namespace {
+constexpr char kAcroForm[] = "AcroForm";
+constexpr char kAnnots[] = "Annots";
+constexpr char kFields[] = "Fields";
+constexpr char kXFA[] = "XFA";
+constexpr int kMaxRecursion = 32;
+
bool IsValidRect(const CFX_FloatRect& rect, const CFX_FloatRect& rcPage) {
static constexpr float kMinSize = 0.000001f;
if (rect.IsEmpty() || rect.Width() < kMinSize || rect.Height() < kMinSize) {
@@ -314,6 +323,125 @@
SanitizeFontResources(resources_dict->GetMutableDictFor("Font"));
}
+bool IsWidgetAnnot(const CPDF_Dictionary* annot) {
+ return annot && annot->GetNameFor(pdfium::annotation::kSubtype) == "Widget";
+}
+
+void CollectPageWidgetAnnots(const CPDF_Dictionary* page_dict,
+ std::set<const CPDF_Dictionary*>* widget_annots) {
+ RetainPtr<const CPDF_Array> annots = page_dict->GetArrayFor(kAnnots);
+ if (!annots) {
+ return;
+ }
+
+ for (size_t i = 0; i < annots->size(); ++i) {
+ RetainPtr<const CPDF_Dictionary> annot = annots->GetDictAt(i);
+ if (IsWidgetAnnot(annot.Get())) {
+ widget_annots->insert(annot.Get());
+ }
+ }
+}
+
+void RemoveWidgetsReferencedByOtherPages(
+ CPDF_Document* document,
+ const CPDF_Dictionary* current_page_dict,
+ std::set<const CPDF_Dictionary*>* widget_annots) {
+ for (int i = 0, page_count = document->GetPageCount();
+ i < page_count && !widget_annots->empty(); ++i) {
+ RetainPtr<const CPDF_Dictionary> page_dict = document->GetPageDictionary(i);
+ if (!page_dict || page_dict.Get() == current_page_dict) {
+ continue;
+ }
+
+ std::set<const CPDF_Dictionary*> other_page_widget_annots;
+ CollectPageWidgetAnnots(page_dict.Get(), &other_page_widget_annots);
+ for (const CPDF_Dictionary* annot : other_page_widget_annots) {
+ widget_annots->erase(annot);
+ }
+ }
+}
+
+bool PruneFieldArray(CPDF_Array* fields,
+ const std::set<const CPDF_Dictionary*>& widget_annots,
+ std::set<const CPDF_Dictionary*>* visited_fields,
+ int level);
+
+bool ShouldPruneField(CPDF_Dictionary* field,
+ const std::set<const CPDF_Dictionary*>& widget_annots,
+ std::set<const CPDF_Dictionary*>* visited_fields,
+ int level) {
+ if (level > kMaxRecursion) {
+ return false;
+ }
+
+ if (pdfium::Contains(widget_annots, field)) {
+ return true;
+ }
+
+ RetainPtr<CPDF_Array> kids =
+ field->GetMutableArrayFor(pdfium::form_fields::kKids);
+ if (!kids || !visited_fields->insert(field).second) {
+ return false;
+ }
+
+ return PruneFieldArray(kids.Get(), widget_annots, visited_fields,
+ level + 1) &&
+ kids->IsEmpty();
+}
+
+bool PruneFieldArray(CPDF_Array* fields,
+ const std::set<const CPDF_Dictionary*>& widget_annots,
+ std::set<const CPDF_Dictionary*>* visited_fields,
+ int level) {
+ bool pruned = false;
+ for (size_t i = fields->size(); i > 0; --i) {
+ const size_t field_index = i - 1;
+ RetainPtr<CPDF_Dictionary> field = fields->GetMutableDictAt(field_index);
+ if (field &&
+ ShouldPruneField(field.Get(), widget_annots, visited_fields, level)) {
+ fields->RemoveAt(field_index);
+ pruned = true;
+ }
+ }
+ return pruned;
+}
+
+void RemoveFlattenedFields(CPDF_Document* document,
+ const CPDF_Dictionary* page_dict,
+ int level) {
+ std::set<const CPDF_Dictionary*> widget_annots;
+ CollectPageWidgetAnnots(page_dict, &widget_annots);
+ if (widget_annots.empty()) {
+ return;
+ }
+
+ RemoveWidgetsReferencedByOtherPages(document, page_dict, &widget_annots);
+ if (widget_annots.empty()) {
+ return;
+ }
+
+ RetainPtr<CPDF_Dictionary> root = document->GetMutableRoot();
+ if (!root) {
+ return;
+ }
+
+ RetainPtr<CPDF_Dictionary> acro_form = root->GetMutableDictFor(kAcroForm);
+ if (!acro_form) {
+ return;
+ }
+
+ RetainPtr<CPDF_Array> fields = acro_form->GetMutableArrayFor(kFields);
+ if (!fields) {
+ return;
+ }
+
+ std::set<const CPDF_Dictionary*> visited_fields;
+ PruneFieldArray(fields.Get(), widget_annots, &visited_fields, level);
+ if (fields->IsEmpty() && !acro_form->KeyExist(kXFA)) {
+ root->RemoveFor(kAcroForm);
+ }
+}
+
} // namespace
FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag) {
@@ -496,6 +624,7 @@
sFormName.c_str());
pNewXObject->SetDataAndRemoveFilter(sStream.unsigned_span());
}
- pPageDict->RemoveFor("Annots");
+ RemoveFlattenedFields(document, pPageDict.Get(), /*level=*/0);
+ pPageDict->RemoveFor(kAnnots);
return FLATTEN_SUCCESS;
}
diff --git a/fpdfsdk/fpdf_flatten_embeddertest.cpp b/fpdfsdk/fpdf_flatten_embeddertest.cpp
index ec1073f..e11a99b 100644
--- a/fpdfsdk/fpdf_flatten_embeddertest.cpp
+++ b/fpdfsdk/fpdf_flatten_embeddertest.cpp
@@ -134,6 +134,29 @@
EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
ScopedSavedDoc saved_doc = OpenScopedSavedDocument();
ASSERT_TRUE(saved_doc);
- // TODO(crbug.com/498010830): this should be FORMTYPE_NONE
+ EXPECT_EQ(FORMTYPE_NONE, FPDF_GetFormType(saved_doc.get()));
+}
+
+TEST_F(FPDFFlattenEmbedderTest, FlattenSharedAnnotArrayKeepsAcroForm) {
+ ASSERT_TRUE(OpenDocument("bug_498010830_shared_annots.pdf"));
+ ScopedPage page = LoadScopedPage(0);
+ ASSERT_TRUE(page);
+ EXPECT_EQ(FPDFPage_Flatten(page.get(), FLAT_NORMALDISPLAY), FLATTEN_SUCCESS);
+
+ EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+ ScopedSavedDoc saved_doc = OpenScopedSavedDocument();
+ ASSERT_TRUE(saved_doc);
+ EXPECT_EQ(FORMTYPE_ACRO_FORM, FPDF_GetFormType(saved_doc.get()));
+}
+
+TEST_F(FPDFFlattenEmbedderTest, FlattenSharedWidgetAnnotKeepsAcroForm) {
+ ASSERT_TRUE(OpenDocument("bug_498010830_shared_widget.pdf"));
+ ScopedPage page = LoadScopedPage(0);
+ ASSERT_TRUE(page);
+ EXPECT_EQ(FPDFPage_Flatten(page.get(), FLAT_NORMALDISPLAY), FLATTEN_SUCCESS);
+
+ EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+ ScopedSavedDoc saved_doc = OpenScopedSavedDocument();
+ ASSERT_TRUE(saved_doc);
EXPECT_EQ(FORMTYPE_ACRO_FORM, FPDF_GetFormType(saved_doc.get()));
}
diff --git a/testing/resources/bug_498010830_shared_annots.in b/testing/resources/bug_498010830_shared_annots.in
new file mode 100644
index 0000000..88bab06
--- /dev/null
+++ b/testing/resources/bug_498010830_shared_annots.in
@@ -0,0 +1,43 @@
+{{header}}
+{{object 1 0}}
+<<
+ /Type /Catalog
+ /Pages 2 0 R
+ /AcroForm << /Fields [ 6 0 R ] >>
+>>
+endobj
+{{object 2 0}}
+<< /Count 2 /Kids [ 3 0 R 4 0 R ] /Type /Pages >>
+endobj
+{{object 3 0}}
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots 5 0 R
+>>
+endobj
+{{object 4 0}}
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots 5 0 R
+>>
+endobj
+{{object 5 0}}
+[ 6 0 R ]
+endobj
+{{object 6 0}}
+<<
+ /Type /Annot
+ /FT /Tx
+ /T (Shared Annot Array Widget)
+ /Rect [ 100 100 200 130 ]
+ /Subtype /Widget
+>>
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/bug_498010830_shared_annots.pdf b/testing/resources/bug_498010830_shared_annots.pdf
new file mode 100644
index 0000000..c42010a
--- /dev/null
+++ b/testing/resources/bug_498010830_shared_annots.pdf
@@ -0,0 +1,56 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj
+<<
+ /Type /Catalog
+ /Pages 2 0 R
+ /AcroForm << /Fields [ 6 0 R ] >>
+>>
+endobj
+2 0 obj
+<< /Count 2 /Kids [ 3 0 R 4 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots 5 0 R
+>>
+endobj
+4 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots 5 0 R
+>>
+endobj
+5 0 obj
+[ 6 0 R ]
+endobj
+6 0 obj
+<<
+ /Type /Annot
+ /FT /Tx
+ /T (Shared Annot Array Widget)
+ /Rect [ 100 100 200 130 ]
+ /Subtype /Widget
+>>
+endobj
+xref
+0 7
+0000000000 65535 f
+0000000015 00000 n
+0000000104 00000 n
+0000000169 00000 n
+0000000264 00000 n
+0000000359 00000 n
+0000000384 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 7
+>>
+startxref
+510
+%%EOF
diff --git a/testing/resources/bug_498010830_shared_widget.in b/testing/resources/bug_498010830_shared_widget.in
new file mode 100644
index 0000000..4c32bcc
--- /dev/null
+++ b/testing/resources/bug_498010830_shared_widget.in
@@ -0,0 +1,40 @@
+{{header}}
+{{object 1 0}}
+<<
+ /Type /Catalog
+ /Pages 2 0 R
+ /AcroForm << /Fields [ 5 0 R ] >>
+>>
+endobj
+{{object 2 0}}
+<< /Count 2 /Kids [ 3 0 R 4 0 R ] /Type /Pages >>
+endobj
+{{object 3 0}}
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots [ 5 0 R ]
+>>
+endobj
+{{object 4 0}}
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots [ 5 0 R ]
+>>
+endobj
+{{object 5 0}}
+<<
+ /Type /Annot
+ /FT /Tx
+ /T (Shared Widget)
+ /Rect [ 100 100 200 130 ]
+ /Subtype /Widget
+>>
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/bug_498010830_shared_widget.pdf b/testing/resources/bug_498010830_shared_widget.pdf
new file mode 100644
index 0000000..6a17075
--- /dev/null
+++ b/testing/resources/bug_498010830_shared_widget.pdf
@@ -0,0 +1,52 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj
+<<
+ /Type /Catalog
+ /Pages 2 0 R
+ /AcroForm << /Fields [ 5 0 R ] >>
+>>
+endobj
+2 0 obj
+<< /Count 2 /Kids [ 3 0 R 4 0 R ] /Type /Pages >>
+endobj
+3 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots [ 5 0 R ]
+>>
+endobj
+4 0 obj
+<<
+ /Type /Page
+ /Parent 2 0 R
+ /MediaBox [ 0 0 300 300 ]
+ /Annots [ 5 0 R ]
+>>
+endobj
+5 0 obj
+<<
+ /Type /Annot
+ /FT /Tx
+ /T (Shared Widget)
+ /Rect [ 100 100 200 130 ]
+ /Subtype /Widget
+>>
+endobj
+xref
+0 6
+0000000000 65535 f
+0000000015 00000 n
+0000000104 00000 n
+0000000169 00000 n
+0000000268 00000 n
+0000000367 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 6
+>>
+startxref
+481
+%%EOF
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
> Remove widget annotations from AcroForm fields when flattening a page, and keep fields that are still referenced by other pages.
Please wrap at 72 columns.
> std::set<const CPDF_Dictionary*>* widget_annots) {
Just return this, instead of using an out-parameter.
> void RemoveWidgetsReferencedByOtherPages(
It wasn't obvious whether this is removing the widgets from a dictionary or from the set. Since this is a set operation, how about "RemoveSharedWidgetsFromSet"?
> widget_annots->erase(annot);
Use std::set_difference() instead?
> bool ShouldPruneField(CPDF_Dictionary* field,
Intuitively, this name sounds like it is trying to answer a question, so it should be logically const. But it is not. Is there a way to do the decision making, and then prune?
> <<
Please use testing/resources/344775293.in as an example and format the .in file in the same manner. Then regenerate the .pdf.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Thanks for the review. I addressed the comments and uploaded a new patch set.
Could you please take another look when you have time?
> Remove widget annotations from AcroForm fields when flattening a page, and keep fields that are still referenced by other pages.
Please wrap at 72 columns.
Done
Just return this, instead of using an out-parameter.
Done
It wasn't obvious if this is removing the widgets from a dictionary, or from the set. Since this is a set operation, how about "RemoveSharedWidgetsFromSet"?
Done. Renamed it to RemoveSharedWidgetsFromSet().
> widget_annots->erase(annot);
Kuka
Use std::set_difference() instead?
Done
Intuitively, this name sounds like it is trying to answer a question, so it should be logically const. But it is not. Is there a way to do the decision making, and then prune?
Done. I split the decision from the pruning side effect. The old
ShouldPruneField() helper is gone now. PruneFieldArray() first decides whether
the current field should be removed, and only then removes it from the array.
Please use testing/resources/344775293.in as an example and format the .in file in the same manner. Then regenerate the .pdf.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |