How do I detect broken links and dead object references in my PDF files?

32 views
Skip to first unread message

Support

unread,
Nov 13, 2009, 7:57:00 PM11/13/09
to PDFTron PDFNet SDK
Q: I am processing PDF documents using PDFNet SDK (i.e. editing pages,
cloning pages, and copying annotations). The processing works fine,
but the generated file is larger than the input file.

Based on some discussion in this forum, I suspect that the increase in
file size is due to broken links (dead object references, such as
bookmarks or annotations pointing to deleted pages).

How do I detect broken links and dead object references in my PDF
files?

-------------------
A: You could use the following utility function to detect broken
links and dead object references in a PDF document:

http://groups.google.com/group/pdfnet-sdk/web/DetectBrokenLinks.zip


using System;
using System.Collections.Generic;
using pdftron;
using pdftron.Common;
using pdftron.Filters;
using pdftron.PDF;
using pdftron.SDF;

namespace SDFTestCS
{
    /// <summary>
    /// Sample code showing how to detect 'dead references' to pages that are
    /// no longer in use.
    /// </summary>
    class SDFTest
    {
        /// <summary>
        /// Tells whether an object looks like a page dictionary.
        /// </summary>
        /// <param name="o">The object to inspect.</param>
        /// <returns>
        /// True when <paramref name="o"/> is a dictionary whose "Type" entry
        /// is the name "Page"; false otherwise.
        /// </returns>
        static bool IsPage(Obj o)
        {
            if (!o.IsDict()) return false;
            Obj t = o.FindObj("Type");
            if (t == null) return false;
            return t.IsName() && t.GetName() == "Page";
        }

        /// <summary>
        /// Collects the object numbers of every page reachable through the
        /// document's page iterator.
        /// </summary>
        /// <param name="d">The document whose pages are enumerated.</param>
        /// <returns>The set of object numbers of the pages currently in use.</returns>
        static HashSet<long> CollectLivePageNumbers(PDFDoc d)
        {
            // Stored as long so the set accepts whichever integral type
            // GetObjNum() returns without a narrowing conversion.
            HashSet<long> live = new HashSet<long>();
            for (PageIterator i = d.GetPageIterator(); i.HasNext(); i.Next())
            {
                live.Add(i.Current().GetSDFObj().GetObjNum());
            }
            return live;
        }

        /// <summary>
        /// Tells whether a page object is absent from the set of live pages.
        /// </summary>
        /// <param name="o">An indirect page object.</param>
        /// <param name="livePages">Object numbers of the pages still in use.</param>
        /// <returns>True when the object number of <paramref name="o"/> is not a live page.</returns>
        static bool IsDeadReference(Obj o, HashSet<long> livePages)
        {
            // Compare by object number rather than by Obj equality so the
            // result does not depend on how the Obj wrapper defines '=='.
            return !livePages.Contains(o.GetObjNum());
        }

        /// <summary>
        /// Handles one child value of a container: reports it when it is an
        /// indirect reference to a page that is no longer in use, or descends
        /// into it when it is a direct (inline) object.
        /// </summary>
        /// <param name="val">The child value taken from <paramref name="owner"/>.</param>
        /// <param name="owner">The dictionary, stream or array holding the value.</param>
        /// <param name="livePages">Object numbers of the pages still in use.</param>
        static void CheckValue(Obj val, Obj owner, HashSet<long> livePages)
        {
            if (val.IsIndirect())
            {
                // Indirect objects are not followed here; Main visits them
                // through its own pass over the cross-reference table.
                if (IsPage(val) && IsDeadReference(val, livePages))
                {
                    Console.WriteLine("Dead reference: {0} in {1}\n", val.GetObjNum(), owner.GetObjNum());
                }
            }
            else
            {
                CheckReferences(val, livePages);
            }
        }

        /// <summary>
        /// Scans a container object (dictionary, stream or array) for
        /// references to pages that are no longer in use. Direct children are
        /// scanned recursively. Non-container objects are ignored.
        /// </summary>
        /// <param name="obj">The object to scan.</param>
        /// <param name="livePages">Object numbers of the pages still in use.</param>
        static void CheckReferences(Obj obj, HashSet<long> livePages)
        {
            if (!obj.IsContainer()) return;

            if (obj.IsStream() || obj.IsDict())
            {
                for (DictIterator itr = obj.GetDictIterator(); itr.HasNext(); itr.Next())
                {
                    CheckValue(itr.Value(), obj, livePages);
                }
            }
            else if (obj.IsArray())
            {
                int sz = obj.Size();
                for (int i = 0; i < sz; ++i)
                {
                    CheckValue(obj.GetAt(i), obj, livePages);
                }
            }
        }

        /// <summary>
        /// Opens "Transformed.pdf", walks every in-use entry of its
        /// cross-reference table and prints each dead page reference found.
        /// PDFNet errors are printed to the console instead of propagating.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            try
            {
                PDFNet.Initialize();

                PDFDoc doc = new PDFDoc(@"Transformed.pdf");
                try
                {
                    doc.InitSecurityHandler();

                    // Build the live-page lookup once instead of re-walking
                    // the page iterator for every candidate reference.
                    HashSet<long> livePages = CollectLivePageNumbers(doc);

                    SDFDoc cos_doc = doc.GetSDFDoc();
                    int num_objs = cos_doc.XRefSize();

                    // Starts at 1, as in the original sample; entry 0 is
                    // presumably the reserved head of the free list.
                    for (int i = 1; i < num_objs; ++i)
                    {
                        Obj obj = cos_doc.GetObj(i);

                        if (obj != null && !obj.IsFree())
                        {
                            CheckReferences(obj, livePages);
                        }
                    }
                }
                finally
                {
                    // Close the document even when the scan throws.
                    doc.Close();
                }

                Console.WriteLine("Done.");
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }
        }
    }
}
Reply all
Reply to author
Forward
0 new messages