How do I detect broken links and dead object references in my PDF files?

瀏覽次數:34 次
跳到第一則未讀訊息

Support

未讀,
2009年11月13日 晚上7:57:002009/11/13
收件者:PDFTron PDFNet SDK
Q: I am processing PDF documents using PDFNet SDK (i.e. editing pages,
cloning pages, and copying annotations). The processing works fine,
but the generated file is larger than the input file.

Based on some discussion in this forum, I suspect that the increase in
file size is due to broken links (dead object references, such as
bookmarks or annotations pointing to deleted pages).

How do I detect broken links and dead object references in my PDF
files?

-------------------
A: You could use the following utility function to detect broken
links and dead object references in a PDF:

http://groups.google.com/group/pdfnet-sdk/web/DetectBrokenLinks.zip


using System;
using pdftron;
using pdftron.Common;
using pdftron.Filters;
using pdftron.SDF;
using pdftron.PDF;

namespace SDFTestCS
{
    /// <summary>
    /// Sample code showing how to detect 'dead references' to pages that are
    /// no longer in use.
    /// </summary>
    class SDFTest
    {
        /// <summary>
        /// Tells whether an object looks like a page dictionary.
        /// </summary>
        /// <param name="o">The object to inspect.</param>
        /// <returns>
        /// True when <paramref name="o"/> is a dictionary whose "Type" entry
        /// is the name "Page"; false otherwise.
        /// </returns>
        static bool IsPage(Obj o)
        {
            if (!o.IsDict()) return false;

            Obj t = o.FindObj("Type");
            if (t == null) return false;

            return t.IsName() && t.GetName() == "Page";
        }

        /// <summary>
        /// Tells whether an object is absent from the document's page sequence.
        /// </summary>
        /// <param name="o">The object to look for.</param>
        /// <param name="d">The document whose pages are enumerated.</param>
        /// <returns>
        /// False as soon as a page yielded by the document's page iterator has
        /// an SDF object equal to <paramref name="o"/>; true if none does.
        /// </returns>
        static bool IsDeadReference(Obj o, PDFDoc d)
        {
            for (PageIterator i = d.GetPageIterator(); i.HasNext(); i.Next())
            {
                if (o == i.Current().GetSDFObj()) return false;
            }

            return true;
        }

        /// <summary>
        /// Handles one value held by a container: an indirect value is reported
        /// if it is a page that the document no longer enumerates, while a
        /// direct value is scanned recursively.
        /// </summary>
        /// <param name="val">The dictionary value or array element.</param>
        /// <param name="owner">The container that holds <paramref name="val"/>.</param>
        /// <param name="doc">The document being checked.</param>
        static void CheckValue(Obj val, Obj owner, PDFDoc doc)
        {
            if (val.IsIndirect())
            {
                // Indirect values are not followed here; Main visits every
                // cross-reference entry on its own, so following them would
                // repeat work.
                if (IsPage(val) && IsDeadReference(val, doc))
                {
                    Console.WriteLine("Dead reference: {0} in {1}\n", val.GetObjNum(), owner.GetObjNum());
                }
            }
            else
            {
                CheckReferences(val, doc);
            }
        }

        /// <summary>
        /// Scans a container object (dictionary, stream or array) for dead page
        /// references, descending into direct child containers.
        /// </summary>
        /// <param name="obj">The object to scan; non-containers are ignored.</param>
        /// <param name="doc">The document being checked.</param>
        static void CheckReferences(Obj obj, PDFDoc doc)
        {
            if (!obj.IsContainer()) return;

            if (obj.IsStream() || obj.IsDict())
            {
                for (DictIterator itr = obj.GetDictIterator(); itr.HasNext(); itr.Next())
                {
                    CheckValue(itr.Value(), obj, doc);
                }
            }
            else if (obj.IsArray())
            {
                int sz = obj.Size();
                for (int i = 0; i < sz; ++i)
                {
                    CheckValue(obj.GetAt(i), obj, doc);
                }
            }
        }

        /// <summary>
        /// Opens "Transformed.pdf", checks every in-use object in its
        /// cross-reference table for dead page references, and prints the
        /// findings to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            try
            {
                PDFNet.Initialize();

                PDFDoc doc = new PDFDoc(@"Transformed.pdf");
                try
                {
                    doc.InitSecurityHandler();

                    SDFDoc cos_doc = doc.GetSDFDoc();
                    int num_objs = cos_doc.XRefSize();

                    // Scanning starts at object number 1, as in the original sample.
                    for (int i = 1; i < num_objs; ++i)
                    {
                        Obj obj = cos_doc.GetObj(i);

                        if (obj != null && !obj.IsFree())
                        {
                            CheckReferences(obj, doc);
                        }
                    }
                }
                finally
                {
                    // Close the document even when processing throws.
                    doc.Close();
                }

                Console.WriteLine("Done.");
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }
        }
    }
}
回覆所有人
回覆作者
轉寄
0 則新訊息