Searching with a regular expression

101 views
Skip to first unread message

Matt Parizeau

unread,
Aug 28, 2014, 2:28:41 PM8/28/14
to pdfnet-w...@googlegroups.com
Q:

Is it possible to search through a document using a regular expression?

A:

You could use the LoadPageText and GetTextPosition functions of the Document object to implement regex searching. The text position sample included in the WebViewer download package is similar to what you want: it manually searches through the page text and then adds highlight annotations on top of the matches. Here is a slightly modified version of that sample that does some basic regex searching. As an example, it hooks the search function up to the print button and always searches the current page; because the regular expression uses the global flag, each click moves on to the next match:

// Index of the page most recently searched (getCurrentPage() minus one).
// highlightText reads this when it builds its selection points, so it must
// stay at file scope.
var pageNum;

// The 'g' flag makes exec() resume from regex.lastIndex, which is what lets
// each click find the NEXT match rather than the first one again.
var regex = /web|viewer/gi;

$(document).on('documentLoaded', function() {
    var doc = readerControl.docViewer.getDocument();

    // A newly loaded document invalidates any search position left over from
    // the previous one.
    pageNum = undefined;
    regex.lastIndex = 0;

    // Example wiring only: the print button is repurposed as "find next on the
    // current page". off('click') drops previously attached click handlers so
    // the search handler is not stacked on reload.
    $('#printButton').off('click').on('click', function() {
        var currentPage = readerControl.docViewer.getCurrentPage() - 1;

        // regex.lastIndex is an offset into the text of the page searched last
        // time. It is meaningless for a different page's text, so start over
        // whenever the user has moved to another page.
        if (currentPage !== pageNum) {
            regex.lastIndex = 0;
        }
        pageNum = currentPage;

        // the text is cached internally but you could save it to a variable if you want
        doc.loadPageText(pageNum, function(text) {
            var match = regex.exec(text);

            if (!match) {
                // A failed exec() resets lastIndex to 0, so the next click
                // starts again from the top of the page.
                alert("end of page");
            } else {
                doc.getTextPosition(pageNum, match.index, match.index + match[0].length, highlightText);
            }
        });
    });
});

/**
 * Selects the text spanned by a run of quads on the page given by the
 * file-level `pageNum`. Intended as the callback for doc.getTextPosition.
 *
 * @param {Array<Object>} quads - Quads for the matched text, in reading order
 *     (presumably one per character - confirm against the WebViewer docs).
 *     Each quad exposes corner coordinates x1..x4 / y1..y4 laid out as in the
 *     diagram below. An empty or missing array is ignored.
 */
function highlightText(quads) {
    // Nothing was located for the match, so there is nothing to select.
    if (!quads || quads.length === 0) {
        return;
    }

    var docViewer = readerControl.docViewer;

    var firstChar = quads[0];
    var lastChar = quads[quads.length - 1];

    /* point locations
    x1 ---- x2
    |       |
    |       |
    x4 ---- x3
    */

    // center the selection coordinates to make it more precise
    var firstx = (firstChar.x1 + firstChar.x2) / 2;
    var finalx = (lastChar.x3 + lastChar.x4) / 2;

    // Each end point uses the vertical center of its own character. For a match
    // on a single line both values are equal; for a match that wraps onto
    // another line the end point lands on the last character's line instead of
    // the first character's.
    var firsty = (firstChar.y1 + firstChar.y4) / 2;
    var finaly = (lastChar.y1 + lastChar.y4) / 2;

    // select from the center of the first char to the center of the last char
    var topLeft = { x: firstx, y: firsty, pageIndex: pageNum };
    var bottomRight = { x: finalx, y: finaly, pageIndex: pageNum };

    var textSelectTool = new Tools.TextSelectTool(docViewer);
    textSelectTool.select(topLeft, bottomRight);
}

Reply all
Reply to author
Forward
0 new messages