We are getting an OutOfMemoryError when Hippo tries to extract the text content of a PDF (stack trace below). We do not really need to index that document; is there a way to disable text extraction for particular documents?
[INFO] [talledLocalContainer] 10.02.2015 01:49:00 WARN jackrabbit-pool-3 [LazyTextExtractorField$ParsingTask.run:181] Failed to extract text from a binary property
[INFO] [talledLocalContainer] java.lang.OutOfMemoryError: Java heap space
[INFO] [talledLocalContainer] at java.util.Arrays.copyOf(Arrays.java:2271)
[INFO] [talledLocalContainer] at java.io.ByteArrayOutputStream.toByteArray(ByteArrayOutputStream.java:178)
[INFO] [talledLocalContainer] at org.apache.pdfbox.filter.FlateFilter.decode(FlateFilter.java:102)
[INFO] [talledLocalContainer] at org.apache.pdfbox.cos.COSStream.doDecode(COSStream.java:279)
[INFO] [talledLocalContainer] at org.apache.pdfbox.cos.COSStream.doDecode(COSStream.java:221)
[INFO] [talledLocalContainer] at org.apache.pdfbox.cos.COSStream.getUnfilteredStream(COSStream.java:156)
[INFO] [talledLocalContainer] at org.apache.pdfbox.pdfparser.PDFStreamParser.<init>(PDFStreamParser.java:108)
[INFO] [talledLocalContainer] at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:253)
[INFO] [talledLocalContainer] at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:237)
[INFO] [talledLocalContainer] at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:217)
[INFO] [talledLocalContainer] at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:448)
[INFO] [talledLocalContainer] at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:372)
[INFO] [talledLocalContainer] at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:328)
[INFO] [talledLocalContainer] at org.apache.tika.parser.pdf.PDF2XHTML.process(PDF2XHTML.java:56)
[INFO] [talledLocalContainer] at org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:89)
[INFO] [talledLocalContainer] at org.apache.tika.parser.ParserDecorator.parse(ParserDecorator.java:91)
[INFO] [talledLocalContainer] at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:197)
[INFO] [talledLocalContainer] at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135)
[INFO] [talledLocalContainer] at org.apache.jackrabbit.core.query.lucene.JackrabbitParser.parse(JackrabbitParser.java:192)
[INFO] [talledLocalContainer] at org.apache.jackrabbit.core.query.lucene.LazyTextExtractorField$ParsingTask.run(LazyTextExtractorField.java:175)
[INFO] [talledLocalContainer] at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
[INFO] [talledLocalContainer] at java.util.concurrent.FutureTask.run(FutureTask.java:262)
[INFO] [talledLocalContainer] at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:178)
[INFO] [talledLocalContainer] at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:292)
[INFO] [talledLocalContainer] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
[INFO] [talledLocalContainer] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
[INFO] [talledLocalContainer] at java.lang.Thread.run(Thread.java:745)