Class PDF2TextConverter.PatchedPDFTextStripper
java.lang.Object
org.apache.pdfbox.contentstream.PDFStreamEngine
org.apache.pdfbox.text.PDFTextStripper
org.nuxeo.ecm.core.convert.plugins.text.extractors.PDF2TextConverter.PatchedPDFTextStripper
- Enclosing class:
- PDF2TextConverter
public static class PDF2TextConverter.PatchedPDFTextStripper
extends org.apache.pdfbox.text.PDFTextStripper
-
Field Summary
Fields inherited from class org.apache.pdfbox.text.PDFTextStripper
charactersByArticle, document, LINE_SEPARATOR, output
-
Constructor Summary
-
Method Summary
Modifier and TypeMethodDescriptionprotected float
computeFontHeight
(org.apache.pdfbox.pdmodel.font.PDFont arg0) protected void
operatorException
(org.apache.pdfbox.contentstream.operator.Operator operator, List<org.apache.pdfbox.cos.COSBase> operands, IOException e) protected void
showGlyph
(org.apache.pdfbox.util.Matrix arg0, org.apache.pdfbox.pdmodel.font.PDFont arg1, int arg2, String arg3, org.apache.pdfbox.util.Vector arg4) Methods inherited from class org.apache.pdfbox.text.PDFTextStripper
endArticle, endDocument, endPage, getAddMoreFormatting, getArticleEnd, getArticleStart, getAverageCharTolerance, getCharactersByArticle, getCurrentPageNo, getDropThreshold, getEndBookmark, getEndPage, getIndentThreshold, getLineSeparator, getListItemPatterns, getOutput, getPageEnd, getPageStart, getParagraphEnd, getParagraphStart, getSeparateByBeads, getSortByPosition, getSpacingTolerance, getStartBookmark, getStartPage, getSuppressDuplicateOverlappingText, getText, getWordSeparator, matchPattern, processPage, processPages, processTextPosition, setAddMoreFormatting, setArticleEnd, setArticleStart, setAverageCharTolerance, setDropThreshold, setEndBookmark, setEndPage, setIndentThreshold, setLineSeparator, setListItemPatterns, setPageEnd, setPageStart, setParagraphEnd, setParagraphStart, setShouldSeparateByBeads, setSortByPosition, setSpacingTolerance, setStartBookmark, setStartPage, setSuppressDuplicateOverlappingText, setWordSeparator, startArticle, startArticle, startDocument, startPage, writeCharacters, writeLineSeparator, writePage, writePageEnd, writePageStart, writeParagraphEnd, writeParagraphSeparator, writeParagraphStart, writeString, writeString, writeText, writeWordSeparator
Methods inherited from class org.apache.pdfbox.contentstream.PDFStreamEngine
addOperator, applyTextAdjustment, beginMarkedContentSequence, beginText, decreaseLevel, endMarkedContentSequence, endText, getAppearance, getCurrentPage, getGraphicsStackSize, getGraphicsState, getInitialMatrix, getLevel, getResources, getTextLineMatrix, getTextMatrix, increaseLevel, processAnnotation, processChildStream, processOperator, processOperator, processSoftMask, processTilingPattern, processTilingPattern, processTransparencyGroup, processType3Stream, registerOperatorProcessor, restoreGraphicsStack, restoreGraphicsState, saveGraphicsStack, saveGraphicsState, setLineDashPattern, setTextLineMatrix, setTextMatrix, showAnnotation, showFontGlyph, showFontGlyph, showForm, showGlyph, showText, showTextString, showTextStrings, showTransparencyGroup, showType3Glyph, showType3Glyph, transformedPoint, transformWidth, unsupportedOperator
-
Constructor Details
-
PatchedPDFTextStripper
- Throws:
IOException
-
-
Method Details
-
operatorException
protected void operatorException(org.apache.pdfbox.contentstream.operator.Operator operator, List<org.apache.pdfbox.cos.COSBase> operands, IOException e) throws IOException - Overrides:
operatorException
in classorg.apache.pdfbox.contentstream.PDFStreamEngine
- Throws:
IOException
-
showGlyph
protected void showGlyph(org.apache.pdfbox.util.Matrix arg0, org.apache.pdfbox.pdmodel.font.PDFont arg1, int arg2, String arg3, org.apache.pdfbox.util.Vector arg4) throws IOException - Overrides:
showGlyph
in classorg.apache.pdfbox.contentstream.PDFStreamEngine
- Throws:
IOException
-
computeFontHeight
- Throws:
IOException
-