使用以下代码将 PDF 文件写入 HTML 文件格式时...import java.io.BufferedWriter;import java.io.File;import java.io.FileWriter;import java.io.IOException;import java.io.PrintWriter;import java.io.Writer;import javax.xml.parsers.ParserConfigurationException;import org.apache.pdfbox.pdmodel.PDDocument;import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;import org.fit.pdfdom.PDFDomTree;import org.fit.pdfdom.PDFDomTreeConfig;import org.fit.pdfdom.resource.HtmlResourceHandler;import org.fit.pdfdom.resource.SaveResourceToDirHandler;public class PdfToHtmlConverter { public String pdfToHtmlFileWriter(File file, String outputFilePath, String outputFileName) throws InvalidPasswordException, IOException, ParserConfigurationException { // load the PDF file using PDFBox PDDocument pdf = PDDocument.load(file); PDFDomTreeConfig config = PDFDomTreeConfig.createDefaultConfig(); HtmlResourceHandler fontHandler = new SaveResourceToDirHandler(); config.setFontHandler(fontHandler); HtmlResourceHandler imageHandler = new SaveResourceToDirHandler(); config.setImageHandler(imageHandler); // create the DOM parser PDFDomTree parser = new PDFDomTree(); // parse the file and get the DOM Document String outputFile = outputFilePath + File.separator + outputFileName + ".html"; try (Writer woutput = new PrintWriter(new BufferedWriter(new FileWriter(outputFile)))) { parser.writeText(pdf, woutput); } catch(Exception e) { e.printStackTrace(); } pdf.close(); return outputFile; }}并且build.gradle文件具有以下依赖项列表...
1 回答
胡子哥哥
TA贡献1825条经验 获得超6个赞
请将 jbig2 解码器更新到最新版本,即 3.0.2。感谢 levigo solutions GmbH 的贡献,jbig2 解码器现在已成为 Apache PDFBox 项目的一部分。如果使用 Maven,请添加以下依赖:
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>jbig2-imageio</artifactId>
<version>3.0.2</version>
</dependency>
或者直接下载该 jar 文件并添加到项目中。
添加回答
举报
0/150
提交
取消
