Java 实现将 PDF 转换成 Word 文档
pom.xml
<?xml version="1.0" encoding="UTF-8"?> <!-- Maven build descriptor for the jackDemo Spring Boot project; among other dependencies it declares PDFBox and docx4j. --> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <parent> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-parent</artifactId> <!-- <version>3.2.1</version>--> <version>2.3.9.RELEASE</version> <relativePath/> <!-- lookup parent from repository --> </parent> <groupId>com.jack</groupId> <artifactId>jackDemo</artifactId> <version>0.0.1-SNAPSHOT</version> <name>jackDemo</name> <description>jackDemo</description> <properties> <java.version>1.8</java.version> </properties> <dependencies> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-data-mongodb</artifactId> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-web</artifactId> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-devtools</artifactId> <scope>runtime</scope> <optional>true</optional> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-test</artifactId> <scope>test</scope> </dependency> <dependency> <groupId>com.alibaba</groupId> <artifactId>fastjson</artifactId> <version>1.2.75</version> </dependency> <dependency> <groupId>org.openpnp</groupId> <artifactId>opencv</artifactId> <version>4.5.3-4</version> </dependency> <!-- Apache POI for Excel files --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>5.2.3</version> <!-- check for and use the latest version --> </dependency> <!-- Apache POI dependencies (these may be included automatically by Maven, but it's good to be explicit) --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>5.2.3</version> <!-- keep in sync with the poi-ooxml version --> </dependency> <!-- Apache Commons Collections 
(required by POI) --> <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-collections4</artifactId> <version>4.4</version> <!-- make sure the version is compatible with your project --> </dependency> <!-- Apache Commons IO (optional, but useful for file handling) --> <dependency> <groupId>commons-io</groupId> <artifactId>commons-io</artifactId> <version>2.11.0</version> <!-- make sure the version is compatible with your project --> </dependency> <!-- PDFBox for reading PDF files --> <dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox</artifactId> <version>2.0.24</version> </dependency> <!-- docx4j for creating Word documents --> <dependency> <groupId>org.docx4j</groupId> <artifactId>docx4j</artifactId> <version>3.2.1</version> </dependency> <dependency> <groupId>javax.xml.bind</groupId> <artifactId>jaxb-api</artifactId> <version>2.3.1</version> </dependency> <dependency> <groupId>org.glassfish.jaxb</groupId> <artifactId>jaxb-runtime</artifactId> <version>2.3.1</version> </dependency> </dependencies> <build> <plugins> <plugin> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-maven-plugin</artifactId> </plugin> </plugins> </build> </project>
Java 文件:
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; import org.docx4j.openpackaging.packages.WordprocessingMLPackage; import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart; import org.docx4j.wml.Body; import org.docx4j.wml.P; import org.docx4j.wml.R; import org.docx4j.wml.Text; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; public class PdfToWordConverter { public static void main(String[] args) throws Exception{ String pdfFilePath = "D:\\word\\何以为父影响彼此一生的父子关系.pdf"; // 替换为你的PDF文件路径 String wordFilePath = "D:\\word\\何以为父影响彼此一生的父子关系.docx"; // 生成的Word文件路径 try { // 读取PDF文件内容 String pdfText = extractTextFromPdf(pdfFilePath); // 将内容写入Word文档 createWordDocument(wordFilePath, pdfText); System.out.println("PDF to Word conversion completed successfully!"); } catch (IOException e) { e.printStackTrace(); } } public static String extractTextFromPdf(String filePath) throws IOException { PDDocument document = PDDocument.load(new FileInputStream(filePath)); PDFTextStripper pdfStripper = new PDFTextStripper(); return pdfStripper.getText(document); } public static void createWordDocument(String filePath, String content) throws Exception { WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage(); MainDocumentPart mainDocumentPart = wordMLPackage.getMainDocumentPart(); Body body = mainDocumentPart.getContents().getBody(); // 将内容按段落分割并添加到Word文档中 String[] paragraphs = content.split("\\r?\\n"); for (String paragraph : paragraphs) { P p = new P(); R r = new R(); Text text = new Text(); text.setParent(paragraph); r.getContent().add(text); p.getContent().add(r); body.getContent().add(p); } // 保存Word文档 try (FileOutputStream out = new FileOutputStream(new File(filePath))) { wordMLPackage.save(out); } } }