一、依赖
<!-- Version properties referenced by the ${...} placeholders in the dependencies below. -->
<!-- NOTE(review): these two lines presumably belong inside the POM's <properties> element; confirm against the real pom.xml. -->
<spire.pdf.free.version>9.13.0</spire.pdf.free.version>
<itextpdf.version>5.5.13</itextpdf.version>
<!-- Free edition of Spire.PDF: provides PdfDocument, which the converter uses to render pages to images. -->
<dependency>
<groupId>e-iceblue</groupId>
<artifactId>spire.pdf.free</artifactId>
<version>${spire.pdf.free.version}</version>
</dependency>
<!-- iText 5: provides PdfReader/PdfCopy, which the converter uses to split the PDF into small chunks. -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>${itextpdf.version}</version>
</dependency>
二、思路:
①问题:
1、spire.pdf.free只能免费转换每个PDF的前三页
2、转换速度慢
3、多线程合并后页数顺序问题
②解决
1、将PDF文档根据页数截断为多个PDF,每个PDF最多三页
2、使用线程池多线程异步处理
3、按页码顺序提交各组任务,并把返回的 Future 按提交顺序保存;合并时按同样的顺序依次取结果,保证图片顺序与页码一致
三、代码:
1、 转换的类
package com.shiqiao.nev.business.infra.adapter.pdftoPic;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfImportedPage;
import com.itextpdf.text.pdf.PdfReader;
import com.spire.pdf.PdfDocument;
import com.spire.pdf.graphics.PdfImageType;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
/**
 * Converts a PDF document into PNG images, one image per page.
 *
 * <p>The PDF is cut into chunks of at most {@code GROUP_SIZE} pages with iText and every chunk is
 * rendered separately with Spire.PDF, so that no single document handed to Spire.PDF exceeds
 * {@code GROUP_SIZE} pages. Chunks may be rendered in parallel on a caller-supplied executor; the
 * resulting images are always returned in page order.
 *
 * <p>The PDF bytes are kept in memory for the lifetime of the converter and are only read, never
 * modified, by this class.
 */
public class PdfToPicConverter {

    /** Maximum number of pages per chunk handed to Spire.PDF. */
    private static final int GROUP_SIZE = 3;

    /** Horizontal and vertical resolution passed to {@code PdfDocument.saveAsImage}. */
    private static final int RENDER_DPI = 500;

    /** Raw bytes of the whole source PDF. */
    private final byte[] pdfFileByteStream;

    /** Number of pages in the source PDF, determined once at construction time. */
    private final int pageCount;

    /**
     * Creates a converter by reading the whole PDF from a stream.
     *
     * <p>The stream is read to its end but is not closed; closing it remains the caller's job.
     *
     * @param pdfFile stream positioned at the start of a PDF document
     * @throws RuntimeException if the stream cannot be read
     */
    public PdfToPicConverter(InputStream pdfFile) {
        try {
            pdfFileByteStream = inputStreamToByteArray(pdfFile);
        } catch (Exception e) {
            throw new RuntimeException("pdf文件转换为字节流失败", e);
        }
        pageCount = countPages(pdfFileByteStream);
    }

    /**
     * Creates a converter from the bytes of a PDF document.
     *
     * <p>The array is stored as-is (no defensive copy), so the caller must not modify it while
     * the converter is in use.
     *
     * @param pdfFile complete content of a PDF document
     * @throws NullPointerException if {@code pdfFile} is {@code null}
     */
    public PdfToPicConverter(byte[] pdfFile) {
        if (pdfFile == null) {
            throw new NullPointerException("pdfFile must not be null");
        }
        pdfFileByteStream = pdfFile;
        pageCount = countPages(pdfFileByteStream);
    }

    /**
     * Renders every page of the PDF to a PNG image.
     *
     * <p>Pages are processed in chunks of {@code GROUP_SIZE}. When an executor is supplied, one
     * task per chunk is submitted and the results are collected in submission order, which keeps
     * the images in page order regardless of which task finishes first. When
     * {@code executorService} is {@code null}, the chunks are converted sequentially in the
     * calling thread.
     *
     * @param executorService executor used to render chunks in parallel, or {@code null} to
     *                        convert synchronously
     * @return a new mutable list with one PNG stream per page, in page order
     * @throws RuntimeException if any chunk fails to convert or the calling thread is interrupted
     *                          while waiting for a result
     */
    public List<InputStream> convertToPictures(ExecutorService executorService) {
        int numGroups = (pageCount + GROUP_SIZE - 1) / GROUP_SIZE;
        List<InputStream> pictures = new ArrayList<>(pageCount);

        if (executorService == null) {
            // No executor available: do the same work inline instead of returning nothing.
            for (int i = 0; i < numGroups; i++) {
                int startPage = i * GROUP_SIZE;
                int endPage = Math.min(startPage + GROUP_SIZE, pageCount);
                pictures.addAll(convertGroup(startPage, endPage));
            }
            return pictures;
        }

        List<Future<List<InputStream>>> futures = new ArrayList<>(numGroups);
        for (int i = 0; i < numGroups; i++) {
            int startPage = i * GROUP_SIZE;
            int endPage = Math.min(startPage + GROUP_SIZE, pageCount);
            futures.add(executorService.submit(() -> convertGroup(startPage, endPage)));
        }

        try {
            // Futures are consumed in submission order, so the pages stay in document order.
            for (Future<List<InputStream>> future : futures) {
                pictures.addAll(future.get());
            }
        } catch (InterruptedException e) {
            cancelAll(futures);
            // Restore the interrupt flag so code further up the stack can react to it.
            Thread.currentThread().interrupt();
            throw new RuntimeException("图片转换失败", e);
        } catch (Exception e) {
            cancelAll(futures);
            throw new RuntimeException("图片转换失败", e);
        }
        return pictures;
    }

    /** Cancels every task that has not finished yet; finished tasks are unaffected. */
    private static void cancelAll(List<Future<List<InputStream>>> futures) {
        for (Future<List<InputStream>> future : futures) {
            future.cancel(true);
        }
    }

    /**
     * Loads the PDF with Spire.PDF just long enough to read its page count.
     *
     * @param pdfBytes complete content of a PDF document
     * @return number of pages reported by Spire.PDF
     */
    private static int countPages(byte[] pdfBytes) {
        PdfDocument pdf = new PdfDocument();
        try {
            pdf.loadFromStream(new ByteArrayInputStream(pdfBytes));
            return pdf.getPages().getCount();
        } finally {
            pdf.close();
        }
    }

    /**
     * Copies a range of pages into a new in-memory PDF using iText.
     *
     * @param pdfFileByteStream complete content of the source PDF
     * @param from              zero-based index of the first page to copy
     * @param end               zero-based index one past the last page to copy; {@code 0} or a
     *                          value beyond the document means "up to the last page"
     * @return stream over the bytes of the new PDF
     * @throws RuntimeException if the source cannot be parsed or the copy cannot be written
     */
    private InputStream splitPDFFile(byte[] pdfFileByteStream, int from, int end) {
        PdfReader reader = null;
        try {
            reader = new PdfReader(new ByteArrayInputStream(pdfFileByteStream));
            int totalPages = reader.getNumberOfPages();
            int lastPage = (end == 0 || end > totalPages) ? totalPages : end;

            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            Document document = new Document(reader.getPageSize(1));
            PdfCopy copy = new PdfCopy(document, baos);
            document.open();
            // iText page numbers are 1-based, hence from + 1 .. lastPage inclusive.
            for (int pageNumber = from + 1; pageNumber <= lastPage; pageNumber++) {
                document.newPage();
                PdfImportedPage page = copy.getImportedPage(reader, pageNumber);
                copy.addPage(page);
            }
            // Closing the document finishes the PDF written into baos.
            document.close();
            return new ByteArrayInputStream(baos.toByteArray());
        } catch (IOException | DocumentException e) {
            throw new RuntimeException("PDF文件拆分失败", e);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

    /**
     * Renders one chunk of pages to PNG images.
     *
     * @param startPage zero-based index of the first page of the chunk
     * @param endPage   zero-based index one past the last page of the chunk
     * @return one PNG stream per page of the chunk, in page order
     * @throws RuntimeException if splitting the PDF or encoding an image fails
     */
    private List<InputStream> convertGroup(int startPage, int endPage) {
        InputStream chunk = splitPDFFile(pdfFileByteStream, startPage, endPage);
        int pagesInChunk = endPage - startPage;
        List<InputStream> groupResult = new ArrayList<>(Math.max(pagesInChunk, 0));

        PdfDocument pdf = new PdfDocument();
        try {
            pdf.loadFromStream(chunk);
            for (int i = 0; i < pagesInChunk; i++) {
                BufferedImage image = pdf.saveAsImage(i, PdfImageType.Bitmap, RENDER_DPI, RENDER_DPI);
                try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
                    ImageIO.write(image, "PNG", baos);
                    groupResult.add(new ByteArrayInputStream(baos.toByteArray()));
                } catch (IOException e) {
                    throw new RuntimeException("图片保存失败", e);
                }
            }
        } finally {
            // Release the document even when rendering or encoding fails.
            pdf.close();
        }
        return groupResult;
    }

    /**
     * Reads a stream to its end and returns everything that was read.
     *
     * <p>The stream is not closed by this method.
     *
     * @param inputStream stream to drain
     * @return all bytes read from the stream
     * @throws IOException if reading fails
     */
    protected byte[] inputStreamToByteArray(InputStream inputStream) throws IOException {
        int bufferSize = 4096;
        byte[] buffer = new byte[bufferSize];
        int bytesRead;
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        while ((bytesRead = inputStream.read(buffer)) != -1) {
            baos.write(buffer, 0, bytesRead);
        }
        return baos.toByteArray();
    }
}
2、使用的地方
/**
 * Converts every page of a PDF into a PNG file descriptor.
 *
 * <p>Page {@code i} (zero-based) is named {@code fileName + i + ".png"}.
 *
 * @param filebyte complete content of the PDF document
 * @param fileName prefix used for the generated image names
 * @return one {@code FileDTO} per page, in page order
 */
public List<FileDTO> pdfToPng(byte[] filebyte, String fileName) {
    PdfToPicConverter pdfToPicConverter = new PdfToPicConverter(filebyte);
    List<InputStream> inputStreams = pdfToPicConverter.convertToPictures(pdfCovertPicExectorPool);
    List<FileDTO> fileDTOS = new ArrayList<>(inputStreams.size());
    // An index loop gives the page number directly, without an indexOf scan per element.
    for (int i = 0; i < inputStreams.size(); i++) {
        String pngName = fileName + i + ".png";
        fileDTOS.add(new FileDTO(inputStreams.get(i), pngName));
    }
    return fileDTOS;
}
四、spire.pdf.free 的下载:在 Maven 仓库网站上搜索 spire.pdf.free,进入该依赖的页面后点击官网链接,即可在官网下载 jar 包。