想要获取一个 PDF 文件的页数,有多种实现方式:既可以使用 pdf.js(pdfjsLib),也可以使用 pdf-lib 提供的 PDFDocument:
// Method 1: read the file's bytes through its arrayBuffer() method and let pdf-lib count the pages.
let arrayBuffer = await file.arrayBuffer();
// `ignoreEncryption: true` is passed so that loading presumably does not fail on encrypted documents — confirm against the pdf-lib docs.
const pdfDoc = await PDFDocument.load(arrayBuffer, { ignoreEncryption: true });
const pageCount = pdfDoc.getPageCount();
// Method 2: start from a Blob created from the file.
// pdf-lib's load() takes a string, Uint8Array or ArrayBuffer, not a Blob,
// so the Blob's bytes are read into an ArrayBuffer before loading.
const pdfObj = await PDFDocument.load(await blob.arrayBuffer(), { ignoreEncryption: true });
const pageCount = pdfObj.getPageCount();
// Method 3: decode the file's base64 data URL into raw bytes and let pdf.js count the pages.
// The payload is everything after the first comma of `data:<mime>;base64,<payload>`.
const byteString = atob(file.base64.split(',')[1]);
const ab = new ArrayBuffer(byteString.length);
const ia = new Uint8Array(ab);
// atob() yields one character per byte, so each char code is the byte value.
for (let i = 0; i < byteString.length; i++) {
  ia[i] = byteString.charCodeAt(i);
}
const loadingTask = pdfjsLib.getDocument({
  data: ab,
  // `file.code` is used as the document password; an empty string is sent when it is absent.
  password: file.code || '',
});
const pdfDocument = await loadingTask.promise;
const pageCount = pdfDocument.numPages;
如果是加密文件,想要通过 pdf.js 获取文件页数,需要先弹出密码框让用户输入密码并进行验证。那么如何验证密码呢?
(Office 文件暂时没有校验密码正确性的方式,因此只能检测文件是否已加密;而 PDF 文件除了可以检测是否加密之外,还可以检测是否设置了其他权限限制。)
import { PDFDocument } from 'pdf-lib';
import '@/assets/pdfjs/build/pdf.js'
import '@/assets/pdfjs/build/pdf.worker.js'
import * as XLSX from 'xlsx';
import JSZip from 'jszip';
import { gaEvent } from '@/utils/gtag'
// Reports whether a file is encrypted (needs a password before it can be read).
//
// The bytes come from one of three places:
//   - isCloud: downloaded through the link returned by getAccessLink();
//   - otherwise the file's own arrayBuffer() method when it has no `base64` field;
//   - otherwise the payload of its `base64` data URL.
// The type-specific checker is then picked from the MIME type (`file.type`),
// the `file_type` field, or a `.pdf` file-name extension.
//
// file:    file-like object; the fields read are type, file_type, name, file_name,
//          base64, cloudId and file_id.
// isCloud: true when the bytes must be fetched through getAccessLink().
// Returns true when the matching checker reports encryption, and false for
// unsupported types or when the check itself fails (the failure is logged).
const checkIfFileEncrypted = async (file: any, isCloud: boolean) => {
  try {
    // Some records only carry `file_name`; never dereference a missing `name`.
    // Lower-cased so that an upper-case `.PDF` extension is recognised as well.
    const fileName = String(file.name || file.file_name || '').toLowerCase();

    let arrayBuffer;
    if (isCloud) {
      const link = await getAccessLink(file.cloudId || file.file_id, file.name || file.file_name);
      const response = await fetch(link);
      if (!response.ok) throw new Error('Failed to fetch file');
      arrayBuffer = await response.arrayBuffer();
    } else if (!file.base64) {
      arrayBuffer = await file.arrayBuffer();
    } else {
      // Decode the data-URL payload straight into bytes; atob() yields one character per byte.
      const byteString = atob(file.base64.split(',')[1]);
      const bytes = new Uint8Array(byteString.length);
      for (let i = 0; i < byteString.length; i++) {
        bytes[i] = byteString.charCodeAt(i);
      }
      arrayBuffer = bytes.buffer;
    }

    if (file.type === 'application/pdf' || file.file_type === 'pdf' || fileName.endsWith('.pdf')) {
      // PDF encryption check
      const result = await checkPDFFileEncryption(arrayBuffer);
      return result.isEncrypted;
    } else if (
      file.type === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' ||
      file.type === 'application/msword' ||
      file.file_type === 'docx' ||
      file.file_type === 'doc'
    ) {
      // Word encryption check. `await` keeps a rejection inside this try/catch.
      return await checkWordFileEncryption(arrayBuffer);
    } else if (
      file.type === 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' ||
      file.type === 'application/vnd.ms-excel' ||
      file.file_type === 'xlsx' ||
      file.file_type === 'xls'
    ) {
      // Excel encryption check
      return await checkExcelFileEncryption(arrayBuffer);
    } else if (
      file.type === 'application/vnd.openxmlformats-officedocument.presentationml.presentation' ||
      file.type === 'application/vnd.ms-powerpoint' ||
      file.file_type === 'pptx' ||
      file.file_type === 'ppt'
    ) {
      // PowerPoint encryption check
      return await checkPPTFileEncryption(arrayBuffer);
    }
    return false; // unsupported types are treated as not encrypted
  } catch (error) {
    // Best effort: a failed check is reported as "not encrypted", but leave a trace.
    console.error('Failed to check file encryption:', error);
    return false;
  }
};
// PDF encryption probe: opens the document with pdf.js without supplying a password.
//
// Returns { isEncrypted, hasOwnerPassword }:
//   - isEncrypted is true only when loading fails with a PasswordException,
//     i.e. the document cannot be opened without a password;
//   - hasOwnerPassword is true when the document opens and getPermissions()
//     yields a truthy value (permission flags are present);
//   - any other failure is reported as { false, false }.
const checkPDFFileEncryption = async (arrayBuffer: any) => {
  let loadingTask: any;
  try {
    loadingTask = pdfjsLib.getDocument({ data: arrayBuffer });
    const pdfDoc = await loadingTask.promise;
    const permissions = await pdfDoc.getPermissions();
    return {
      isEncrypted: false,
      hasOwnerPassword: Boolean(permissions),
    };
  } catch (error: any) {
    return {
      // Guarded so that a thrown null/undefined cannot raise inside the handler.
      isEncrypted: Boolean(error) && error.name === 'PasswordException',
      hasOwnerPassword: false,
    };
  } finally {
    // The probe never hands the document to the caller, so release the loading
    // task here; a cleanup failure must not replace the computed result.
    if (loadingTask) {
      try {
        await loadingTask.destroy();
      } catch (cleanupError) {
        // ignored on purpose
      }
    }
  }
};
// Verifies a PDF password by trying to open the document with it through pdf.js.
//
// file:     file-like object; bytes come from the cloud access link (isCode),
//           from file.arrayBuffer() when available, or from the `base64` data URL.
// password: the password to try.
// isCode:   true for cloud-drive files that must be downloaded via getAccessLink().
// Returns true when the document opens with the password. Returns false for a
// wrong password and also for any other failure; the latter is logged so that it
// can be told apart from a genuine password mismatch.
const verifyPDFPassword = async (file: any, password: string, isCode: boolean) => {
  let loadingTask: any;
  try {
    let arrayBuffer;
    if (isCode) { // cloud-drive file
      const link = await getAccessLink(file.cloudId || file.file_id, file.name || file.file_name);
      const response = await fetch(link);
      if (!response.ok) throw new Error('Failed to fetch file');
      arrayBuffer = await response.arrayBuffer();
    } else if (typeof file.arrayBuffer === 'function') {
      arrayBuffer = await file.arrayBuffer();
    } else if (file.base64) {
      // Same source the encryption check accepts: decode the data-URL payload,
      // one character of atob() output per byte.
      const byteString = atob(file.base64.split(',')[1]);
      const bytes = new Uint8Array(byteString.length);
      for (let i = 0; i < byteString.length; i++) {
        bytes[i] = byteString.charCodeAt(i);
      }
      arrayBuffer = bytes.buffer;
    } else {
      throw new Error('File has no readable content');
    }

    loadingTask = pdfjsLib.getDocument({
      data: arrayBuffer,
      password: password,
    });
    await loadingTask.promise;
    return true; // password accepted
  } catch (error: any) {
    if (!error || error.name !== 'PasswordException') {
      // Not a password problem (download, decoding, corrupt file, ...).
      console.error('Failed to verify PDF password:', error);
    }
    return false;
  } finally {
    // Only the open/fail outcome is needed, so release the loading task;
    // a cleanup failure must not replace the result.
    if (loadingTask) {
      try {
        await loadingTask.destroy();
      } catch (cleanupError) {
        // ignored on purpose
      }
    }
  }
};
// Asks the UI for a password by emitting 'show-password-prompt' on the EventBus.
// The emitted callback settles the returned promise with { action, password },
// i.e. whatever the listener passes back through that callback.
const promptUserForPassword = async (file: any) => {
  const answer = new Promise((settle) => {
    const onPromptClosed = (action: any, password: string) => {
      settle({ action, password });
    };
    EventBus.$emit('show-password-prompt', file, onPromptClosed);
  });
  return answer;
};
// Word encryption check: tries to open the bytes as a zip archive with JSZip.
// Returns true only when JSZip fails with one of the known "cannot read this
// archive" messages (the file is encrypted or damaged); every other outcome,
// including an archive without word/document.xml, yields false.
// NOTE(review): a legacy binary .doc is not a zip archive, so it presumably also
// fails with "Can't find end of central directory" and is reported as encrypted — confirm.
const checkWordFileEncryption = async (arrayBuffer: any) => {
  const unreadableMarkers = [
    'End of data reached',
    'Corrupted zip',
    'Can\'t find end of central directory',
  ];
  try {
    const archive = await JSZip.loadAsync(arrayBuffer);
    const hasMainDocument = Boolean(archive.files['word/document.xml']);
    if (hasMainDocument) {
      return false; // archive opened and contains the main part: not encrypted
    }
  } catch (error: any) {
    const unreadable = unreadableMarkers.some((marker) => error.message.includes(marker));
    if (unreadable) {
      return true; // encrypted or damaged
    }
  }
  return false;
};
// Excel encryption check: lets XLSX.read() parse the bytes.
// Returns true only when parsing throws an error whose message mentions
// 'Encrypted' or 'File is password-protected'; a workbook that parses (with or
// without sheets) and any other error both yield false.
const checkExcelFileEncryption = async (arrayBuffer: any) => {
  const protectedMarkers = ['Encrypted', 'File is password-protected'];
  try {
    const { SheetNames } = XLSX.read(arrayBuffer, { type: 'array' });
    if (SheetNames.length > 0) {
      return false; // workbook parsed: not encrypted
    }
  } catch (error: any) {
    if (protectedMarkers.some((marker) => error.message.includes(marker))) {
      return true; // encrypted
    }
  }
  return false;
};
// PowerPoint encryption check: tries to open the bytes as a zip archive with JSZip.
// Returns true only when JSZip fails with a message containing 'Encrypted' or
// "Can't find end of central directory"; an archive that opens (with or without
// ppt/presentation.xml) and any other error both yield false.
// NOTE(review): a legacy binary .ppt is not a zip archive, so it presumably also
// fails with the "central directory" message and is reported as encrypted — confirm.
const checkPPTFileEncryption = async (arrayBuffer: any) => {
  const encryptedMarkers = ['Encrypted', 'Can\'t find end of central directory'];
  try {
    const archive = await JSZip.loadAsync(arrayBuffer);
    const hasPresentation = Boolean(archive.files['ppt/presentation.xml']);
    if (hasPresentation) {
      return false; // archive opened and contains the presentation part: not encrypted
    }
  } catch (error: any) {
    const looksEncrypted = encryptedMarkers.some((marker) => error.message.includes(marker));
    if (looksEncrypted) {
      return true; // encrypted
    }
  }
  return false;
};
通过以上方式即可完成加密校验。下面是 Word、PPT、Excel 等格式文件获取文件页数的方式:
// Returns the page count of a PDF by opening it with pdf.js.
//
// arrayBuffer: the PDF bytes.
// password:    optional password forwarded to pdf.js.
// Rejects with whatever pdf.js rejects with (e.g. when the password is missing
// or wrong); callers are expected to have verified the password beforehand.
const getPDFPageCount = async (arrayBuffer: any, password?: string) => {
  const loadingTask = pdfjsLib.getDocument({ data: arrayBuffer, password });
  try {
    const pdfDocument = await loadingTask.promise;
    return pdfDocument.numPages;
  } finally {
    // Only the count leaves this function, so release the loading task;
    // a cleanup failure must not replace the result or the original error.
    try {
      await loadingTask.destroy();
    } catch (cleanupError) {
      // ignored on purpose
    }
  }
};
// Returns the page count of a Word (.docx) file.
//
// The count is read from the <Pages> element of docProps/app.xml inside the
// archive. That value is metadata written by the authoring application, so it
// can be missing or stale; in that case, or when the archive cannot be opened
// (the error is logged), the function falls back to 1.
const getWordPageCount = async (arrayBuffer: any) => {
  // Tolerates an optional namespace prefix on the element name.
  const pagesPattern = /<(?:[\w.-]+:)?Pages>\s*(\d+)\s*<\/(?:[\w.-]+:)?Pages>/;
  try {
    const zip = await JSZip.loadAsync(arrayBuffer);
    const appProperties = zip.file('docProps/app.xml');
    if (appProperties) {
      const xml = await appProperties.async('string');
      const match = pagesPattern.exec(xml);
      const pages = match ? Number(match[1]) : 0;
      if (pages > 0) {
        return pages;
      }
    }
  } catch (error) {
    console.error('Error processing Word file:', error);
  }
  return 1;
};
// Returns the "page count" of an Excel file, defined here as its number of
// worksheets as reported by XLSX.read(). When parsing fails the error is
// logged and 1 is returned.
const getExcelPageCount = async (arrayBuffer) => {
  let sheetTotal = 1;
  try {
    const { SheetNames } = XLSX.read(arrayBuffer, { type: 'array' });
    sheetTotal = SheetNames.length;
  } catch (failure) {
    console.error('Error processing Excel file:', failure);
  }
  return sheetTotal;
};
// Returns the page count of a PowerPoint (.pptx) file, defined as its number of slides.
//
// Slides are counted as archive entries named ppt/slides/slide<N>.xml. Counting
// the keys of the 'ppt/slides/' folder entry would count that entry's own
// properties rather than the slides, and the folder entry is not guaranteed to
// exist in the archive at all. Falls back to 1 when no slide is found or the
// archive cannot be opened (the error is logged).
const getPPTPageCount = async (arrayBuffer: any) => {
  const slideEntryPattern = /^ppt\/slides\/slide\d+\.xml$/i;
  try {
    const zip = await JSZip.loadAsync(arrayBuffer);
    const slideCount = Object.keys(zip.files).filter((path) => slideEntryPattern.test(path)).length;
    if (slideCount > 0) {
      return slideCount;
    }
  } catch (error) {
    console.error('Error processing PPT file:', error);
  }
  return 1;
};