使用 Vue 识别 Word 表格中的图片,
但是无法处理 .emf 和 .tif 格式:
.tif 图片不显示,.emf 图片则无法识别。
<template>
  <div class="container">
    <h2>提取 Word 表格中的图片</h2>
    <!-- Choosing a file triggers parsing of the selected .docx -->
    <input type="file" accept=".docx" v-on:change="handleFileUpload" />
    <!-- Results area: rendered only once at least one table has been extracted -->
    <div v-if="tables.length">
      <h3>解析结果:</h3>
      <div v-for="(table, tableIndex) in tables" v-bind:key="tableIndex">
        <table border="1">
          <tbody>
            <tr v-for="(row, rowIndex) in table" v-bind:key="rowIndex">
              <!-- Each cell is an HTML string (text plus img tags) and is injected as markup -->
              <td
                v-for="(cell, cellIndex) in row"
                v-bind:key="cellIndex"
                v-html="cell"
              ></td>
            </tr>
          </tbody>
        </table>
      </div>
    </div>
  </div>
</template>
<script>
import JSZip from "jszip";
/**
 * MIME types of the image formats that can be embedded in a .docx package,
 * keyed by lower-case file extension.
 *
 * NOTE: a correct MIME type does not make a format displayable. Most browsers
 * cannot render TIFF, EMF or WMF inside an <img>; showing those would require
 * converting them with an additional decoding library.
 */
const IMAGE_MIME_TYPES = new Map([
  ["png", "image/png"],
  ["jpg", "image/jpeg"],
  ["jpeg", "image/jpeg"],
  ["jfif", "image/jpeg"],
  ["gif", "image/gif"],
  ["bmp", "image/bmp"],
  ["webp", "image/webp"],
  ["svg", "image/svg+xml"],
  ["tif", "image/tiff"],
  ["tiff", "image/tiff"],
  ["emf", "image/emf"],
  ["wmf", "image/wmf"],
]);

/** MIME type used for unknown extensions (the value previously used for every image). */
const FALLBACK_IMAGE_MIME_TYPE = "image/png";

/**
 * Returns the MIME type matching the extension of a package path.
 * @param {string} path - Path of the image inside the zip package.
 * @returns {string} The mapped MIME type, or the fallback when the extension is unknown.
 */
function mimeTypeForPath(path) {
  const dotIndex = path.lastIndexOf(".");
  const extension = dotIndex === -1 ? "" : path.slice(dotIndex + 1).toLowerCase();
  return IMAGE_MIME_TYPES.get(extension) || FALLBACK_IMAGE_MIME_TYPE;
}

/**
 * Resolves a relationship target to a zip entry path.
 * Targets starting with "/" are taken from the package root; all others are
 * relative to `baseDir`. "." and ".." segments are normalised.
 * @param {string} baseDir - Directory of the part that owns the relationship (e.g. "word").
 * @param {string} target - Value of the relationship's Target attribute.
 * @returns {string} Normalised path without a leading slash.
 */
function resolvePartPath(baseDir, target) {
  const segments = target.startsWith("/") ? [] : baseDir.split("/");
  for (const segment of target.split("/")) {
    if (segment === "" || segment === ".") continue;
    if (segment === "..") {
      segments.pop();
    } else {
      segments.push(segment);
    }
  }
  return segments.join("/");
}

/**
 * Returns the descendants of `root` named `tagName` whose nearest ancestor
 * named `ownerTagName` is `root` itself. Used to skip the rows and cells of
 * nested tables when walking an outer table.
 * @param {Element} root - Element to search in; its nodeName must be `ownerTagName`.
 * @param {string} tagName - Qualified name of the wanted descendants.
 * @param {string} ownerTagName - Qualified name of the owning container element.
 * @returns {Element[]} Matching elements in document order.
 */
function ownDescendants(root, tagName, ownerTagName) {
  return Array.from(root.getElementsByTagName(tagName)).filter((node) => {
    let ancestor = node.parentNode;
    while (ancestor && ancestor.nodeName !== ownerTagName) {
      ancestor = ancestor.parentNode;
    }
    return ancestor === root;
  });
}

/**
 * Collects the non-empty values of one attribute from every descendant with the given tag.
 * @param {Element} root - Element to search in.
 * @param {string} tagName - Qualified name of the elements to inspect.
 * @param {string} attributeName - Qualified name of the attribute to read.
 * @returns {string[]} Attribute values in document order.
 */
function attributeValues(root, tagName, attributeName) {
  return Array.from(root.getElementsByTagName(tagName))
    .map((node) => node.getAttribute(attributeName))
    .filter(Boolean);
}

/**
 * Escapes the characters that are significant in HTML text and attribute values.
 * @param {string} text - Untrusted text.
 * @returns {string} Text that is safe to embed in an HTML string.
 */
function escapeHtml(text) {
  return String(text)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/**
 * Component that reads a .docx file in the browser and shows the text and
 * images found in its tables.
 */
const wordTableImageViewer = {
  data() {
    return {
      // One entry per table: an array of rows, each row an array of cell HTML strings.
      tables: [],
    };
  },
  methods: {
    /**
     * Handles the file input's change event: unzips the selected .docx, parses
     * the main document and its relationships, and stores the extracted tables
     * in `this.tables`. Problems are reported to the user through `alert`.
     * @param {Event} event - Change event of the file input.
     * @returns {Promise<void>}
     */
    async handleFileUpload(event) {
      const file = event.target.files[0];
      if (!file) return alert("请选择一个 Word 文件");

      try {
        const zip = await JSZip.loadAsync(file);
        console.debug("ZIP 文件结构:", Object.keys(zip.files));

        const documentXml = await this.readXmlFile(zip, "word/document.xml");
        const relsXml = await this.readXmlFile(zip, "word/_rels/document.xml.rels");
        if (!documentXml || !relsXml) {
          alert("无法解析 Word 文件");
          return;
        }

        const parser = new DOMParser();
        const docXml = parser.parseFromString(documentXml, "application/xml");
        const relsXmlDoc = parser.parseFromString(relsXml, "application/xml");

        const tableNodes = Array.from(docXml.getElementsByTagName("w:tbl"));
        if (!tableNodes.length) {
          alert("未找到 Word 表格");
          return;
        }

        const extractedTables = [];
        for (const tableNode of tableNodes) {
          const tableRows = [];
          // Only rows/cells owned by this table: nested tables are listed as
          // tables of their own and must not be flattened into the outer one.
          for (const row of ownDescendants(tableNode, "w:tr", "w:tbl")) {
            const cells = ownDescendants(row, "w:tc", "w:tr");
            tableRows.push(
              await Promise.all(
                cells.map((cell) => this.buildCellHtml(cell, relsXmlDoc, zip))
              )
            );
          }
          extractedTables.push(tableRows);
        }
        this.tables = extractedTables;
      } catch (error) {
        // E.g. the selected file is not a valid zip archive.
        console.error(error);
        alert("无法解析 Word 文件");
      }
    },

    /**
     * Reads one entry of the zip package as text.
     * @param {object} zip - Loaded JSZip instance.
     * @param {string} filePath - Path of the entry inside the package.
     * @returns {Promise<string|null>} Entry content, or null when the entry does not exist.
     */
    async readXmlFile(zip, filePath) {
      const entry = zip.file(filePath);
      return entry ? entry.async("string") : null;
    },

    /**
     * Builds the HTML string shown for one table cell: the escaped cell text
     * followed by one <img> per embedded image (images separated by <br>).
     * The text is escaped because the result is rendered with v-html.
     * @param {Element} cell - A w:tc element.
     * @param {Document} relsXmlDoc - Parsed word/_rels/document.xml.rels.
     * @param {object} zip - Loaded JSZip instance.
     * @returns {Promise<string>} HTML for the cell.
     */
    async buildCellHtml(cell, relsXmlDoc, zip) {
      const text = escapeHtml(this.extractText(cell));
      const images = await this.extractImages(cell, relsXmlDoc, zip);
      const imageHtml = images
        .map((imgSrc) => {
          // The MIME type doubles as alt text so that images the browser
          // cannot render (e.g. EMF/TIFF) are still visibly recognised.
          const mimeType = imgSrc.slice("data:".length, imgSrc.indexOf(";"));
          return `<img src="${imgSrc}" alt="${escapeHtml(mimeType)}" style="max-width:100px;">`;
        })
        .join("<br>");
      return text + imageHtml;
    },

    /**
     * Returns the plain text of a cell: the content of every w:t element,
     * separated by single spaces and trimmed. Each w:t is read exactly once,
     * so text inside nested paragraphs (e.g. text boxes) is not repeated.
     * @param {Element} cell - A w:tc element.
     * @returns {string} Cell text (not HTML-escaped).
     */
    extractText(cell) {
      return Array.from(cell.getElementsByTagName("w:t"))
        .map((textNode) => textNode.textContent)
        .join(" ")
        .trim();
    },

    /**
     * Returns a data URL for every image embedded in a cell.
     *
     * Image references are taken from a:blip/@r:embed (DrawingML) and from
     * every v:imagedata/@r:id (VML), including those under w:object, not only
     * w:pict. References that cannot be resolved to an entry of the package
     * (missing relationship, external target, missing file) are skipped.
     * @param {Element} cell - A w:tc element.
     * @param {Document} relsXmlDoc - Parsed word/_rels/document.xml.rels.
     * @param {object} zip - Loaded JSZip instance.
     * @returns {Promise<string[]>} Data URLs, DrawingML images first, then VML images.
     */
    async extractImages(cell, relsXmlDoc, zip) {
      const relationshipIds = [
        ...attributeValues(cell, "a:blip", "r:embed"),
        ...attributeValues(cell, "v:imagedata", "r:id"),
      ];
      if (!relationshipIds.length) return [];
      console.debug("提取到的图片 ID:", relationshipIds);

      // Index relationships by Id once instead of building a CSS selector from
      // document data for every image.
      const relationships = new Map();
      const relationshipNodes = relsXmlDoc.getElementsByTagNameNS("*", "Relationship");
      for (const relationship of Array.from(relationshipNodes)) {
        const id = relationship.getAttribute("Id");
        if (!relationships.has(id)) relationships.set(id, relationship);
      }

      const sources = await Promise.all(
        relationshipIds.map(async (id) => {
          const relationship = relationships.get(id);
          if (!relationship) return null;
          // External targets live outside the package and cannot be read from the zip.
          if (relationship.getAttribute("TargetMode") === "External") return null;
          const target = relationship.getAttribute("Target");
          if (!target) return null;

          const imagePath = resolvePartPath("word", target);
          console.debug("图片路径:", imagePath);
          const entry = zip.file(imagePath);
          if (!entry) return null;

          const base64 = await entry.async("base64");
          return `data:${mimeTypeForPath(imagePath)};base64,${base64}`;
        })
      );
      return sources.filter((source) => source !== null);
    },
  },
};

export default wordTableImageViewer;
</script>
<style scoped>
/* Page wrapper: horizontally centred, capped width */
.container {
  margin: auto;
  max-width: 600px;
  text-align: center;
}

/* Extracted tables fill the wrapper and use collapsed borders */
table {
  border-collapse: collapse;
  margin-top: 10px;
  width: 100%;
}

/* Cell content (text and images) is centred with a small padding */
td {
  text-align: center;
  padding: 5px;
}
</style>