读取word文档,并将其中的内容按原来的样式输出,望各位大神赐教!!!
发布网友
发布时间:2022-05-11 02:48
我来回答
共2个回答
热心网友
时间:2024-02-08 05:57
package com;
import static org.junit.Assert.assertEquals;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
public class InputToword {
@Test public void modifyDocumentAndSave()throws IOException, ZipException,
SAXException,ParserConfigurationException,TransformerException,TransformerConfigurationException
{
//读取e盘下的hello.docx文档
ZipFile docxFile =new ZipFile(new File("e:\\hello.docx"));
//解压缩后获得里面和内容相关的xml,word文档是可以解压的,大家可以解压了试试
ZipEntry documentXML =docxFile.getEntry("word/document.xml");
InputStream documentXMLIS =docxFile.getInputStream(documentXML);
DocumentBuilderFactory dbf =DocumentBuilderFactory.newInstance();
Document doc =dbf.newDocumentBuilder().parse(documentXMLIS);
//获得文档里相关的节点
Element docElement = doc.getDocumentElement();
assertEquals("w:document", docElement.getTagName());
Element bodyElement = (Element)docElement.getElementsByTagName("w:body").item(0);
assertEquals("w:body", bodyElement.getTagName());
Element pElement = (Element)bodyElement.getElementsByTagName("w:p").item(0);
assertEquals("w:p", pElement.getTagName());
Element rElement = (Element)pElement.getElementsByTagName("w:r").item(0);
assertEquals("w:r", rElement.getTagName());
Element tElement = (Element)rElement.getElementsByTagName("w:t").item(0);
assertEquals("w:t", tElement.getTagName());
//查找文档中的Hello, from Office 2007!文字部分
assertEquals("Hello, from Office 2007!",tElement.getTextContent());
//写入新的内容
tElement.setTextContent("哈哈,终于可以用java写word了,Hello, Office 2007, from Java6!");
Transformer t =TransformerFactory.newInstance().newTransformer();
ByteArrayOutputStream baos =new ByteArrayOutputStream();
t.transform(new DOMSource(doc),
new StreamResult(baos));
//创建新的要输出的word文档,按钮原来word文档的内容写入新的文档中。
ZipOutputStream docxOutFile = new ZipOutputStream(new FileOutputStream("e:\\response.docx"));
Enumeration entriesIter =docxFile.entries();
while (entriesIter.hasMoreElements())
{
ZipEntry entry = (ZipEntry) entriesIter.nextElement();
if (entry.getName().equals("word/document.xml"))
{
byte[] data = baos.toByteArray();
docxOutFile.putNextEntry(new ZipEntry(entry.getName()));
docxOutFile.write(data, 0, data.length);
docxOutFile.closeEntry();
}
else
{
InputStream incoming =docxFile.getInputStream(entry);
byte[] data = new byte[1024 * 16];
int readCount =incoming.read(data, 0, data.length);
docxOutFile.putNextEntry(new ZipEntry(entry.getName()));
docxOutFile.write(data, 0, readCount);
docxOutFile.closeEntry();
}
}
docxOutFile.close();
}
}
追问这个方法不是将,word内容读出的方法,不过还是谢谢你!
追答OI 解析word内容,但是没法解析格式。
读出来你要自己控制格式。
试试 直接用文件流读word,然后设置 response ContentType,以html输出。
热心网友
时间:2024-02-08 05:58
能不能先转换一下,成PDF保证格式不变,然后上传?
其他情况想保证格式不变,比较难实现。
静等大神。