/*
 *  Copyright 2011 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.ametys.plugins.webcontentio.docx;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.avalon.framework.context.ContextException;
import org.apache.avalon.framework.context.Contextualizable;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.cocoon.Constants;
import org.apache.cocoon.environment.Context;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipFile;
import org.apache.commons.lang.StringUtils;
import org.apache.excalibur.source.Source;
import org.apache.excalibur.source.SourceResolver;
import org.apache.excalibur.xml.dom.DOMParser;
import org.apache.excalibur.xml.sax.SAXParser;
import org.apache.excalibur.xml.xpath.PrefixResolver;
import org.apache.excalibur.xml.xpath.XPathProcessor;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import org.ametys.cms.repository.Content;
import org.ametys.plugins.repository.metadata.ModifiableBinaryMetadata;
import org.ametys.plugins.repository.metadata.ModifiableRichText;
import org.ametys.plugins.webcontentio.ContentImporter;
import org.ametys.web.repository.content.ModifiableWebContent;
import org.ametys.web.repository.page.ModifiablePage;

/**
 * Imports Docx files.<br>
 * The docx archive is opened as a zip file; <code>word/document.xml</code> and
 * <code>word/_rels/document.xml.rels</code> are parsed as DOM documents, the
 * settings (template, long title, content type, tags, abstract, illustration, title)
 * are read from them through XPath, and the rich text is obtained by resolving
 * the <code>cocoon:/docx2docbook</code> URI.
 */
public class DocxContentImporter implements ContentImporter, Serviceable, Contextualizable
{
    /** Content type used when the document does not provide one. */
    private static final String __DEFAULT_CONTENT_TYPE = "org.ametys.web.default.Content.article";
    
    /**
     * Names of the tags that can be selected in the document. The n-th name is looked up
     * in the n-th row of the second table of the document, and is also the name of the tag set on the content.
     */
    private static final String[] __TAG_NAMES = {"SECTION", "ACCES_DIRECTS", "FOOTER_LINK", "EVENT"};
    
    private SourceResolver _resolver;
    private DOMParser _domParser;
    private SAXParser _saxParser;
    private XPathProcessor _xPathProcessor;
    private Context _context;
    
    /**
     * Retrieves the Cocoon environment context (used afterwards to guess mime types).
     * @param context the Avalon context
     * @throws ContextException if the Cocoon context cannot be retrieved
     */
    @Override
    public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException
    {
        _context = (Context) context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT);
    }
    
    /**
     * Looks up the source resolver, the DOM and SAX parsers and the XPath processor.
     * @param manager the service manager
     * @throws ServiceException if one of the components cannot be looked up
     */
    @Override
    public void service(ServiceManager manager) throws ServiceException
    {
        _resolver = (SourceResolver) manager.lookup(SourceResolver.ROLE);
        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
        _saxParser = (SAXParser) manager.lookup(SAXParser.ROLE);
    }
    
    /**
     * Fills the given content from the given docx file.<br>
     * Sets the <code>page.template</code> and <code>page.longTitle</code> entries of <code>params</code>
     * (with <code>null</code> values when the document does not provide them), then the title, type,
     * abstract, illustration, tags and rich text of the content.
     * @param file the docx file to import
     * @param content the content to fill
     * @param params the import parameters, modified by this method
     * @throws IOException if the file cannot be read, if a required entry is missing from the archive
     *                     or if the transformed rich text is not valid XML
     */
    @Override
    public void importContent(File file, ModifiableWebContent content, Map<String, String> params) throws IOException
    {
        ZipFile zipFile = new ZipFile(file);
        
        // The archive must be closed whatever happens, not only when the import succeeds
        try
        {
            Document document = _getDocument(zipFile, "word/document.xml", file);
            Document relations = _getDocument(zipFile, "word/_rels/document.xml.rels", file);
            
            PrefixResolver resolver = new DocxPrefixResolver();
            
            String template = _comboBoxValue(document, resolver, 1, 1, "template");
            String longTitle = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[1]/w:tr[2]/w:sdt/w:sdtPr[w:tag/@w:val='long_title']/w:text", resolver);
            String contentType = _comboBoxValue(document, resolver, 1, 3, "content-type");
            
            // tags
            List<String> tags = new ArrayList<>();
            for (int i = 0; i < __TAG_NAMES.length; i++)
            {
                String tagName = __TAG_NAMES[i];
                if (tagName.equals(_comboBoxValue(document, resolver, 2, i + 1, tagName)))
                {
                    tags.add(tagName);
                }
            }
            
            // abstract
            NodeList abstractList = _xPathProcessor.selectNodeList(document, "/w:document/w:body/w:sdt[w:sdtPr/w:tag/@w:val='abstract' and not(w:sdtPr/w:showingPlcHdr)]/w:sdtContent/w:p/w:r/w:t", resolver);
            String abstr = _joinText(abstractList, "\n");
            
            // illustration
            _importIllustration(file, zipFile, document, relations, resolver, content);
            
            // title
            NodeList titleList = _xPathProcessor.selectNodeList(document, "/w:document/w:body/w:p[w:pPr/w:pStyle/@w:val='Titre'][1]/w:r/w:t", resolver);
            String title = _joinText(titleList, "");
            
            params.put("page.template", StringUtils.trimToNull(template));
            params.put("page.longTitle", StringUtils.trimToNull(longTitle));
            
            content.setTitle(StringUtils.trimToNull(title) != null ? title : content.getName());
            String cType = StringUtils.trimToNull(contentType) != null ? contentType : __DEFAULT_CONTENT_TYPE;
            content.setTypes(new String[] {cType});
            
            if (abstr.length() > 0)
            {
                content.getMetadataHolder().setMetadata("abstract", abstr);
            }
            
            for (String tagName : tags)
            {
                content.tag(tagName);
            }
            
            // actual content
            Map<String, Object> context = new HashMap<>();
            context.put("document", document);
            context.put("relations", relations);
            context.put("zipFile", zipFile);
            context.put("content", content);
            
            _importRichText(content, context);
        }
        finally
        {
            ZipFile.closeQuietly(zipFile);
        }
    }
    
    /**
     * Returns the mime types handled by this importer.
     * @return an array containing only the docx mime type
     */
    @Override
    public String[] getMimeTypes()
    {
        // handles docx mime-type
        return new String[]{"application/vnd.openxmlformats-officedocument.wordprocessingml.document"};
    }
    
    /**
     * Does nothing for docx files.
     * @param page the page
     * @param content the imported content
     * @param file the imported file
     * @throws IOException never thrown by this implementation
     */
    @Override
    public void postTreatment(ModifiablePage page, Content content, File file) throws IOException
    {
        // Nothing to do
    }
    
    /**
     * Evaluates the value of the list item currently displayed by a combo box content control.
     * @param document the main document
     * @param resolver the namespace prefix resolver
     * @param table the 1-based index of the table holding the control
     * @param row the 1-based index of the row holding the control
     * @param tag the value of the <code>w:tag</code> of the control
     * @return the result of the XPath evaluation as a string
     */
    private String _comboBoxValue(Document document, PrefixResolver resolver, int table, int row, String tag)
    {
        String xpath = "/w:document/w:body/w:tbl[" + table + "]/w:tr[" + row + "]/w:sdt/w:sdtPr[w:tag/@w:val='" + tag + "']"
                     + "/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value";
        return _xPathProcessor.evaluateAsString(document, xpath, resolver);
    }
    
    /**
     * Concatenates the text content of the given nodes.
     * @param nodes the nodes
     * @param separator the string inserted between two consecutive nodes
     * @return the concatenated text, empty if there is no node
     */
    private String _joinText(NodeList nodes, String separator)
    {
        StringBuilder text = new StringBuilder();
        
        for (int i = 0; i < nodes.getLength(); i++)
        {
            if (i != 0)
            {
                text.append(separator);
            }
            
            text.append(nodes.item(i).getTextContent());
        }
        
        return text.toString();
    }
    
    /**
     * Copies the picture of the "illustration" content control, if any, to the <code>illustration/image</code> binary metadata of the content.
     * @param file the docx file, used for error messages only
     * @param zipFile the opened docx archive
     * @param document the main document
     * @param relations the relations of the main document
     * @param resolver the namespace prefix resolver
     * @param content the content to fill
     * @throws IOException if the picture is referenced but cannot be found or read in the archive
     */
    private void _importIllustration(File file, ZipFile zipFile, Document document, Document relations, PrefixResolver resolver, ModifiableWebContent content) throws IOException
    {
        String pictureId = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:sdt[w:sdtPr/w:tag/@w:val='illustration' and not(w:sdtPr/w:showingPlcHdr)]/w:sdtContent/w:p/w:r/w:drawing/wp:inline/a:graphic/a:graphicData/pic:pic/pic:blipFill/a:blip/@r:embed", resolver);
        
        if (StringUtils.trimToNull(pictureId) == null)
        {
            // No illustration in the document
            return;
        }
        
        String pictureEntryName = _xPathProcessor.evaluateAsString(relations, "/rel:Relationships/rel:Relationship[@Id='" + pictureId + "']/@Target", resolver);
        
        int i = pictureEntryName.lastIndexOf('/');
        String fileName = i == -1 ? pictureEntryName : pictureEntryName.substring(i + 1);
        
        ZipArchiveEntry entry = zipFile.getEntry("word/" + pictureEntryName);
        if (entry == null)
        {
            // Fail with an explicit message instead of handing a missing entry to the archive
            throw new IOException("Unable to find illustration word/" + pictureEntryName + " in file " + file.getAbsolutePath());
        }
        
        try (InputStream is = zipFile.getInputStream(entry))
        {
            ModifiableBinaryMetadata illustration = content.getMetadataHolder().getCompositeMetadata("illustration", true).getBinaryMetadata("image", true);
            illustration.setLastModified(new Date());
            illustration.setInputStream(is);
            illustration.setFilename(fileName);
            
            String mimeType = _context.getMimeType(fileName);
            if (mimeType != null)
            {
                illustration.setMimeType(mimeType);
            }
        }
    }
    
    /**
     * Resolves <code>cocoon:/docx2docbook</code> with the given parameters, stores the result
     * in the <code>content</code> rich text of the content, then parses what was stored to check it is XML.
     * @param content the content to fill
     * @param context the parameters given to the source resolver
     * @throws IOException if the source cannot be read or if the stored rich text cannot be parsed
     */
    private void _importRichText(ModifiableWebContent content, Map<String, Object> context) throws IOException
    {
        Source src = _resolver.resolveURI("cocoon:/docx2docbook", null, context);
        
        try
        {
            try (InputStream is = src.getInputStream())
            {
                ModifiableRichText richText = content.getMetadataHolder().getRichText("content", true);
                
                richText.setLastModified(new Date());
                richText.setMimeType("text/xml");
                richText.setInputStream(is);
                
                // Re-read what has been stored: the handler ignores all events, only parse errors matter
                try (InputStream in = richText.getInputStream())
                {
                    _saxParser.parse(new InputSource(in), new DefaultHandler());
                }
                catch (SAXException e)
                {
                    throw new IOException("Invalid resulting XML after transformation", e);
                }
            }
        }
        finally
        {
            // A resolved source has to be given back to its resolver
            _resolver.release(src);
        }
    }
    
    /**
     * Parses an entry of the archive as a DOM document.
     * @param zipFile the opened docx archive
     * @param entryName the name of the entry to parse
     * @param file the docx file, used for error messages only
     * @return the parsed document
     * @throws IOException if the entry is missing, cannot be read or is not valid XML
     */
    private Document _getDocument(ZipFile zipFile, String entryName, File file) throws IOException
    {
        ZipArchiveEntry entry = zipFile.getEntry(entryName);
        if (entry == null)
        {
            throw new IOException("Unable to find " + entryName + " in file " + file.getAbsolutePath());
        }
        
        try (InputStream is = zipFile.getInputStream(entry))
        {
            return _domParser.parseDocument(new InputSource(is));
        }
        catch (SAXException e)
        {
            throw new IOException("Unable to read " + entryName + " in file " + file.getAbsolutePath(), e);
        }
    }
    
    /**
     * Resolves the namespace prefixes used in the XPath expressions of this importer.
     */
    private static final class DocxPrefixResolver implements PrefixResolver
    {
        private final Map<String, String> _ns = new HashMap<>();
        
        public DocxPrefixResolver()
        {
            _ns.put("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
            _ns.put("r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships");
            _ns.put("wp", "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing");
            _ns.put("a", "http://schemas.openxmlformats.org/drawingml/2006/main");
            _ns.put("pic", "http://schemas.openxmlformats.org/drawingml/2006/picture");
            _ns.put("rel", "http://schemas.openxmlformats.org/package/2006/relationships");
        }
        
        /**
         * Returns the namespace URI bound to a prefix.
         * @param prefix the prefix
         * @return the namespace URI, or <code>null</code> if the prefix is unknown
         */
        @Override
        public String prefixToNamespace(String prefix)
        {
            return _ns.get(prefix);
        }
    }
}