001/* 002 * Copyright 2011 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.ametys.plugins.webcontentio.docx; 018 019import java.io.File; 020import java.io.IOException; 021import java.io.InputStream; 022import java.util.Date; 023import java.util.HashMap; 024import java.util.Map; 025 026import org.apache.avalon.framework.context.ContextException; 027import org.apache.avalon.framework.context.Contextualizable; 028import org.apache.avalon.framework.service.ServiceException; 029import org.apache.avalon.framework.service.ServiceManager; 030import org.apache.avalon.framework.service.Serviceable; 031import org.apache.cocoon.Constants; 032import org.apache.cocoon.environment.Context; 033import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; 034import org.apache.commons.compress.archivers.zip.ZipFile; 035import org.apache.commons.lang.StringUtils; 036import org.apache.excalibur.source.Source; 037import org.apache.excalibur.source.SourceResolver; 038import org.apache.excalibur.xml.dom.DOMParser; 039import org.apache.excalibur.xml.sax.SAXParser; 040import org.apache.excalibur.xml.xpath.PrefixResolver; 041import org.apache.excalibur.xml.xpath.XPathProcessor; 042import org.w3c.dom.Document; 043import org.w3c.dom.NodeList; 044import org.xml.sax.InputSource; 045import org.xml.sax.SAXException; 046import org.xml.sax.helpers.DefaultHandler; 047 048import org.ametys.cms.repository.Content; 049import org.ametys.plugins.repository.metadata.ModifiableBinaryMetadata; 050import org.ametys.plugins.repository.metadata.ModifiableRichText; 051import org.ametys.plugins.webcontentio.ContentImporter; 052import org.ametys.web.repository.content.ModifiableWebContent; 053import org.ametys.web.repository.page.ModifiablePage; 054 055/** 056 * Imports Docx files. 057 */ 058public class DocxContentImporter implements ContentImporter, Serviceable, Contextualizable 059{ 060 /** The service manager */ 061 protected ServiceManager _manager; 062 063 private SourceResolver _resolver; 064 private DOMParser _domParser; 065 private XPathProcessor _xPathProcessor; 066 private Context _context; 067 068 @Override 069 public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException 070 { 071 _context = (Context) context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT); 072 } 073 074 @Override 075 public void service(ServiceManager manager) throws ServiceException 076 { 077 _manager = manager; 078 _resolver = (SourceResolver) manager.lookup(SourceResolver.ROLE); 079 _domParser = (DOMParser) manager.lookup(DOMParser.ROLE); 080 _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE); 081 } 082 083 @Override 084 public void importContent(File file, ModifiableWebContent content, Map<String, String> params) throws IOException 085 { 086 ZipFile zipFile = new ZipFile(file); 087 088 String template = null; 089 String longTitle = null; 090 String contentType = null; 091 boolean section; 092 boolean directAccess; 093 boolean footer; 094 boolean event; 095 096 Document document = _getDocument(zipFile, "word/document.xml", file); 097 Document relations = _getDocument(zipFile, "word/_rels/document.xml.rels", file); 098 099 PrefixResolver resolver = new DocxPrefixResolver(); 100 101 template = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[1]/w:tr[1]/w:sdt/w:sdtPr[w:tag/@w:val='template']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver); 102 longTitle = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[1]/w:tr[2]/w:sdt/w:sdtPr[w:tag/@w:val='long_title']/w:text", resolver); 103 contentType = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[1]/w:tr[3]/w:sdt/w:sdtPr[w:tag/@w:val='content-type']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver); 104 105 // tags 106 String sectionStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[1]/w:sdt/w:sdtPr[w:tag/@w:val='SECTION']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver); 107 String directAccessStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[2]/w:sdt/w:sdtPr[w:tag/@w:val='ACCES_DIRECTS']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver); 108 String footerStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[3]/w:sdt/w:sdtPr[w:tag/@w:val='FOOTER_LINK']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver); 109 String eventStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[4]/w:sdt/w:sdtPr[w:tag/@w:val='EVENT']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver); 110 111 section = "SECTION".equals(sectionStr); 112 directAccess = "ACCES_DIRECTS".equals(directAccessStr); 113 footer = "FOOTER_LINK".equals(footerStr); 114 event = "EVENT".equals(eventStr); 115 116 // abstract 117 NodeList abstractList = _xPathProcessor.selectNodeList(document, "/w:document/w:body/w:sdt[w:sdtPr/w:tag/@w:val='abstract' and not(w:sdtPr/w:showingPlcHdr)]/w:sdtContent/w:p/w:r/w:t", resolver); 118 StringBuilder abstr = new StringBuilder(); 119 120 for (int i = 0; i < abstractList.getLength(); i++) 121 { 122 if (i != 0) 123 { 124 abstr.append('\n'); 125 } 126 127 abstr.append(abstractList.item(i).getTextContent()); 128 } 129 130 // illustration 131 String pictureId = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:sdt[w:sdtPr/w:tag/@w:val='illustration' and not(w:sdtPr/w:showingPlcHdr)]/w:sdtContent/w:p/w:r/w:drawing/wp:inline/a:graphic/a:graphicData/pic:pic/pic:blipFill/a:blip/@r:embed", resolver); 132 133 if (StringUtils.trimToNull(pictureId) != null) 134 { 135 String pictureEntryName = _xPathProcessor.evaluateAsString(relations, "/rel:Relationships/rel:Relationship[@Id='" + pictureId + "']/@Target", resolver); 136 137 int i = pictureEntryName.lastIndexOf('/'); 138 String fileName = i == -1 ? pictureEntryName : pictureEntryName.substring(i + 1); 139 140 ZipArchiveEntry entry = zipFile.getEntry("word/" + pictureEntryName); 141 142 try (InputStream is = zipFile.getInputStream(entry)) 143 { 144 ModifiableBinaryMetadata illustration = content.getMetadataHolder().getCompositeMetadata("illustration", true).getBinaryMetadata("image", true); 145 illustration.setLastModified(new Date()); 146 illustration.setInputStream(is); 147 illustration.setFilename(fileName); 148 149 String mimeType = _context.getMimeType(fileName); 150 if (mimeType != null) 151 { 152 illustration.setMimeType(mimeType); 153 } 154 } 155 } 156 157 // title 158 NodeList titleList = _xPathProcessor.selectNodeList(document, "/w:document/w:body/w:p[w:pPr/w:pStyle/@w:val='Titre'][1]/w:r/w:t", resolver); 159 StringBuilder titleBuilder = new StringBuilder(); 160 161 for (int j = 0; j < titleList.getLength(); j++) 162 { 163 titleBuilder.append(titleList.item(j).getTextContent()); 164 } 165 166 params.put("page.template", StringUtils.trimToNull(template)); 167 params.put("page.longTitle", StringUtils.trimToNull(longTitle)); 168 169 String title = titleBuilder.toString(); 170 171 content.setTitle(StringUtils.trimToNull(title) != null ? title : content.getName()); 172 String cType = _getContentType(contentType); 173 content.setTypes(new String[] {cType}); 174 175 if (abstr.length() > 0) 176 { 177 content.getMetadataHolder().setMetadata("abstract", abstr.toString()); 178 } 179 180 if (section) 181 { 182 content.tag("SECTION"); 183 } 184 185 if (directAccess) 186 { 187 content.tag("ACCES_DIRECTS"); 188 } 189 190 if (footer) 191 { 192 content.tag("FOOTER_LINK"); 193 } 194 195 if (event) 196 { 197 content.tag("EVENT"); 198 } 199 200 // actual content 201 Map<String, Object> context = new HashMap<>(); 202 context.put("document", document); 203 context.put("relations", relations); 204 context.put("zipFile", zipFile); 205 context.put("content", content); 206 Source src = _resolver.resolveURI("cocoon:/docx2docbook", null, context); 207 208 try (InputStream is = src.getInputStream()) 209 { 210 ModifiableRichText richText = content.getMetadataHolder().getRichText("content", true); 211 212 richText.setLastModified(new Date()); 213 richText.setMimeType("text/xml"); 214 richText.setInputStream(is); 215 216 SAXParser saxParser = null; 217 try (InputStream in = richText.getInputStream()) 218 { 219 saxParser = (SAXParser) _manager.lookup(SAXParser.ROLE); 220 saxParser.parse(new InputSource(in), new DefaultHandler()); 221 } 222 catch (SAXException e) 223 { 224 throw new IOException("Invalid resulting XML after transformation", e); 225 } 226 catch (ServiceException e) 227 { 228 throw new IOException("Unable to get a SAX parser.", e); 229 } 230 finally 231 { 232 _manager.release(saxParser); 233 } 234 } 235 236 ZipFile.closeQuietly(zipFile); 237 } 238 239 private String _getContentType(String contentType) 240 { 241 return StringUtils.trimToNull(contentType) != null ? contentType : "org.ametys.web.default.Content.article"; 242 } 243 244 @Override 245 public String[] getMimeTypes() 246 { 247 // handles docx mime-type 248 return new String[]{"application/vnd.openxmlformats-officedocument.wordprocessingml.document"}; 249 } 250 251 @Override 252 public void postTreatment(ModifiablePage page, Content content, File file) throws IOException 253 { 254 // Nothing to do 255 } 256 257 private Document _getDocument(ZipFile zipFile, String entryName, File file) throws IOException 258 { 259 ZipArchiveEntry entry = zipFile.getEntry(entryName); 260 261 try (InputStream is = zipFile.getInputStream(entry)) 262 { 263 return _domParser.parseDocument(new InputSource(is)); 264 } 265 catch (SAXException e) 266 { 267 throw new IOException("Unable to read " + entryName + " in file " + file.getAbsolutePath(), e); 268 } 269 } 270 271 private class DocxPrefixResolver implements PrefixResolver 272 { 273 private Map<String, String> _ns = new HashMap<>(); 274 275 public DocxPrefixResolver() 276 { 277 _ns.put("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main"); 278 _ns.put("r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"); 279 _ns.put("wp", "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"); 280 _ns.put("a", "http://schemas.openxmlformats.org/drawingml/2006/main"); 281 _ns.put("pic", "http://schemas.openxmlformats.org/drawingml/2006/picture"); 282 _ns.put("rel", "http://schemas.openxmlformats.org/package/2006/relationships"); 283 } 284 285 @Override 286 public String prefixToNamespace(String prefix) 287 { 288 return _ns.get(prefix); 289 } 290 } 291}