/*
 *  Copyright 2011 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.ametys.plugins.webcontentio.docx;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.time.ZonedDateTime;
import java.util.HashMap;
import java.util.Map;

import org.apache.avalon.framework.context.ContextException;
import org.apache.avalon.framework.context.Contextualizable;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.cocoon.Constants;
import org.apache.cocoon.environment.Context;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipFile;
import org.apache.commons.lang.StringUtils;
import org.apache.excalibur.source.Source;
import org.apache.excalibur.source.SourceResolver;
import org.apache.excalibur.xml.dom.DOMParser;
import org.apache.excalibur.xml.sax.SAXParser;
import org.apache.excalibur.xml.xpath.PrefixResolver;
import org.apache.excalibur.xml.xpath.XPathProcessor;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import org.ametys.cms.data.Binary;
import org.ametys.cms.data.RichText;
import org.ametys.cms.repository.Content;
import org.ametys.plugins.webcontentio.ContentImporter;
import org.ametys.web.repository.content.ModifiableWebContent;
import org.ametys.web.repository.page.ModifiablePage;

/**
 * Imports Docx files.
 * <p>
 * The importer opens the docx archive, reads <code>word/document.xml</code> and its
 * relationships part, extracts page and content metadata from structured document tags
 * through XPath, and stores the result of the <code>cocoon:/docx2docbook</code> pipeline
 * as the <code>content</code> value of the target content.
 */
public class DocxContentImporter implements ContentImporter, Serviceable, Contextualizable
{
    /** Content type applied when the document does not select one. */
    private static final String __DEFAULT_CONTENT_TYPE = "org.ametys.web.default.Content.article";
    
    /**
     * Tags that can be selected in the second table of the document.
     * The tag at index <code>i</code> is read from row <code>i + 1</code> of that table.
     */
    private static final String[] __TAGS = {"SECTION", "ACCES_DIRECTS", "FOOTER_LINK", "EVENT"};
    
    /** The service manager */
    protected ServiceManager _manager;
    
    private SourceResolver _resolver;
    private DOMParser _domParser;
    private XPathProcessor _xPathProcessor;
    private Context _context;
    
    @Override
    public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException
    {
        _context = (Context) context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT);
    }
    
    @Override
    public void service(ServiceManager manager) throws ServiceException
    {
        _manager = manager;
        _resolver = (SourceResolver) manager.lookup(SourceResolver.ROLE);
        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
    }
    
    /**
     * Fills the given content from a docx file.
     * <p>
     * Side effects: puts <code>page.template</code> and <code>page.longTitle</code> into
     * <code>params</code> (the value is <code>null</code> when the document provides none),
     * and sets the title, types, abstract, illustration, tags and rich text of the content.
     * The archive is always closed before this method returns, whether it succeeds or not.
     * @param file the docx file to import
     * @param content the content to fill
     * @param params the import parameters, enriched with the page template and long title
     * @throws IOException if the archive cannot be read, if an expected entry is missing,
     *         or if the transformed document is not valid XML
     */
    @Override
    public void importContent(File file, ModifiableWebContent content, Map<String, String> params) throws IOException
    {
        ZipFile zipFile = new ZipFile(file);
        
        try
        {
            Document document = _getDocument(zipFile, "word/document.xml", file);
            Document relations = _getDocument(zipFile, "word/_rels/document.xml.rels", file);
            
            PrefixResolver resolver = new DocxPrefixResolver();
            
            String template = _getComboBoxValue(document, 1, 1, "template", resolver);
            // NOTE(review): this expression selects the w:text element located in w:sdtPr, not the text
            // typed in w:sdtContent; it may always evaluate to an empty string - confirm against the template.
            String longTitle = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[1]/w:tr[2]/w:sdt/w:sdtPr[w:tag/@w:val='long_title']/w:text", resolver);
            String contentType = _getComboBoxValue(document, 1, 3, "content-type", resolver);
            
            String abstr = _getAbstract(document, resolver);
            
            _importIllustration(zipFile, document, relations, content, resolver, file);
            
            String title = _getTitle(document, resolver);
            
            params.put("page.template", StringUtils.trimToNull(template));
            params.put("page.longTitle", StringUtils.trimToNull(longTitle));
            
            // Fall back to the content name when the document has no usable title
            content.setTitle(StringUtils.trimToNull(title) != null ? title : content.getName());
            content.setTypes(new String[] {_getContentType(contentType)});
            
            if (abstr.length() > 0)
            {
                content.setValue("abstract", abstr);
            }
            
            _importTags(document, content, resolver);
            
            _importRichText(zipFile, document, relations, content);
        }
        finally
        {
            // Release the file handle on failure paths too
            ZipFile.closeQuietly(zipFile);
        }
    }
    
    /**
     * Reads the value of the combo box item currently displayed in a tagged table row.
     * @param document the main document part
     * @param table the 1-based index of the table in the document body
     * @param row the 1-based index of the row in the table
     * @param tag the value of the <code>w:tag</code> identifying the structured document tag
     * @param resolver the namespace prefix resolver
     * @return the result of the XPath evaluation as a string
     */
    private String _getComboBoxValue(Document document, int table, int row, String tag, PrefixResolver resolver)
    {
        String xpath = "/w:document/w:body/w:tbl[" + table + "]/w:tr[" + row + "]/w:sdt/w:sdtPr[w:tag/@w:val='" + tag + "']"
                + "/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value";
        return _xPathProcessor.evaluateAsString(document, xpath, resolver);
    }
    
    /**
     * Builds the abstract from the text runs of the <code>abstract</code> structured document tag,
     * ignoring it when it still shows its placeholder. Runs are joined with a line feed.
     * @param document the main document part
     * @param resolver the namespace prefix resolver
     * @return the abstract, empty if there is none
     */
    private String _getAbstract(Document document, PrefixResolver resolver)
    {
        NodeList abstractList = _xPathProcessor.selectNodeList(document, "/w:document/w:body/w:sdt[w:sdtPr/w:tag/@w:val='abstract' and not(w:sdtPr/w:showingPlcHdr)]/w:sdtContent/w:p/w:r/w:t", resolver);
        StringBuilder abstr = new StringBuilder();
        
        for (int i = 0; i < abstractList.getLength(); i++)
        {
            if (i != 0)
            {
                abstr.append('\n');
            }
            
            abstr.append(abstractList.item(i).getTextContent());
        }
        
        return abstr.toString();
    }
    
    /**
     * Concatenates the text runs of the first body paragraph whose style is <code>Titre</code>.
     * @param document the main document part
     * @param resolver the namespace prefix resolver
     * @return the title, empty if there is no such paragraph
     */
    private String _getTitle(Document document, PrefixResolver resolver)
    {
        NodeList titleList = _xPathProcessor.selectNodeList(document, "/w:document/w:body/w:p[w:pPr/w:pStyle/@w:val='Titre'][1]/w:r/w:t", resolver);
        StringBuilder titleBuilder = new StringBuilder();
        
        for (int i = 0; i < titleList.getLength(); i++)
        {
            titleBuilder.append(titleList.item(i).getTextContent());
        }
        
        return titleBuilder.toString();
    }
    
    /**
     * Stores the picture embedded in the <code>illustration</code> structured document tag, if any,
     * as the <code>image</code> value of the <code>illustration</code> composite of the content.
     * @param zipFile the opened docx archive
     * @param document the main document part
     * @param relations the relationships part of the main document
     * @param content the content to fill
     * @param resolver the namespace prefix resolver
     * @param file the docx file, used for error reporting only
     * @throws IOException if the picture referenced by the document cannot be found or read
     */
    private void _importIllustration(ZipFile zipFile, Document document, Document relations, ModifiableWebContent content, PrefixResolver resolver, File file) throws IOException
    {
        String pictureId = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:sdt[w:sdtPr/w:tag/@w:val='illustration' and not(w:sdtPr/w:showingPlcHdr)]/w:sdtContent/w:p/w:r/w:drawing/wp:inline/a:graphic/a:graphicData/pic:pic/pic:blipFill/a:blip/@r:embed", resolver);
        
        if (StringUtils.trimToNull(pictureId) == null)
        {
            // No illustration in this document
            return;
        }
        
        String pictureEntryName = _xPathProcessor.evaluateAsString(relations, "/rel:Relationships/rel:Relationship[@Id='" + pictureId + "']/@Target", resolver);
        
        int lastSlash = pictureEntryName.lastIndexOf('/');
        String fileName = lastSlash == -1 ? pictureEntryName : pictureEntryName.substring(lastSlash + 1);
        
        // The target is looked up below the word/ folder of the archive
        ZipArchiveEntry entry = zipFile.getEntry("word/" + pictureEntryName);
        if (entry == null)
        {
            throw new IOException("Unable to find the illustration '" + pictureEntryName + "' (relationship " + pictureId + ") in file " + file.getAbsolutePath());
        }
        
        try (InputStream is = zipFile.getInputStream(entry))
        {
            Binary illustration = new Binary();
            illustration.setLastModificationDate(ZonedDateTime.now());
            illustration.setInputStream(is);
            illustration.setFilename(fileName);
            
            String mimeType = _context.getMimeType(fileName);
            if (mimeType != null)
            {
                illustration.setMimeType(mimeType);
            }
            
            content.getComposite("illustration", true).setValue("image", illustration);
        }
    }
    
    /**
     * Tags the content with every tag whose combo box, in the second table of the document,
     * has the tag name as selected value.
     * @param document the main document part
     * @param content the content to tag
     * @param resolver the namespace prefix resolver
     */
    private void _importTags(Document document, ModifiableWebContent content, PrefixResolver resolver)
    {
        for (int i = 0; i < __TAGS.length; i++)
        {
            String tag = __TAGS[i];
            if (tag.equals(_getComboBoxValue(document, 2, i + 1, tag, resolver)))
            {
                content.tag(tag);
            }
        }
    }
    
    /**
     * Runs the <code>cocoon:/docx2docbook</code> pipeline and stores its result as the
     * <code>content</code> value of the content, after checking that it can be parsed as XML.
     * @param zipFile the opened docx archive, handed to the pipeline
     * @param document the main document part, handed to the pipeline
     * @param relations the relationships part, handed to the pipeline
     * @param content the content to fill, also handed to the pipeline
     * @throws IOException if the pipeline fails, produces invalid XML, or no SAX parser is available
     */
    private void _importRichText(ZipFile zipFile, Document document, Document relations, ModifiableWebContent content) throws IOException
    {
        Map<String, Object> context = new HashMap<>();
        context.put("document", document);
        context.put("relations", relations);
        context.put("zipFile", zipFile);
        context.put("content", content);
        
        Source src = _resolver.resolveURI("cocoon:/docx2docbook", null, context);
        
        try (InputStream is = src.getInputStream())
        {
            RichText richText = new RichText();
            richText.setLastModificationDate(ZonedDateTime.now());
            richText.setMimeType("text/xml");
            richText.setInputStream(is);
            
            SAXParser saxParser = null;
            try (InputStream in = richText.getInputStream())
            {
                saxParser = (SAXParser) _manager.lookup(SAXParser.ROLE);
                // Parse with a no-op handler: the only purpose is to reject invalid XML
                saxParser.parse(new InputSource(in), new DefaultHandler());
                content.setValue("content", richText);
            }
            catch (SAXException e)
            {
                throw new IOException("Invalid resulting XML after transformation", e);
            }
            catch (ServiceException e)
            {
                throw new IOException("Unable to get a SAX parser.", e);
            }
            finally
            {
                // The lookup itself may have failed, in which case there is nothing to release
                if (saxParser != null)
                {
                    _manager.release(saxParser);
                }
            }
        }
        finally
        {
            // Runs after the stream is closed; the resolved source must be given back to the resolver
            _resolver.release(src);
        }
    }
    
    /**
     * Returns the content type to apply.
     * @param contentType the content type read from the document, may be blank or <code>null</code>
     * @return the given content type, or the default article type when it is blank
     */
    private String _getContentType(String contentType)
    {
        return StringUtils.trimToNull(contentType) != null ? contentType : __DEFAULT_CONTENT_TYPE;
    }
    
    @Override
    public String[] getMimeTypes()
    {
        // handles docx mime-type
        return new String[]{"application/vnd.openxmlformats-officedocument.wordprocessingml.document"};
    }
    
    @Override
    public void postTreatment(ModifiablePage page, Content content, File file) throws IOException
    {
        // Nothing to do
    }
    
    /**
     * Parses an entry of the archive as a DOM document.
     * @param zipFile the opened docx archive
     * @param entryName the name of the entry to parse
     * @param file the docx file, used for error reporting only
     * @return the parsed document
     * @throws IOException if the entry does not exist or cannot be read or parsed
     */
    private Document _getDocument(ZipFile zipFile, String entryName, File file) throws IOException
    {
        ZipArchiveEntry entry = zipFile.getEntry(entryName);
        if (entry == null)
        {
            throw new IOException("Unable to find " + entryName + " in file " + file.getAbsolutePath());
        }
        
        try (InputStream is = zipFile.getInputStream(entry))
        {
            return _domParser.parseDocument(new InputSource(is));
        }
        catch (SAXException e)
        {
            throw new IOException("Unable to read " + entryName + " in file " + file.getAbsolutePath(), e);
        }
    }
    
    /**
     * Maps the prefixes used in the XPath expressions of this importer to their namespace URIs.
     */
    private static final class DocxPrefixResolver implements PrefixResolver
    {
        private final Map<String, String> _ns = new HashMap<>();
        
        DocxPrefixResolver()
        {
            _ns.put("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
            _ns.put("r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships");
            _ns.put("wp", "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing");
            _ns.put("a", "http://schemas.openxmlformats.org/drawingml/2006/main");
            _ns.put("pic", "http://schemas.openxmlformats.org/drawingml/2006/picture");
            _ns.put("rel", "http://schemas.openxmlformats.org/package/2006/relationships");
        }
        
        @Override
        public String prefixToNamespace(String prefix)
        {
            return _ns.get(prefix);
        }
    }
}