001/*
002 *  Copyright 2011 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016
017package org.ametys.plugins.webcontentio.docx;
018
019import java.io.File;
020import java.io.IOException;
021import java.io.InputStream;
022import java.time.ZonedDateTime;
023import java.util.HashMap;
024import java.util.Map;
025
026import org.apache.avalon.framework.context.ContextException;
027import org.apache.avalon.framework.context.Contextualizable;
028import org.apache.avalon.framework.service.ServiceException;
029import org.apache.avalon.framework.service.ServiceManager;
030import org.apache.avalon.framework.service.Serviceable;
031import org.apache.cocoon.Constants;
032import org.apache.cocoon.environment.Context;
033import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
034import org.apache.commons.compress.archivers.zip.ZipFile;
035import org.apache.commons.lang.StringUtils;
036import org.apache.excalibur.source.Source;
037import org.apache.excalibur.source.SourceResolver;
038import org.apache.excalibur.xml.dom.DOMParser;
039import org.apache.excalibur.xml.sax.SAXParser;
040import org.apache.excalibur.xml.xpath.PrefixResolver;
041import org.apache.excalibur.xml.xpath.XPathProcessor;
042import org.w3c.dom.Document;
043import org.w3c.dom.NodeList;
044import org.xml.sax.InputSource;
045import org.xml.sax.SAXException;
046import org.xml.sax.helpers.DefaultHandler;
047
048import org.ametys.cms.data.Binary;
049import org.ametys.cms.data.RichText;
050import org.ametys.cms.repository.Content;
051import org.ametys.plugins.webcontentio.ContentImporter;
052import org.ametys.web.repository.content.ModifiableWebContent;
053import org.ametys.web.repository.page.ModifiablePage;
054
055/**
056 * Imports Docx files.
057 */
058public class DocxContentImporter implements ContentImporter, Serviceable, Contextualizable
059{
060    /** The service manager */
061    protected ServiceManager _manager;
062    
063    private SourceResolver _resolver;
064    private DOMParser _domParser;
065    private XPathProcessor _xPathProcessor;
066    private Context _context;
067    
068    @Override
069    public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException
070    {
071        _context = (Context) context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT);
072    }
073    
074    @Override
075    public void service(ServiceManager manager) throws ServiceException
076    {
077        _manager = manager;
078        _resolver = (SourceResolver) manager.lookup(SourceResolver.ROLE);
079        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
080        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
081    }
082    
083    @Override
084    public void importContent(File file, ModifiableWebContent content, Map<String, String> params) throws IOException
085    {
086        ZipFile zipFile = new ZipFile(file);
087        
088        String template = null;
089        String longTitle = null;
090        String contentType = null;
091        boolean section;
092        boolean directAccess;
093        boolean footer;
094        boolean event;
095        
096        Document document = _getDocument(zipFile, "word/document.xml", file);
097        Document relations = _getDocument(zipFile, "word/_rels/document.xml.rels", file);
098        
099        PrefixResolver resolver = new DocxPrefixResolver();
100        
101        template = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[1]/w:tr[1]/w:sdt/w:sdtPr[w:tag/@w:val='template']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
102        longTitle = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[1]/w:tr[2]/w:sdt/w:sdtPr[w:tag/@w:val='long_title']/w:text", resolver);
103        contentType = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[1]/w:tr[3]/w:sdt/w:sdtPr[w:tag/@w:val='content-type']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
104        
105        // tags
106        String sectionStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[1]/w:sdt/w:sdtPr[w:tag/@w:val='SECTION']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
107        String directAccessStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[2]/w:sdt/w:sdtPr[w:tag/@w:val='ACCES_DIRECTS']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
108        String footerStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[3]/w:sdt/w:sdtPr[w:tag/@w:val='FOOTER_LINK']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
109        String eventStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[4]/w:sdt/w:sdtPr[w:tag/@w:val='EVENT']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
110        
111        section = "SECTION".equals(sectionStr);
112        directAccess = "ACCES_DIRECTS".equals(directAccessStr);
113        footer = "FOOTER_LINK".equals(footerStr);
114        event = "EVENT".equals(eventStr);
115        
116        // abstract
117        NodeList abstractList = _xPathProcessor.selectNodeList(document, "/w:document/w:body/w:sdt[w:sdtPr/w:tag/@w:val='abstract' and not(w:sdtPr/w:showingPlcHdr)]/w:sdtContent/w:p/w:r/w:t", resolver);
118        StringBuilder abstr = new StringBuilder();
119        
120        for (int i = 0; i < abstractList.getLength(); i++)
121        {
122            if (i != 0)
123            {
124                abstr.append('\n');
125            }
126            
127            abstr.append(abstractList.item(i).getTextContent());
128        }
129        
130        // illustration
131        String pictureId = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:sdt[w:sdtPr/w:tag/@w:val='illustration' and not(w:sdtPr/w:showingPlcHdr)]/w:sdtContent/w:p/w:r/w:drawing/wp:inline/a:graphic/a:graphicData/pic:pic/pic:blipFill/a:blip/@r:embed", resolver);
132        
133        if (StringUtils.trimToNull(pictureId) != null)
134        {
135            String pictureEntryName = _xPathProcessor.evaluateAsString(relations, "/rel:Relationships/rel:Relationship[@Id='" + pictureId + "']/@Target", resolver);
136            
137            int i = pictureEntryName.lastIndexOf('/');
138            String fileName = i == -1 ? pictureEntryName : pictureEntryName.substring(i + 1);
139            
140            ZipArchiveEntry entry = zipFile.getEntry("word/" + pictureEntryName);
141            
142            try (InputStream is = zipFile.getInputStream(entry))
143            {
144                Binary illustration = new Binary();
145                illustration.setLastModificationDate(ZonedDateTime.now());
146                illustration.setInputStream(is);
147                illustration.setFilename(fileName);
148                
149                String mimeType = _context.getMimeType(fileName);
150                if (mimeType != null)
151                {
152                    illustration.setMimeType(mimeType);
153                }
154                
155                content.getComposite("illustration", true).setValue("image", illustration);
156            }
157        }
158        
159        // title
160        NodeList titleList = _xPathProcessor.selectNodeList(document, "/w:document/w:body/w:p[w:pPr/w:pStyle/@w:val='Titre'][1]/w:r/w:t", resolver);
161        StringBuilder titleBuilder = new StringBuilder();
162        
163        for (int j = 0; j < titleList.getLength(); j++)
164        {
165            titleBuilder.append(titleList.item(j).getTextContent());
166        }
167        
168        params.put("page.template", StringUtils.trimToNull(template));
169        params.put("page.longTitle", StringUtils.trimToNull(longTitle));
170        
171        String title = titleBuilder.toString();
172        
173        content.setTitle(StringUtils.trimToNull(title) != null ? title : content.getName());
174        String cType = _getContentType(contentType);
175        content.setTypes(new String[] {cType});
176        
177        if (abstr.length() > 0)
178        {
179            content.setValue("abstract", abstr.toString());
180        }
181        
182        if (section)
183        {
184            content.tag("SECTION");
185        }
186        
187        if (directAccess)
188        {
189            content.tag("ACCES_DIRECTS");
190        }
191        
192        if (footer)
193        {
194            content.tag("FOOTER_LINK");
195        }
196        
197        if (event)
198        {
199            content.tag("EVENT");
200        }
201        
202        // actual content
203        Map<String, Object> context = new HashMap<>();
204        context.put("document", document);
205        context.put("relations", relations);
206        context.put("zipFile", zipFile);
207        context.put("content", content);
208        Source src = _resolver.resolveURI("cocoon:/docx2docbook", null, context);
209        
210        try (InputStream is = src.getInputStream())
211        {
212            RichText richText = new RichText();
213            richText.setLastModificationDate(ZonedDateTime.now());
214            richText.setMimeType("text/xml");
215            richText.setInputStream(is);
216            
217            SAXParser saxParser = null;
218            try (InputStream in = richText.getInputStream())
219            {
220                saxParser = (SAXParser) _manager.lookup(SAXParser.ROLE);
221                saxParser.parse(new InputSource(in), new DefaultHandler());
222                content.setValue("content", richText);
223            }
224            catch (SAXException e)
225            {
226                throw new IOException("Invalid resulting XML after transformation", e);
227            }
228            catch (ServiceException e)
229            {
230                throw new IOException("Unable to get a SAX parser.", e);
231            }
232            finally
233            {
234                _manager.release(saxParser);
235            }
236        }
237        
238        ZipFile.closeQuietly(zipFile);
239    }
240    
241    private String _getContentType(String contentType)
242    {
243        return StringUtils.trimToNull(contentType) != null ? contentType : "org.ametys.web.default.Content.article";
244    }
245
246    @Override
247    public String[] getMimeTypes()
248    {
249        // handles docx mime-type
250        return new String[]{"application/vnd.openxmlformats-officedocument.wordprocessingml.document"};
251    }
252    
253    @Override
254    public void postTreatment(ModifiablePage page, Content content, File file) throws IOException
255    {
256        // Nothing to do
257    }
258    
259    private Document _getDocument(ZipFile zipFile, String entryName, File file) throws IOException
260    {
261        ZipArchiveEntry entry = zipFile.getEntry(entryName);
262        
263        try (InputStream is = zipFile.getInputStream(entry))
264        {
265            return _domParser.parseDocument(new InputSource(is));
266        }
267        catch (SAXException e)
268        {
269            throw new IOException("Unable to read " + entryName + " in file " + file.getAbsolutePath(), e);
270        }
271    }
272    
273    private class DocxPrefixResolver implements PrefixResolver
274    {
275        private Map<String, String> _ns = new HashMap<>();
276        
277        public DocxPrefixResolver()
278        {
279            _ns.put("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
280            _ns.put("r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships");
281            _ns.put("wp", "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing");
282            _ns.put("a", "http://schemas.openxmlformats.org/drawingml/2006/main");
283            _ns.put("pic", "http://schemas.openxmlformats.org/drawingml/2006/picture");
284            _ns.put("rel", "http://schemas.openxmlformats.org/package/2006/relationships");
285        }
286        
287        @Override
288        public String prefixToNamespace(String prefix)
289        {
290            return _ns.get(prefix);
291        }
292    }
293}