001/*
002 *  Copyright 2011 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016
017package org.ametys.plugins.webcontentio.docx;
018
019import java.io.File;
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.Date;
023import java.util.HashMap;
024import java.util.Map;
025
026import org.apache.avalon.framework.context.ContextException;
027import org.apache.avalon.framework.context.Contextualizable;
028import org.apache.avalon.framework.service.ServiceException;
029import org.apache.avalon.framework.service.ServiceManager;
030import org.apache.avalon.framework.service.Serviceable;
031import org.apache.cocoon.Constants;
032import org.apache.cocoon.environment.Context;
033import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
034import org.apache.commons.compress.archivers.zip.ZipFile;
035import org.apache.commons.lang.StringUtils;
036import org.apache.excalibur.source.Source;
037import org.apache.excalibur.source.SourceResolver;
038import org.apache.excalibur.xml.dom.DOMParser;
039import org.apache.excalibur.xml.sax.SAXParser;
040import org.apache.excalibur.xml.xpath.PrefixResolver;
041import org.apache.excalibur.xml.xpath.XPathProcessor;
042import org.w3c.dom.Document;
043import org.w3c.dom.NodeList;
044import org.xml.sax.InputSource;
045import org.xml.sax.SAXException;
046import org.xml.sax.helpers.DefaultHandler;
047
048import org.ametys.cms.repository.Content;
049import org.ametys.plugins.repository.metadata.ModifiableBinaryMetadata;
050import org.ametys.plugins.repository.metadata.ModifiableRichText;
051import org.ametys.plugins.webcontentio.ContentImporter;
052import org.ametys.web.repository.content.ModifiableWebContent;
053import org.ametys.web.repository.page.ModifiablePage;
054
055/**
056 * Imports Docx files.
057 */
058public class DocxContentImporter implements ContentImporter, Serviceable, Contextualizable
059{
060    /** The service manager */
061    protected ServiceManager _manager;
062    
063    private SourceResolver _resolver;
064    private DOMParser _domParser;
065    private XPathProcessor _xPathProcessor;
066    private Context _context;
067    
068    @Override
069    public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException
070    {
071        _context = (Context) context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT);
072    }
073    
074    @Override
075    public void service(ServiceManager manager) throws ServiceException
076    {
077        _manager = manager;
078        _resolver = (SourceResolver) manager.lookup(SourceResolver.ROLE);
079        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
080        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
081    }
082    
083    @Override
084    public void importContent(File file, ModifiableWebContent content, Map<String, String> params) throws IOException
085    {
086        ZipFile zipFile = new ZipFile(file);
087        
088        String template = null;
089        String longTitle = null;
090        String contentType = null;
091        boolean section;
092        boolean directAccess;
093        boolean footer;
094        boolean event;
095        
096        Document document = _getDocument(zipFile, "word/document.xml", file);
097        Document relations = _getDocument(zipFile, "word/_rels/document.xml.rels", file);
098        
099        PrefixResolver resolver = new DocxPrefixResolver();
100        
101        template = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[1]/w:tr[1]/w:sdt/w:sdtPr[w:tag/@w:val='template']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
102        longTitle = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[1]/w:tr[2]/w:sdt/w:sdtPr[w:tag/@w:val='long_title']/w:text", resolver);
103        contentType = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[1]/w:tr[3]/w:sdt/w:sdtPr[w:tag/@w:val='content-type']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
104        
105        // tags
106        String sectionStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[1]/w:sdt/w:sdtPr[w:tag/@w:val='SECTION']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
107        String directAccessStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[2]/w:sdt/w:sdtPr[w:tag/@w:val='ACCES_DIRECTS']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
108        String footerStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[3]/w:sdt/w:sdtPr[w:tag/@w:val='FOOTER_LINK']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
109        String eventStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[4]/w:sdt/w:sdtPr[w:tag/@w:val='EVENT']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
110        
111        section = "SECTION".equals(sectionStr);
112        directAccess = "ACCES_DIRECTS".equals(directAccessStr);
113        footer = "FOOTER_LINK".equals(footerStr);
114        event = "EVENT".equals(eventStr);
115        
116        // abstract
117        NodeList abstractList = _xPathProcessor.selectNodeList(document, "/w:document/w:body/w:sdt[w:sdtPr/w:tag/@w:val='abstract' and not(w:sdtPr/w:showingPlcHdr)]/w:sdtContent/w:p/w:r/w:t", resolver);
118        StringBuilder abstr = new StringBuilder();
119        
120        for (int i = 0; i < abstractList.getLength(); i++)
121        {
122            if (i != 0)
123            {
124                abstr.append('\n');
125            }
126            
127            abstr.append(abstractList.item(i).getTextContent());
128        }
129        
130        // illustration
131        String pictureId = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:sdt[w:sdtPr/w:tag/@w:val='illustration' and not(w:sdtPr/w:showingPlcHdr)]/w:sdtContent/w:p/w:r/w:drawing/wp:inline/a:graphic/a:graphicData/pic:pic/pic:blipFill/a:blip/@r:embed", resolver);
132        
133        if (StringUtils.trimToNull(pictureId) != null)
134        {
135            String pictureEntryName = _xPathProcessor.evaluateAsString(relations, "/rel:Relationships/rel:Relationship[@Id='" + pictureId + "']/@Target", resolver);
136            
137            int i = pictureEntryName.lastIndexOf('/');
138            String fileName = i == -1 ? pictureEntryName : pictureEntryName.substring(i + 1);
139            
140            ZipArchiveEntry entry = zipFile.getEntry("word/" + pictureEntryName);
141            
142            try (InputStream is = zipFile.getInputStream(entry))
143            {
144                ModifiableBinaryMetadata illustration = content.getMetadataHolder().getCompositeMetadata("illustration", true).getBinaryMetadata("image", true);
145                illustration.setLastModified(new Date());
146                illustration.setInputStream(is);
147                illustration.setFilename(fileName);
148                
149                String mimeType = _context.getMimeType(fileName);
150                if (mimeType != null)
151                {
152                    illustration.setMimeType(mimeType);
153                }
154            }
155        }
156        
157        // title
158        NodeList titleList = _xPathProcessor.selectNodeList(document, "/w:document/w:body/w:p[w:pPr/w:pStyle/@w:val='Titre'][1]/w:r/w:t", resolver);
159        StringBuilder titleBuilder = new StringBuilder();
160        
161        for (int j = 0; j < titleList.getLength(); j++)
162        {
163            titleBuilder.append(titleList.item(j).getTextContent());
164        }
165        
166        params.put("page.template", StringUtils.trimToNull(template));
167        params.put("page.longTitle", StringUtils.trimToNull(longTitle));
168        
169        String title = titleBuilder.toString();
170        
171        content.setTitle(StringUtils.trimToNull(title) != null ? title : content.getName());
172        String cType = _getContentType(contentType);
173        content.setTypes(new String[] {cType});
174        
175        if (abstr.length() > 0)
176        {
177            content.getMetadataHolder().setMetadata("abstract", abstr.toString());
178        }
179        
180        if (section)
181        {
182            content.tag("SECTION");
183        }
184        
185        if (directAccess)
186        {
187            content.tag("ACCES_DIRECTS");
188        }
189        
190        if (footer)
191        {
192            content.tag("FOOTER_LINK");
193        }
194        
195        if (event)
196        {
197            content.tag("EVENT");
198        }
199        
200        // actual content
201        Map<String, Object> context = new HashMap<>();
202        context.put("document", document);
203        context.put("relations", relations);
204        context.put("zipFile", zipFile);
205        context.put("content", content);
206        Source src = _resolver.resolveURI("cocoon:/docx2docbook", null, context);
207        
208        try (InputStream is = src.getInputStream())
209        {
210            ModifiableRichText richText = content.getMetadataHolder().getRichText("content", true);
211            
212            richText.setLastModified(new Date());
213            richText.setMimeType("text/xml");
214            richText.setInputStream(is);
215            
216            SAXParser saxParser = null;
217            try (InputStream in = richText.getInputStream())
218            {
219                saxParser = (SAXParser) _manager.lookup(SAXParser.ROLE);
220                saxParser.parse(new InputSource(in), new DefaultHandler());
221            }
222            catch (SAXException e)
223            {
224                throw new IOException("Invalid resulting XML after transformation", e);
225            }
226            catch (ServiceException e)
227            {
228                throw new IOException("Unable to get a SAX parser.", e);
229            }
230            finally
231            {
232                _manager.release(saxParser);
233            }
234        }
235        
236        ZipFile.closeQuietly(zipFile);
237    }
238    
239    private String _getContentType(String contentType)
240    {
241        return StringUtils.trimToNull(contentType) != null ? contentType : "org.ametys.web.default.Content.article";
242    }
243
244    @Override
245    public String[] getMimeTypes()
246    {
247        // handles docx mime-type
248        return new String[]{"application/vnd.openxmlformats-officedocument.wordprocessingml.document"};
249    }
250    
251    @Override
252    public void postTreatment(ModifiablePage page, Content content, File file) throws IOException
253    {
254        // Nothing to do
255    }
256    
257    private Document _getDocument(ZipFile zipFile, String entryName, File file) throws IOException
258    {
259        ZipArchiveEntry entry = zipFile.getEntry(entryName);
260        
261        try (InputStream is = zipFile.getInputStream(entry))
262        {
263            return _domParser.parseDocument(new InputSource(is));
264        }
265        catch (SAXException e)
266        {
267            throw new IOException("Unable to read " + entryName + " in file " + file.getAbsolutePath(), e);
268        }
269    }
270    
271    private class DocxPrefixResolver implements PrefixResolver
272    {
273        private Map<String, String> _ns = new HashMap<>();
274        
275        public DocxPrefixResolver()
276        {
277            _ns.put("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
278            _ns.put("r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships");
279            _ns.put("wp", "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing");
280            _ns.put("a", "http://schemas.openxmlformats.org/drawingml/2006/main");
281            _ns.put("pic", "http://schemas.openxmlformats.org/drawingml/2006/picture");
282            _ns.put("rel", "http://schemas.openxmlformats.org/package/2006/relationships");
283        }
284        
285        @Override
286        public String prefixToNamespace(String prefix)
287        {
288            return _ns.get(prefix);
289        }
290    }
291}