001/*
002 *  Copyright 2011 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016
017package org.ametys.plugins.webcontentio.docx;
018
019import java.io.File;
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.Date;
023import java.util.HashMap;
024import java.util.Map;
025
026import org.apache.avalon.framework.context.ContextException;
027import org.apache.avalon.framework.context.Contextualizable;
028import org.apache.avalon.framework.service.ServiceException;
029import org.apache.avalon.framework.service.ServiceManager;
030import org.apache.avalon.framework.service.Serviceable;
031import org.apache.cocoon.Constants;
032import org.apache.cocoon.environment.Context;
033import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
034import org.apache.commons.compress.archivers.zip.ZipFile;
035import org.apache.commons.lang.StringUtils;
036import org.apache.excalibur.source.Source;
037import org.apache.excalibur.source.SourceResolver;
038import org.apache.excalibur.xml.dom.DOMParser;
039import org.apache.excalibur.xml.sax.SAXParser;
040import org.apache.excalibur.xml.xpath.PrefixResolver;
041import org.apache.excalibur.xml.xpath.XPathProcessor;
042import org.w3c.dom.Document;
043import org.w3c.dom.NodeList;
044import org.xml.sax.InputSource;
045import org.xml.sax.SAXException;
046import org.xml.sax.helpers.DefaultHandler;
047
048import org.ametys.cms.repository.Content;
049import org.ametys.plugins.repository.metadata.ModifiableBinaryMetadata;
050import org.ametys.plugins.repository.metadata.ModifiableRichText;
051import org.ametys.plugins.webcontentio.ContentImporter;
052import org.ametys.web.repository.content.ModifiableWebContent;
053import org.ametys.web.repository.page.ModifiablePage;
054
055/**
056 * Imports Docx files.
057 */
058public class DocxContentImporter implements ContentImporter, Serviceable, Contextualizable
059{
060    private SourceResolver _resolver;
061    private DOMParser _domParser;
062    private SAXParser _saxParser;
063    private XPathProcessor _xPathProcessor;
064    private Context _context;
065    
066    @Override
067    public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException
068    {
069        _context = (Context) context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT);
070    }
071    
072    @Override
073    public void service(ServiceManager manager) throws ServiceException
074    {
075        _resolver = (SourceResolver) manager.lookup(SourceResolver.ROLE);
076        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
077        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
078        _saxParser = (SAXParser) manager.lookup(SAXParser.ROLE);
079    }
080    
081    @Override
082    public void importContent(File file, ModifiableWebContent content, Map<String, String> params) throws IOException
083    {
084        ZipFile zipFile = new ZipFile(file);
085        
086        String template = null;
087        String longTitle = null;
088        String contentType = null;
089        boolean section;
090        boolean directAccess;
091        boolean footer;
092        boolean event;
093        
094        Document document = _getDocument(zipFile, "word/document.xml", file);
095        Document relations = _getDocument(zipFile, "word/_rels/document.xml.rels", file);
096        
097        PrefixResolver resolver = new DocxPrefixResolver();
098        
099        template = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[1]/w:tr[1]/w:sdt/w:sdtPr[w:tag/@w:val='template']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
100        longTitle = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[1]/w:tr[2]/w:sdt/w:sdtPr[w:tag/@w:val='long_title']/w:text", resolver);
101        contentType = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[1]/w:tr[3]/w:sdt/w:sdtPr[w:tag/@w:val='content-type']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
102        
103        // tags
104        String sectionStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[1]/w:sdt/w:sdtPr[w:tag/@w:val='SECTION']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
105        String directAccessStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[2]/w:sdt/w:sdtPr[w:tag/@w:val='ACCES_DIRECTS']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
106        String footerStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[3]/w:sdt/w:sdtPr[w:tag/@w:val='FOOTER_LINK']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
107        String eventStr = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:tbl[2]/w:tr[4]/w:sdt/w:sdtPr[w:tag/@w:val='EVENT']/w:comboBox/w:listItem[@w:displayText=../../../w:sdtContent/w:tc/w:p/w:r/w:t]/@w:value", resolver);
108        
109        section = "SECTION".equals(sectionStr);
110        directAccess = "ACCES_DIRECTS".equals(directAccessStr);
111        footer = "FOOTER_LINK".equals(footerStr);
112        event = "EVENT".equals(eventStr);
113        
114        // abstract
115        NodeList abstractList = _xPathProcessor.selectNodeList(document, "/w:document/w:body/w:sdt[w:sdtPr/w:tag/@w:val='abstract' and not(w:sdtPr/w:showingPlcHdr)]/w:sdtContent/w:p/w:r/w:t", resolver);
116        StringBuilder abstr = new StringBuilder();
117        
118        for (int i = 0; i < abstractList.getLength(); i++)
119        {
120            if (i != 0)
121            {
122                abstr.append('\n');
123            }
124            
125            abstr.append(abstractList.item(i).getTextContent());
126        }
127        
128        // illustration
129        String pictureId = _xPathProcessor.evaluateAsString(document, "/w:document/w:body/w:sdt[w:sdtPr/w:tag/@w:val='illustration' and not(w:sdtPr/w:showingPlcHdr)]/w:sdtContent/w:p/w:r/w:drawing/wp:inline/a:graphic/a:graphicData/pic:pic/pic:blipFill/a:blip/@r:embed", resolver);
130        
131        if (StringUtils.trimToNull(pictureId) != null)
132        {
133            String pictureEntryName = _xPathProcessor.evaluateAsString(relations, "/rel:Relationships/rel:Relationship[@Id='" + pictureId + "']/@Target", resolver);
134            
135            int i = pictureEntryName.lastIndexOf('/');
136            String fileName = i == -1 ? pictureEntryName : pictureEntryName.substring(i + 1);
137            
138            ZipArchiveEntry entry = zipFile.getEntry("word/" + pictureEntryName);
139            
140            try (InputStream is = zipFile.getInputStream(entry))
141            {
142                ModifiableBinaryMetadata illustration = content.getMetadataHolder().getCompositeMetadata("illustration", true).getBinaryMetadata("image", true);
143                illustration.setLastModified(new Date());
144                illustration.setInputStream(is);
145                illustration.setFilename(fileName);
146                
147                String mimeType = _context.getMimeType(fileName);
148                if (mimeType != null)
149                {
150                    illustration.setMimeType(mimeType);
151                }
152            }
153        }
154        
155        // title
156        NodeList titleList = _xPathProcessor.selectNodeList(document, "/w:document/w:body/w:p[w:pPr/w:pStyle/@w:val='Titre'][1]/w:r/w:t", resolver);
157        StringBuilder titleBuilder = new StringBuilder();
158        
159        for (int j = 0; j < titleList.getLength(); j++)
160        {
161            titleBuilder.append(titleList.item(j).getTextContent());
162        }
163        
164        params.put("page.template", StringUtils.trimToNull(template));
165        params.put("page.longTitle", StringUtils.trimToNull(longTitle));
166        
167        String title = titleBuilder.toString();
168        
169        content.setTitle(StringUtils.trimToNull(title) != null ? title : content.getName());
170        String cType = StringUtils.trimToNull(contentType) != null ? contentType : "org.ametys.web.default.Content.article";
171        content.setTypes(new String[] {cType});
172        
173        if (abstr.length() > 0)
174        {
175            content.getMetadataHolder().setMetadata("abstract", abstr.toString());
176        }
177        
178        if (section)
179        {
180            content.tag("SECTION");
181        }
182        
183        if (directAccess)
184        {
185            content.tag("ACCES_DIRECTS");
186        }
187        
188        if (footer)
189        {
190            content.tag("FOOTER_LINK");
191        }
192        
193        if (event)
194        {
195            content.tag("EVENT");
196        }
197        
198        // actual content
199        Map<String, Object> context = new HashMap<>();
200        context.put("document", document);
201        context.put("relations", relations);
202        context.put("zipFile", zipFile);
203        context.put("content", content);
204        Source src = _resolver.resolveURI("cocoon:/docx2docbook", null, context);
205        
206        try (InputStream is = src.getInputStream())
207        {
208            ModifiableRichText richText = content.getMetadataHolder().getRichText("content", true);
209            
210            richText.setLastModified(new Date());
211            richText.setMimeType("text/xml");
212            richText.setInputStream(is);
213            
214            try (InputStream in = richText.getInputStream())
215            {
216                _saxParser.parse(new InputSource(in), new DefaultHandler());
217            }
218            catch (SAXException e)
219            {
220                throw new IOException("Invalid resulting XML after transformation", e);
221            }
222        }
223        
224        ZipFile.closeQuietly(zipFile);
225    }
226
227    @Override
228    public String[] getMimeTypes()
229    {
230        // handles docx mime-type
231        return new String[]{"application/vnd.openxmlformats-officedocument.wordprocessingml.document"};
232    }
233    
234    @Override
235    public void postTreatment(ModifiablePage page, Content content, File file) throws IOException
236    {
237        // Nothing to do
238    }
239    
240    private Document _getDocument(ZipFile zipFile, String entryName, File file) throws IOException
241    {
242        ZipArchiveEntry entry = zipFile.getEntry(entryName);
243        
244        try (InputStream is = zipFile.getInputStream(entry))
245        {
246            return _domParser.parseDocument(new InputSource(is));
247        }
248        catch (SAXException e)
249        {
250            throw new IOException("Unable to read " + entryName + " in file " + file.getAbsolutePath(), e);
251        }
252    }
253    
254    private class DocxPrefixResolver implements PrefixResolver
255    {
256        private Map<String, String> _ns = new HashMap<>();
257        
258        public DocxPrefixResolver()
259        {
260            _ns.put("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
261            _ns.put("r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships");
262            _ns.put("wp", "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing");
263            _ns.put("a", "http://schemas.openxmlformats.org/drawingml/2006/main");
264            _ns.put("pic", "http://schemas.openxmlformats.org/drawingml/2006/picture");
265            _ns.put("rel", "http://schemas.openxmlformats.org/package/2006/relationships");
266        }
267        
268        @Override
269        public String prefixToNamespace(String prefix)
270        {
271            return _ns.get(prefix);
272        }
273    }
274}