001/*
002 *  Copyright 2016 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.webcontentio.xml;
017
018import java.io.ByteArrayInputStream;
019import java.io.ByteArrayOutputStream;
020import java.io.File;
021import java.io.FileInputStream;
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.InputStreamReader;
026import java.io.Reader;
027import java.io.StringWriter;
028import java.io.UnsupportedEncodingException;
029import java.lang.reflect.Array;
030import java.net.HttpURLConnection;
031import java.net.URL;
032import java.time.ZonedDateTime;
033import java.util.ArrayList;
034import java.util.List;
035import java.util.Map;
036import java.util.Optional;
037import java.util.Properties;
038import java.util.regex.Matcher;
039import java.util.regex.Pattern;
040
041import javax.xml.transform.OutputKeys;
042import javax.xml.transform.Transformer;
043import javax.xml.transform.TransformerException;
044import javax.xml.transform.TransformerFactory;
045import javax.xml.transform.dom.DOMSource;
046import javax.xml.transform.stream.StreamResult;
047
048import org.apache.avalon.framework.logger.AbstractLogEnabled;
049import org.apache.avalon.framework.service.ServiceException;
050import org.apache.avalon.framework.service.ServiceManager;
051import org.apache.avalon.framework.service.Serviceable;
052import org.apache.commons.io.FilenameUtils;
053import org.apache.commons.io.IOUtils;
054import org.apache.commons.lang3.StringUtils;
055import org.apache.excalibur.xml.dom.DOMParser;
056import org.apache.excalibur.xml.xpath.XPathProcessor;
057import org.w3c.dom.Document;
058import org.w3c.dom.Node;
059import org.w3c.dom.NodeList;
060import org.xml.sax.InputSource;
061import org.xml.sax.SAXException;
062
063import org.ametys.cms.contenttype.ContentTypeExtensionPoint;
064import org.ametys.cms.data.Binary;
065import org.ametys.cms.data.Geocode;
066import org.ametys.cms.data.RichText;
067import org.ametys.cms.data.type.ModelItemTypeConstants;
068import org.ametys.cms.repository.Content;
069import org.ametys.core.util.URIUtils;
070import org.ametys.plugins.repository.data.holder.ModifiableModelAwareDataHolder;
071import org.ametys.plugins.repository.data.holder.group.impl.ModifiableModelAwareComposite;
072import org.ametys.plugins.repository.data.holder.group.impl.ModifiableModelAwareRepeater;
073import org.ametys.plugins.repository.data.holder.group.impl.ModifiableModelAwareRepeaterEntry;
074import org.ametys.plugins.repository.model.CompositeDefinition;
075import org.ametys.plugins.repository.model.RepeaterDefinition;
076import org.ametys.plugins.webcontentio.ContentImporter;
077import org.ametys.runtime.model.ElementDefinition;
078import org.ametys.runtime.model.ModelItem;
079import org.ametys.runtime.model.type.ElementType;
080import org.ametys.web.repository.content.ModifiableWebContent;
081import org.ametys.web.repository.page.ModifiablePage;
082
083/**
084 * Default XML content importer
085 */
086public class XmlContentImporter extends AbstractLogEnabled implements ContentImporter, Serviceable
087{
088    private DOMParser _domParser;
089    private XPathProcessor _xPathProcessor;
090    private ContentTypeExtensionPoint _contentTypeExtensionPoint;
091
092    @Override
093    public void service(ServiceManager manager) throws ServiceException
094    {
095        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
096        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
097        _contentTypeExtensionPoint = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE);
098    }
099    
100    @Override
101    public void importContent(File file, ModifiableWebContent content, Map<String, String> params) throws IOException
102    {
103        Document doc = getXmlDocFromFile(file);
104        
105        if (doc == null)
106        {
107            throw new IOException("Unable to retrieve the xml document from the file received.");
108        }
109        
110        Node xmlContent = _xPathProcessor.selectSingleNode(doc, "/content");
111
112        String contentTypeId = _xPathProcessor.evaluateAsString(xmlContent, "@type");
113        
114        if (StringUtils.isEmpty(contentTypeId))
115        {
116            throw new IOException("Invalid file content : no content type specified.");
117        }
118        
119        if (!_contentTypeExtensionPoint.hasExtension(contentTypeId))
120        {
121            throw new IOException("Invalid file content : the specified content type does not exist.");
122        }
123        
124        content.setTypes(new String[] {contentTypeId});
125        
126        Node title = _xPathProcessor.selectSingleNode(xmlContent, "title");
127        
128        if (title == null)
129        {
130            throw new IOException("Invalid file content : no title found, but it is mandatory.");
131        }
132        
133        _importAttributes(content, xmlContent);
134    }
135    
136    @Override
137    public String[] getMimeTypes()
138    {
139        // handles xml mime-types
140        return new String[] {"application/xml", "text/xml"};
141    }
142    
    @Override
    public void postTreatment(ModifiablePage page, Content content, File file) throws IOException
    {
        // Nothing to do: this importer performs no post-treatment, the three arguments are left untouched
    }
148    
149    private Document getXmlDocFromFile(File file) throws FileNotFoundException, UnsupportedEncodingException, IOException
150    {
151        InputStream is = new FileInputStream(file);
152        Reader reader = new InputStreamReader(is, "UTF-8");
153        Document doc = null;
154        try
155        {
156            doc = _domParser.parseDocument(new InputSource(reader));
157        }
158        catch (SAXException e)
159        {
160            getLogger().error("[IMPORT] Unable to parse imported file " + file.getName(), e);
161        }
162        return doc;
163    }
164    
165    private void _importAttributes(ModifiableWebContent content, Node xmlContent) throws IOException
166    {
167        NodeList attributesNodes = xmlContent.getChildNodes();
168        for (int i = 0; i < attributesNodes.getLength(); i++)
169        {
170            Node attributeNode = attributesNodes.item(i);
171            
172            if (attributeNode.getNodeType() == Node.ELEMENT_NODE && content.hasDefinition(attributeNode.getLocalName()))
173            {
174                ModelItem attributeDefinition = content.getDefinition(attributeNode.getLocalName());
175                _importAttribute(content, attributeDefinition, attributeNode);
176            }
177        }
178    }
179
180    @SuppressWarnings("unchecked")
181    private void _importAttribute(ModifiableModelAwareDataHolder dataHolder, ModelItem attributeDefinition, Node attributeNode) throws IOException
182    {
183        if (attributeDefinition != null)
184        {
185            if (attributeDefinition instanceof RepeaterDefinition)
186            {
187                _setRepeater(dataHolder, (RepeaterDefinition) attributeDefinition, attributeNode);
188            }
189            else if (attributeDefinition instanceof CompositeDefinition)
190            {
191                _setComposite(dataHolder, (CompositeDefinition) attributeDefinition, attributeNode);
192            }
193            else if (attributeDefinition instanceof ElementDefinition)
194            {
195                _setAttribute(dataHolder, (ElementDefinition) attributeDefinition, attributeNode);
196            }
197        }
198    }
199    
200    private void _setRepeater(ModifiableModelAwareDataHolder dataHolder, RepeaterDefinition repeaterDefinition, Node repeaterNode) throws IOException
201    {
202        NodeList entryNodes = _xPathProcessor.selectNodeList(repeaterNode, "entry");
203        if (entryNodes.getLength() > 0)
204        {
205            ModifiableModelAwareRepeater repeaterData = dataHolder.getRepeater(repeaterDefinition.getName(), true);
206            for (int i = 0; i < entryNodes.getLength(); i++)
207            {
208                Node entryNode = entryNodes.item(i);
209                ModifiableModelAwareRepeaterEntry entryData = repeaterData.addEntry();
210    
211                NodeList subDataNodes = entryNode.getChildNodes();
212                for (int j = 0; j < subDataNodes.getLength(); j++)
213                {
214                    Node subDataNode = subDataNodes.item(j);
215                    if (subDataNode.getNodeType() == Node.ELEMENT_NODE)
216                    {
217                        String subDataName = subDataNode.getLocalName();
218                        ModelItem childDefinition = repeaterDefinition.getChild(subDataName);
219                        
220                        _importAttribute(entryData, childDefinition, subDataNode);
221                    }
222                }
223            }
224        }
225    }
226    
227    private void _setComposite(ModifiableModelAwareDataHolder dataHolder, CompositeDefinition compositeDefinition, Node compositeNode) throws IOException
228    {
229        NodeList subDataNodes = compositeNode.getChildNodes();
230        if (subDataNodes.getLength() > 0)
231        {
232            ModifiableModelAwareComposite compositeData = dataHolder.getComposite(compositeDefinition.getName(), true);
233            for (int i = 0; i < subDataNodes.getLength(); i++)
234            {
235                Node subDataNode = subDataNodes.item(i);
236                if (subDataNode.getNodeType() == Node.ELEMENT_NODE)
237                {
238                    String subDataName = subDataNode.getLocalName();
239                    ModelItem childDefinition = compositeDefinition.getChild(subDataName);
240                    
241                    _importAttribute(compositeData, childDefinition, subDataNode);
242                }
243            }
244        }
245    }
246
247    @SuppressWarnings("unchecked")
248    private <T> void _setAttribute(ModifiableModelAwareDataHolder dataHolder, ElementDefinition<T> attributeDefinition, Node attributeNode) throws IOException
249    {
250        ElementType<T> type = attributeDefinition.getType();
251        
252        if (attributeDefinition.isMultiple())
253        {
254            NodeList valuesNodeList = _xPathProcessor.selectNodeList(attributeNode, "value");
255            List<T> values = new ArrayList<>();
256            for (int i = 0; i < valuesNodeList.getLength(); i++)
257            {
258                _getSingleAttributeValue(valuesNodeList.item(i), type)
259                    .ifPresent(value -> values.add(value));
260            }
261            
262            if (!values.isEmpty())
263            {
264                T[] valuesAsArray = (T[]) Array.newInstance(type.getManagedClass(), values.size());
265                dataHolder.setValue(attributeDefinition.getName(), values.toArray(valuesAsArray));
266            }
267        }
268        else
269        {
270            _getSingleAttributeValue(attributeNode, type)
271                .ifPresent(value -> dataHolder.setValue(attributeDefinition.getName(), value));
272        }
273    }
274    
275    @SuppressWarnings("unchecked")
276    private <T> Optional<T> _getSingleAttributeValue(Node valueNode, ElementType<T> type) throws IOException
277    {
278        String id = type.getId();
279        if (ModelItemTypeConstants.BINARY_ELEMENT_TYPE_ID.equals(id) || ModelItemTypeConstants.FILE_ELEMENT_TYPE_ID.equals(id))
280        {
281            return (Optional<T>) _getSingleBinaryAttributeValue(valueNode);
282        }
283        else if (ModelItemTypeConstants.GEOCODE_ELEMENT_TYPE_ID.equals(id))
284        {
285            return (Optional<T>) _getSingleGeocodeAttributeValue(valueNode);
286        }
287        else if (ModelItemTypeConstants.RICH_TEXT_ELEMENT_TYPE_ID.equals(id))
288        {
289            return (Optional<T>) _getSingleRichTextAttributeValue(valueNode);
290        }
291        else
292        {
293            return _getSingleDefaultAttributeValue(valueNode, type);
294        }
295    }
296
297    private Optional<Geocode> _getSingleGeocodeAttributeValue(Node geocodeNode)
298    {
299        Node latitudeNode = _xPathProcessor.selectSingleNode(geocodeNode, "latitude");
300        String latitude = latitudeNode.getTextContent();
301        
302        Node longitudeNode = _xPathProcessor.selectSingleNode(geocodeNode, "longitude");
303        String longitude = longitudeNode.getTextContent();
304        
305        if (StringUtils.isNotEmpty(latitude) && StringUtils.isNotEmpty(longitude))
306        {
307            return Optional.of(new Geocode(Double.valueOf(latitude), Double.valueOf(longitude)));
308        }
309        else
310        {
311            throw new IllegalArgumentException("Invalid geocode values: latitude='" + latitude + "', longitude='" + longitude + "'.");
312        }
313    }
314    
315    private Optional<Binary> _getSingleBinaryAttributeValue(Node binaryNode)
316    {
317        String value = binaryNode.getTextContent();
318        if (StringUtils.isNotEmpty(value))
319        {
320            try
321            {
322                Pattern pattern = Pattern.compile("filename=\"([^\"]+)\"");
323                
324                URL url = new URL(value);
325                HttpURLConnection connection = (HttpURLConnection) url.openConnection();
326                connection.setConnectTimeout(1000);
327                connection.setReadTimeout(2000);
328                
329                String contentType = StringUtils.defaultString(connection.getContentType(), "application/unknown");
330                String contentEncoding = StringUtils.defaultString(connection.getContentEncoding(), "");
331                String contentDisposition = StringUtils.defaultString(connection.getHeaderField("Content-Disposition"), "");
332                String filename = URIUtils.decode(FilenameUtils.getName(connection.getURL().getPath()));
333                if (StringUtils.isEmpty(filename))
334                {
335                    Matcher matcher = pattern.matcher(contentDisposition);
336                    if (matcher.matches())
337                    {
338                        filename = matcher.group(1);
339                    }
340                    else
341                    {
342                        filename = "unknown";
343                    }
344                }
345                
346                try (InputStream is = connection.getInputStream())
347                {
348                    ByteArrayOutputStream bos = new ByteArrayOutputStream();
349                    IOUtils.copy(is, bos);
350                    
351                    Binary binary = new Binary();
352                    binary.setLastModificationDate(ZonedDateTime.now());
353                    binary.setInputStream(new ByteArrayInputStream(bos.toByteArray()));
354                    
355                    if (StringUtils.isNotEmpty(filename))
356                    {
357                        binary.setFilename(filename);
358                    }
359                    if (StringUtils.isNotEmpty(contentType))
360                    {
361                        binary.setMimeType(contentType);
362                    }
363                    if (StringUtils.isNotEmpty(contentEncoding))
364                    {
365                        binary.setEncoding(contentEncoding);
366                    }
367                    
368                    return Optional.of(binary);
369                }
370            }
371            catch (Exception e)
372            {
373                throw new IllegalArgumentException("Unable to fetch file from URL '" + value + "', it will be ignored.", e);
374            }
375        }
376        else
377        {
378            return Optional.empty();
379        }
380    }
381    
382    private Optional<RichText> _getSingleRichTextAttributeValue(Node richTextNode) throws IOException
383    {
384        NodeList docbookNodes = richTextNode.getChildNodes();
385        for (int i = 0; i < docbookNodes.getLength(); i++)
386        {
387            Node docbookNode = docbookNodes.item(i);
388            if (docbookNode.getNodeType() == Node.ELEMENT_NODE && "article".equals(docbookNode.getLocalName()))
389            {
390                try
391                {
392                    String docbook = _serializeNode(docbookNode);
393
394                    RichText richText = new RichText();
395                    richText.setEncoding("UTF-8");
396                    richText.setLastModificationDate(ZonedDateTime.now());
397                    richText.setMimeType("text/xml");
398                    richText.setInputStream(new ByteArrayInputStream(docbook.getBytes("UTF-8")));
399                    return Optional.of(richText);
400                }
401                catch (TransformerException e)
402                {
403                    throw new IOException("Error serializing a docbook node.", e);
404                }
405            }
406        }
407        
408        // No article found, return an empty Optional
409        return Optional.empty();
410    }
411    
412    private String _serializeNode(Node node) throws TransformerException
413    {
414        Transformer transformer = TransformerFactory.newInstance().newTransformer();
415        
416        Properties format = new Properties();
417        format.put(OutputKeys.METHOD, "xml");
418        format.put(OutputKeys.ENCODING, "UTF-8");
419        
420        transformer.setOutputProperties(format);
421        
422        StringWriter writer = new StringWriter();
423        DOMSource domSource = new DOMSource(node);
424        StreamResult result = new StreamResult(writer);
425        
426        transformer.transform(domSource, result);
427        
428        return writer.toString();
429    }
430    
431    private <T> Optional<T> _getSingleDefaultAttributeValue(Node valueNode, ElementType<T> type)
432    {
433        String valueAsString = valueNode.getTextContent();
434        return Optional.of(type.castValue(valueAsString));
435    }
436}