001/*
002 *  Copyright 2016 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.webcontentio.xml;
017
018import java.io.ByteArrayInputStream;
019import java.io.ByteArrayOutputStream;
020import java.io.File;
021import java.io.FileInputStream;
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.InputStreamReader;
026import java.io.Reader;
027import java.io.StringWriter;
028import java.io.UnsupportedEncodingException;
029import java.lang.reflect.Array;
030import java.net.HttpURLConnection;
031import java.net.URL;
032import java.time.ZonedDateTime;
033import java.util.ArrayList;
034import java.util.List;
035import java.util.Map;
036import java.util.Objects;
037import java.util.Optional;
038import java.util.Properties;
039import java.util.regex.Matcher;
040import java.util.regex.Pattern;
041
042import javax.xml.transform.OutputKeys;
043import javax.xml.transform.Transformer;
044import javax.xml.transform.TransformerException;
045import javax.xml.transform.TransformerFactory;
046import javax.xml.transform.dom.DOMSource;
047import javax.xml.transform.stream.StreamResult;
048
049import org.apache.avalon.framework.logger.AbstractLogEnabled;
050import org.apache.avalon.framework.service.ServiceException;
051import org.apache.avalon.framework.service.ServiceManager;
052import org.apache.avalon.framework.service.Serviceable;
053import org.apache.commons.io.FilenameUtils;
054import org.apache.commons.io.IOUtils;
055import org.apache.commons.lang3.StringUtils;
056import org.apache.excalibur.xml.dom.DOMParser;
057import org.apache.excalibur.xml.xpath.XPathProcessor;
058import org.w3c.dom.Document;
059import org.w3c.dom.Node;
060import org.w3c.dom.NodeList;
061import org.xml.sax.InputSource;
062import org.xml.sax.SAXException;
063
064import org.ametys.cms.contenttype.ContentTypeExtensionPoint;
065import org.ametys.cms.data.Binary;
066import org.ametys.cms.data.Geocode;
067import org.ametys.cms.data.RichText;
068import org.ametys.cms.data.type.ModelItemTypeConstants;
069import org.ametys.cms.repository.Content;
070import org.ametys.core.util.URIUtils;
071import org.ametys.plugins.repository.data.holder.ModifiableModelAwareDataHolder;
072import org.ametys.plugins.repository.data.holder.group.ModifiableModelAwareComposite;
073import org.ametys.plugins.repository.data.holder.group.ModifiableModelAwareRepeater;
074import org.ametys.plugins.repository.data.holder.group.ModifiableModelAwareRepeaterEntry;
075import org.ametys.plugins.repository.model.CompositeDefinition;
076import org.ametys.plugins.repository.model.RepeaterDefinition;
077import org.ametys.plugins.webcontentio.ContentImporter;
078import org.ametys.runtime.model.ElementDefinition;
079import org.ametys.runtime.model.ModelItem;
080import org.ametys.runtime.model.type.ElementType;
081import org.ametys.web.repository.content.ModifiableWebContent;
082import org.ametys.web.repository.page.ModifiablePage;
083
084/**
085 * Default XML content importer
086 */
087public class XmlContentImporter extends AbstractLogEnabled implements ContentImporter, Serviceable
088{
089    private DOMParser _domParser;
090    private XPathProcessor _xPathProcessor;
091    private ContentTypeExtensionPoint _contentTypeExtensionPoint;
092
093    @Override
094    public void service(ServiceManager manager) throws ServiceException
095    {
096        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
097        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
098        _contentTypeExtensionPoint = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE);
099    }
100    
101    @Override
102    public void importContent(File file, ModifiableWebContent content, Map<String, String> params) throws IOException
103    {
104        Document doc = getXmlDocFromFile(file);
105        
106        if (doc == null)
107        {
108            throw new IOException("Unable to retrieve the xml document from the file received.");
109        }
110        
111        Node xmlContent = _xPathProcessor.selectSingleNode(doc, "/content");
112
113        String contentTypeId = _xPathProcessor.evaluateAsString(xmlContent, "@type");
114        
115        if (StringUtils.isEmpty(contentTypeId))
116        {
117            throw new IOException("Invalid file content : no content type specified.");
118        }
119        
120        if (!_contentTypeExtensionPoint.hasExtension(contentTypeId))
121        {
122            throw new IOException("Invalid file content : the specified content type does not exist.");
123        }
124        
125        content.setTypes(new String[] {contentTypeId});
126        
127        Node title = _xPathProcessor.selectSingleNode(xmlContent, "title");
128        
129        if (title == null)
130        {
131            throw new IOException("Invalid file content : no title found, but it is mandatory.");
132        }
133        
134        _importAttributes(content, xmlContent);
135    }
136    
137    @Override
138    public String[] getMimeTypes()
139    {
140        // handles xml mime-types
141        return new String[] {"application/xml", "text/xml"};
142    }
143    
144    @Override
145    public void postTreatment(ModifiablePage page, Content content, File file) throws IOException
146    {
147        // Nothing to do
148    }
149    
150    private Document getXmlDocFromFile(File file) throws FileNotFoundException, UnsupportedEncodingException, IOException
151    {
152        InputStream is = new FileInputStream(file);
153        Reader reader = new InputStreamReader(is, "UTF-8");
154        Document doc = null;
155        try
156        {
157            doc = _domParser.parseDocument(new InputSource(reader));
158        }
159        catch (SAXException e)
160        {
161            getLogger().error("[IMPORT] Unable to parse imported file " + file.getName(), e);
162        }
163        return doc;
164    }
165    
166    private void _importAttributes(ModifiableWebContent content, Node xmlContent) throws IOException
167    {
168        NodeList attributesNodes = xmlContent.getChildNodes();
169        for (int i = 0; i < attributesNodes.getLength(); i++)
170        {
171            Node attributeNode = attributesNodes.item(i);
172            
173            if (attributeNode.getNodeType() == Node.ELEMENT_NODE && content.hasDefinition(attributeNode.getLocalName()))
174            {
175                ModelItem attributeDefinition = content.getDefinition(attributeNode.getLocalName());
176                _importAttribute(content, attributeDefinition, attributeNode);
177            }
178        }
179    }
180
181    @SuppressWarnings("unchecked")
182    private void _importAttribute(ModifiableModelAwareDataHolder dataHolder, ModelItem attributeDefinition, Node attributeNode) throws IOException
183    {
184        if (attributeDefinition != null)
185        {
186            if (attributeDefinition instanceof RepeaterDefinition)
187            {
188                _setRepeater(dataHolder, (RepeaterDefinition) attributeDefinition, attributeNode);
189            }
190            else if (attributeDefinition instanceof CompositeDefinition)
191            {
192                _setComposite(dataHolder, (CompositeDefinition) attributeDefinition, attributeNode);
193            }
194            else if (attributeDefinition instanceof ElementDefinition)
195            {
196                _setAttribute(dataHolder, (ElementDefinition) attributeDefinition, attributeNode);
197            }
198        }
199    }
200    
201    private void _setRepeater(ModifiableModelAwareDataHolder dataHolder, RepeaterDefinition repeaterDefinition, Node repeaterNode) throws IOException
202    {
203        NodeList entryNodes = _xPathProcessor.selectNodeList(repeaterNode, "entry");
204        if (entryNodes.getLength() > 0)
205        {
206            ModifiableModelAwareRepeater repeaterData = dataHolder.getRepeater(repeaterDefinition.getName(), true);
207            for (int i = 0; i < entryNodes.getLength(); i++)
208            {
209                Node entryNode = entryNodes.item(i);
210                ModifiableModelAwareRepeaterEntry entryData = repeaterData.addEntry();
211    
212                NodeList subDataNodes = entryNode.getChildNodes();
213                for (int j = 0; j < subDataNodes.getLength(); j++)
214                {
215                    Node subDataNode = subDataNodes.item(j);
216                    if (subDataNode.getNodeType() == Node.ELEMENT_NODE)
217                    {
218                        String subDataName = subDataNode.getLocalName();
219                        ModelItem childDefinition = repeaterDefinition.getChild(subDataName);
220                        
221                        _importAttribute(entryData, childDefinition, subDataNode);
222                    }
223                }
224            }
225        }
226    }
227    
228    private void _setComposite(ModifiableModelAwareDataHolder dataHolder, CompositeDefinition compositeDefinition, Node compositeNode) throws IOException
229    {
230        NodeList subDataNodes = compositeNode.getChildNodes();
231        if (subDataNodes.getLength() > 0)
232        {
233            ModifiableModelAwareComposite compositeData = dataHolder.getComposite(compositeDefinition.getName(), true);
234            for (int i = 0; i < subDataNodes.getLength(); i++)
235            {
236                Node subDataNode = subDataNodes.item(i);
237                if (subDataNode.getNodeType() == Node.ELEMENT_NODE)
238                {
239                    String subDataName = subDataNode.getLocalName();
240                    ModelItem childDefinition = compositeDefinition.getChild(subDataName);
241                    
242                    _importAttribute(compositeData, childDefinition, subDataNode);
243                }
244            }
245        }
246    }
247
248    @SuppressWarnings("unchecked")
249    private <T> void _setAttribute(ModifiableModelAwareDataHolder dataHolder, ElementDefinition<T> attributeDefinition, Node attributeNode) throws IOException
250    {
251        ElementType<T> type = attributeDefinition.getType();
252        
253        if (attributeDefinition.isMultiple())
254        {
255            NodeList valuesNodeList = _xPathProcessor.selectNodeList(attributeNode, "value");
256            List<T> values = new ArrayList<>();
257            for (int i = 0; i < valuesNodeList.getLength(); i++)
258            {
259                _getSingleAttributeValue(valuesNodeList.item(i), type)
260                    .ifPresent(value -> values.add(value));
261            }
262            
263            if (!values.isEmpty())
264            {
265                T[] valuesAsArray = (T[]) Array.newInstance(type.getManagedClass(), values.size());
266                dataHolder.setValue(attributeDefinition.getName(), values.toArray(valuesAsArray));
267            }
268        }
269        else
270        {
271            _getSingleAttributeValue(attributeNode, type)
272                .ifPresent(value -> dataHolder.setValue(attributeDefinition.getName(), value));
273        }
274    }
275    
276    @SuppressWarnings("unchecked")
277    private <T> Optional<T> _getSingleAttributeValue(Node valueNode, ElementType<T> type) throws IOException
278    {
279        String id = type.getId();
280        if (ModelItemTypeConstants.BINARY_ELEMENT_TYPE_ID.equals(id) || ModelItemTypeConstants.FILE_ELEMENT_TYPE_ID.equals(id))
281        {
282            return (Optional<T>) _getSingleBinaryAttributeValue(valueNode);
283        }
284        else if (ModelItemTypeConstants.GEOCODE_ELEMENT_TYPE_ID.equals(id))
285        {
286            return (Optional<T>) _getSingleGeocodeAttributeValue(valueNode);
287        }
288        else if (ModelItemTypeConstants.RICH_TEXT_ELEMENT_TYPE_ID.equals(id))
289        {
290            return (Optional<T>) _getSingleRichTextAttributeValue(valueNode);
291        }
292        else
293        {
294            return _getSingleDefaultAttributeValue(valueNode, type);
295        }
296    }
297
298    private Optional<Geocode> _getSingleGeocodeAttributeValue(Node geocodeNode)
299    {
300        Node latitudeNode = _xPathProcessor.selectSingleNode(geocodeNode, "latitude");
301        String latitude = latitudeNode.getTextContent();
302        
303        Node longitudeNode = _xPathProcessor.selectSingleNode(geocodeNode, "longitude");
304        String longitude = longitudeNode.getTextContent();
305        
306        if (StringUtils.isNotEmpty(latitude) && StringUtils.isNotEmpty(longitude))
307        {
308            return Optional.of(new Geocode(Double.valueOf(latitude), Double.valueOf(longitude)));
309        }
310        else
311        {
312            throw new IllegalArgumentException("Invalid geocode values: latitude='" + latitude + "', longitude='" + longitude + "'.");
313        }
314    }
315    
316    private Optional<Binary> _getSingleBinaryAttributeValue(Node binaryNode)
317    {
318        String value = binaryNode.getTextContent();
319        if (StringUtils.isNotEmpty(value))
320        {
321            try
322            {
323                Pattern pattern = Pattern.compile("filename=\"([^\"]+)\"");
324                
325                URL url = new URL(value);
326                HttpURLConnection connection = (HttpURLConnection) url.openConnection();
327                connection.setConnectTimeout(1000);
328                connection.setReadTimeout(2000);
329                
330                String contentType = Objects.toString(connection.getContentType(), "application/unknown");
331                String contentEncoding = Objects.toString(connection.getContentEncoding(), "");
332                String contentDisposition = Objects.toString(connection.getHeaderField("Content-Disposition"), "");
333                String filename = URIUtils.decode(FilenameUtils.getName(connection.getURL().getPath()));
334                if (StringUtils.isEmpty(filename))
335                {
336                    Matcher matcher = pattern.matcher(contentDisposition);
337                    if (matcher.matches())
338                    {
339                        filename = matcher.group(1);
340                    }
341                    else
342                    {
343                        filename = "unknown";
344                    }
345                }
346                
347                try (InputStream is = connection.getInputStream())
348                {
349                    ByteArrayOutputStream bos = new ByteArrayOutputStream();
350                    IOUtils.copy(is, bos);
351                    
352                    Binary binary = new Binary();
353                    binary.setLastModificationDate(ZonedDateTime.now());
354                    binary.setInputStream(new ByteArrayInputStream(bos.toByteArray()));
355                    
356                    if (StringUtils.isNotEmpty(filename))
357                    {
358                        binary.setFilename(filename);
359                    }
360                    if (StringUtils.isNotEmpty(contentType))
361                    {
362                        binary.setMimeType(contentType);
363                    }
364                    if (StringUtils.isNotEmpty(contentEncoding))
365                    {
366                        binary.setEncoding(contentEncoding);
367                    }
368                    
369                    return Optional.of(binary);
370                }
371            }
372            catch (Exception e)
373            {
374                throw new IllegalArgumentException("Unable to fetch file from URL '" + value + "', it will be ignored.", e);
375            }
376        }
377        else
378        {
379            return Optional.empty();
380        }
381    }
382    
383    private Optional<RichText> _getSingleRichTextAttributeValue(Node richTextNode) throws IOException
384    {
385        NodeList docbookNodes = richTextNode.getChildNodes();
386        for (int i = 0; i < docbookNodes.getLength(); i++)
387        {
388            Node docbookNode = docbookNodes.item(i);
389            if (docbookNode.getNodeType() == Node.ELEMENT_NODE && "article".equals(docbookNode.getLocalName()))
390            {
391                try
392                {
393                    String docbook = _serializeNode(docbookNode);
394
395                    RichText richText = new RichText();
396                    richText.setEncoding("UTF-8");
397                    richText.setLastModificationDate(ZonedDateTime.now());
398                    richText.setMimeType("text/xml");
399                    richText.setInputStream(new ByteArrayInputStream(docbook.getBytes("UTF-8")));
400                    return Optional.of(richText);
401                }
402                catch (TransformerException e)
403                {
404                    throw new IOException("Error serializing a docbook node.", e);
405                }
406            }
407        }
408        
409        // No article found, return an empty Optional
410        return Optional.empty();
411    }
412    
413    private String _serializeNode(Node node) throws TransformerException
414    {
415        Transformer transformer = TransformerFactory.newInstance().newTransformer();
416        
417        Properties format = new Properties();
418        format.put(OutputKeys.METHOD, "xml");
419        format.put(OutputKeys.ENCODING, "UTF-8");
420        
421        transformer.setOutputProperties(format);
422        
423        StringWriter writer = new StringWriter();
424        DOMSource domSource = new DOMSource(node);
425        StreamResult result = new StreamResult(writer);
426        
427        transformer.transform(domSource, result);
428        
429        return writer.toString();
430    }
431    
432    private <T> Optional<T> _getSingleDefaultAttributeValue(Node valueNode, ElementType<T> type)
433    {
434        String valueAsString = valueNode.getTextContent();
435        return Optional.of(type.castValue(valueAsString));
436    }
437}