001/*
002 *  Copyright 2016 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.webcontentio.xml;
017
018import java.io.ByteArrayInputStream;
019import java.io.ByteArrayOutputStream;
020import java.io.File;
021import java.io.FileInputStream;
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.InputStreamReader;
026import java.io.Reader;
027import java.io.StringWriter;
028import java.io.UnsupportedEncodingException;
029import java.net.HttpURLConnection;
030import java.net.URL;
031import java.net.URLDecoder;
032import java.time.ZonedDateTime;
033import java.time.format.DateTimeFormatter;
034import java.util.Date;
035import java.util.Map;
036import java.util.Properties;
037import java.util.regex.Matcher;
038import java.util.regex.Pattern;
039
040import javax.xml.transform.OutputKeys;
041import javax.xml.transform.Transformer;
042import javax.xml.transform.TransformerException;
043import javax.xml.transform.TransformerFactory;
044import javax.xml.transform.dom.DOMSource;
045import javax.xml.transform.stream.StreamResult;
046
047import org.apache.avalon.framework.logger.AbstractLogEnabled;
048import org.apache.avalon.framework.service.ServiceException;
049import org.apache.avalon.framework.service.ServiceManager;
050import org.apache.avalon.framework.service.Serviceable;
051import org.apache.commons.io.FilenameUtils;
052import org.apache.commons.io.IOUtils;
053import org.apache.commons.lang.StringUtils;
054import org.apache.excalibur.xml.dom.DOMParser;
055import org.apache.excalibur.xml.xpath.XPathProcessor;
056import org.w3c.dom.Document;
057import org.w3c.dom.Node;
058import org.w3c.dom.NodeList;
059import org.xml.sax.InputSource;
060import org.xml.sax.SAXException;
061
062import org.ametys.cms.contenttype.ContentTypesHelper;
063import org.ametys.cms.contenttype.MetadataDefinition;
064import org.ametys.cms.contenttype.RepeaterDefinition;
065import org.ametys.cms.repository.Content;
066import org.ametys.plugins.repository.metadata.ModifiableBinaryMetadata;
067import org.ametys.plugins.repository.metadata.ModifiableCompositeMetadata;
068import org.ametys.plugins.repository.metadata.ModifiableRichText;
069import org.ametys.plugins.webcontentio.ContentImporter;
070import org.ametys.runtime.parameter.ParameterHelper;
071import org.ametys.runtime.parameter.ParameterHelper.ParameterType;
072import org.ametys.web.repository.content.ModifiableWebContent;
073import org.ametys.web.repository.page.ModifiablePage;
074
075/**
076 * Default XML content importer
077 */
078public class XmlContentImporter extends AbstractLogEnabled implements ContentImporter, Serviceable
079{
080    private DOMParser _domParser;
081    private XPathProcessor _xPathProcessor;
082    private ContentTypesHelper _cTypeHelper;
083
084    @Override
085    public void service(ServiceManager manager) throws ServiceException
086    {
087        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
088        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
089        _cTypeHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE);
090    }
091    
092    @Override
093    public void importContent(File file, ModifiableWebContent content, Map<String, String> params) throws IOException
094    {
095        Document doc = getXmlDocFromFile(file);
096
097        if (doc == null)
098        {
099            throw new IOException("Unable to retrieve the xml document from the file received.");
100        }
101        
102        Node xmlContent = _xPathProcessor.selectSingleNode(doc, "/content");
103        String contentType = _xPathProcessor.evaluateAsString(xmlContent, "@type");
104        
105        if (contentType == null)
106        {
107            throw new IOException("Invalid file content : no content type specified.");
108        }
109        content.setTypes(new String[] {contentType});
110        
111        Node title = _xPathProcessor.selectSingleNode(xmlContent, "title");
112        
113        if (title == null)
114        {
115            throw new IOException("Invalid file content : no title found, but it is mandatory.");
116        }
117
118        _importMetadata(content, xmlContent);
119    }
120    
121    @Override
122    public String[] getMimeTypes()
123    {
124        // handles xml mime-types
125        return new String[] {"application/xml", "text/xml"};
126    }
127    
128    @Override
129    public void postTreatment(ModifiablePage page, Content content, File file) throws IOException
130    {
131        // Nothing to do
132    }
133    
134    private Document getXmlDocFromFile(File file) throws FileNotFoundException, UnsupportedEncodingException, IOException
135    {
136        InputStream is = new FileInputStream(file);
137        Reader reader = new InputStreamReader(is, "UTF-8");
138        Document doc = null;
139        try
140        {
141            doc = _domParser.parseDocument(new InputSource(reader));
142        }
143        catch (SAXException e)
144        {
145            getLogger().error("[IMPORT] Unable to parse imported file " + file.getName(), e);
146        }
147        return doc;
148    }
149    
150    private void _importMetadata(ModifiableWebContent content, Node domNode) throws IOException
151    {
152        NodeList childNodes = domNode.getChildNodes();
153        for (int i = 0; i < childNodes.getLength(); i++)
154        {
155            Node metadataNode = childNodes.item(i);
156            
157            if (metadataNode.getNodeType() == Node.ELEMENT_NODE)
158            {
159                MetadataDefinition metaDef = _cTypeHelper.getMetadataDefinition(metadataNode.getLocalName(), content);
160                _importMetadata(content.getMetadataHolder(), metaDef, metadataNode);
161            }
162        }
163    }
164
165    private void _importMetadata(ModifiableCompositeMetadata contentNode, MetadataDefinition metaDef, Node domNode) throws IOException
166    {
167        if (metaDef != null)
168        {
169            if (metaDef instanceof RepeaterDefinition)
170            {
171                _setRepeater(contentNode, (RepeaterDefinition) metaDef, domNode);
172            }
173            else
174            {
175                _setMetadata(contentNode, metaDef, domNode);
176            }
177        }
178    }
179    
180    private void _setRepeater(ModifiableCompositeMetadata meta, RepeaterDefinition repeaterDef, Node domNode) throws IOException
181    {
182        ModifiableCompositeMetadata repeaterMeta = meta.getCompositeMetadata(repeaterDef.getName(), true);
183        
184        NodeList entryNodes = _xPathProcessor.selectNodeList(domNode, "entry");
185        for (int i = 0; i < entryNodes.getLength(); i++)
186        {
187            Node entryNode = entryNodes.item(i);
188            String entryName = Integer.toString(i + 1);
189            
190            ModifiableCompositeMetadata entryMeta = repeaterMeta.getCompositeMetadata(entryName, true);
191            NodeList subMetaNodes = entryNode.getChildNodes();
192            for (int j = 0; j < subMetaNodes.getLength(); j++)
193            {
194                Node subMetaNode = subMetaNodes.item(j);
195                if (subMetaNode.getNodeType() == Node.ELEMENT_NODE)
196                {
197                    String subMetaName = subMetaNode.getLocalName();
198                    MetadataDefinition childDef = repeaterDef.getMetadataDefinition(subMetaName);
199                    
200                    _importMetadata(entryMeta, childDef, subMetaNode);
201                }
202            }
203        }
204    }
205
206    private void _setMetadata(ModifiableCompositeMetadata contentNode, MetadataDefinition metaDef, Node domNode) throws IOException
207    {
208        switch (metaDef.getType())
209        {
210            case STRING:
211                setStringMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
212                break;
213            case BOOLEAN:
214                setBooleanMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
215                break;
216            case DOUBLE:
217                setDoubleMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
218                break;
219            case LONG:
220                setLongMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
221                break;
222            case COMPOSITE:
223                setCompositeMetadata(contentNode, metaDef, domNode);
224                break;
225            case DATE:
226            case DATETIME:
227                setDateMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
228                break;
229            case BINARY:
230            case FILE:
231                String[] values = getMetadataValues(metaDef, domNode);
232                if (values.length > 0)
233                {
234                    setBinaryMetadata(contentNode, metaDef, values[0]);
235                }
236                break;
237            case GEOCODE:
238                Node latitudeNode = _xPathProcessor.selectSingleNode(domNode, "latitude");
239                Node longitudeNode = _xPathProcessor.selectSingleNode(domNode, "longitude");
240                setGeocodeMetadata(contentNode, metaDef, latitudeNode.getTextContent(), longitudeNode.getTextContent());
241                break;
242            case REFERENCE:
243                break;
244            case RICH_TEXT:
245                setRichTextMetadata(contentNode, domNode, metaDef.getName());
246                break;
247            case USER:
248                break;
249            default:
250                break;
251        }
252    }
253
254    private String[] getMetadataValues(MetadataDefinition metaDef, Node domNode)
255    {
256        String[] values;
257        if (metaDef.isMultiple())
258        {
259            NodeList valuesNodeList = _xPathProcessor.selectNodeList(domNode, "value");
260            values = new String[valuesNodeList.getLength()];
261            for (int i = 0; i < valuesNodeList.getLength(); i++)
262            {
263                values[i] = valuesNodeList.item(i).getTextContent();
264            }
265        }
266        else
267        {
268            values = new String[1];
269            values[0] = domNode.getTextContent();
270        }
271        return values;
272    }
273
274    private void setStringMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
275    {
276        if (values != null)
277        {
278            if (metaDef.isMultiple())
279            {
280                meta.setMetadata(metaDef.getName(), values);
281            }
282            else
283            {
284                meta.setMetadata(metaDef.getName(), values[0]);
285            }
286        }
287    }
288    
289    /**
290     * Set a boolean metadata.
291     * @param meta the metadata holder.
292     * @param metaDef the metadata definition.
293     * @param values the metadata values.
294     */
295    protected void setBooleanMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
296    {
297        if (values != null)
298        {
299            if (metaDef.isMultiple())
300            {
301                boolean[] bValues = new boolean[values.length];
302                for (int i = 0; i < values.length; i++)
303                {
304                    bValues[i] = Boolean.parseBoolean(values[i]);
305                }
306                
307                meta.setMetadata(metaDef.getName(), bValues);
308            }
309            else
310            {
311                meta.setMetadata(metaDef.getName(), Boolean.parseBoolean(values[0]));
312            }
313        }
314    }
315    
316    /**
317     * Set a long metadata.
318     * @param meta the metadata holder.
319     * @param metaDef the metadata definition.
320     * @param values the metadata values.
321     */
322    protected void setLongMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
323    {
324        if (values != null)
325        {
326            if (metaDef.isMultiple())
327            {
328                long[] lValues = new long[values.length];
329                for (int i = 0; i < values.length; i++)
330                {
331                    lValues[i] = Long.parseLong(values[i]);
332                }
333                
334                meta.setMetadata(metaDef.getName(), lValues);
335            }
336            else
337            {
338                meta.setMetadata(metaDef.getName(), Long.parseLong(values[0]));
339            }
340        }
341    }
342    
343    /**
344     * Set a double metadata.
345     * @param meta the metadata holder.
346     * @param metaDef the metadata definition.
347     * @param values the metadata values.
348     */
349    protected void setDoubleMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
350    {
351        if (values != null)
352        {
353            if (metaDef.isMultiple())
354            {
355                double[] dValues = new double[values.length];
356                for (int i = 0; i < values.length; i++)
357                {
358                    dValues[i] = Double.parseDouble(values[i]);
359                }
360                
361                meta.setMetadata(metaDef.getName(), dValues);
362            }
363            else
364            {
365                meta.setMetadata(metaDef.getName(), Double.parseDouble(values[0]));
366            }
367        }
368    }
369    
370    /**
371     * Set a date or datetime metadata.
372     * @param meta the metadata holder.
373     * @param metaDef the metadata definition.
374     * @param values the metadata values.
375     */
376    protected void setDateMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
377    {
378        if (values != null)
379        {
380            if (metaDef.isMultiple())
381            {
382                Date[] dValues = new Date[values.length];
383                for (int i = 0; i < values.length; i++)
384                {
385                    dValues[i] = parseDate(values[i]);
386                }
387                
388                meta.setMetadata(metaDef.getName(), dValues);
389            }
390            else
391            {
392                meta.setMetadata(metaDef.getName(), parseDate(values[0]));
393            }
394        }
395    }
396    
397    /**
398     * Parse a String value as a Date.<br>
399     * Allowed formats:
400     * <ul>
401     *   <li>yyyy-MM-dd</li>
402     *   <li>yyyy-MM-dd'T'HH:mm:ss.SSSZZ</li>
403     * </ul>
404     * @param value the String value.
405     * @return the parsed Date or <code>null</code> if the value can't be parsed.
406     */
407    protected Date parseDate(String value)
408    {
409        return parseDate(value, false);
410    }
411    
412    /**
413     * Parse a String value as a Date.<br>
414     * Allowed formats:
415     * <ul>
416     *   <li>yyyy-MM-dd</li>
417     *   <li>yyyy-MM-dd'T'HH:mm:ss.SSSZZ</li>
418     * </ul>
419     * @param value the String value.
420     * @param throwException true to throw an exception if the value can't be parsed, false to return null.
421     * @return the parsed Date or <code>null</code> if the value can't be parsed and throwException is false.
422     */
423    protected Date parseDate(String value, boolean throwException)
424    {
425        Date dateValue = null;
426        
427        try
428        {
429            dateValue = Date.from(ZonedDateTime.parse(value, DateTimeFormatter.ISO_LOCAL_DATE).toInstant());
430        }
431        catch (Exception e)
432        {
433            dateValue = (Date) ParameterHelper.castValue(value, ParameterType.DATE);
434        }
435        
436        if (dateValue == null && throwException)
437        {
438            throw new IllegalArgumentException("'" + value + "' could not be cast as a Date.");
439        }
440        
441        return dateValue;
442    }
443    
444    /**
445     * Set a geocode metadata.
446     * @param meta the metadata holder.
447     * @param metaDef the metadata definition.
448     * @param latitude the geocode latitude as a String.
449     * @param longitude the geocode longitude as a String.
450     */
451    protected void setGeocodeMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String latitude, String longitude)
452    {
453        if (StringUtils.isNotEmpty(latitude) && StringUtils.isNotEmpty(longitude))
454        {
455            double dLong = Double.parseDouble(longitude);
456            double dLat = Double.parseDouble(latitude);
457            
458            ModifiableCompositeMetadata geoCode = meta.getCompositeMetadata(metaDef.getName(), true);
459            geoCode.setMetadata("longitude", dLong);
460            geoCode.setMetadata("latitude", dLat);
461        }
462        else
463        {
464            throw new IllegalArgumentException("Invalid geocode values: latitude='" + latitude + "', longitude='" + longitude + "'.");
465        }
466    }
467    
468    /**
469     * Set a composite metadata.
470     * @param meta the metadata holder.
471     * @param domNode the metadata DOM node.
472     * @param metaDef the metadata definition.
473     * @throws IOException If an error occurres 
474     */
475    protected void setCompositeMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, Node domNode) throws IOException
476    {
477        NodeList subMetaNodes = domNode.getChildNodes();
478        if (subMetaNodes.getLength() > 0)
479        {
480            ModifiableCompositeMetadata composite = meta.getCompositeMetadata(metaDef.getName(), true);
481            for (int i = 0; i < subMetaNodes.getLength(); i++)
482            {
483                Node subMetaNode = subMetaNodes.item(i);
484                if (subMetaNode.getNodeType() == Node.ELEMENT_NODE)
485                {
486                    MetadataDefinition childDef = metaDef.getMetadataDefinition(subMetaNode.getLocalName());
487                    
488                    _importMetadata(composite, childDef, subMetaNode);
489                }
490            }
491        }
492    }
493    
494    /**
495     * Set a file metadata.
496     * @param meta the metadata holder.
497     * @param metaDef The metadata definition
498     * @param value The value
499     */
500    protected void setBinaryMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String value)
501    {
502        if (StringUtils.isNotEmpty(value))
503        {
504            
505            try
506            {
507                Pattern pattern = Pattern.compile("filename=\"([^\"]+)\"");
508                
509                URL url = new URL(value);
510                HttpURLConnection connection = (HttpURLConnection) url.openConnection();
511                connection.setConnectTimeout(1000);
512                connection.setReadTimeout(2000);
513                
514                String contentType = StringUtils.defaultString(connection.getContentType(), "application/unknown");
515                String contentEncoding = StringUtils.defaultString(connection.getContentEncoding(), "");
516                String contentDisposition = StringUtils.defaultString(connection.getHeaderField("Content-Disposition"), "");
517                String filename = URLDecoder.decode(FilenameUtils.getName(connection.getURL().getPath()), "UTF-8");
518                if (StringUtils.isEmpty(filename))
519                {
520                    Matcher matcher = pattern.matcher(contentDisposition);
521                    if (matcher.matches())
522                    {
523                        filename = matcher.group(1);
524                    }
525                    else
526                    {
527                        filename = "unknown";
528                    }
529                }
530                
531                try (InputStream is = connection.getInputStream())
532                {
533                    ByteArrayOutputStream bos = new ByteArrayOutputStream();
534                    IOUtils.copy(is, bos);
535                    
536                    ModifiableBinaryMetadata binaryMeta = meta.getBinaryMetadata(metaDef.getName(), true);
537                    binaryMeta.setLastModified(new Date());
538                    binaryMeta.setInputStream(new ByteArrayInputStream(bos.toByteArray()));
539                    
540                    if (StringUtils.isNotEmpty(filename))
541                    {
542                        binaryMeta.setFilename(filename);
543                    }
544                    if (StringUtils.isNotEmpty(contentType))
545                    {
546                        binaryMeta.setMimeType(contentType);
547                    }
548                    if (StringUtils.isNotEmpty(contentEncoding))
549                    {
550                        binaryMeta.setEncoding(contentEncoding);
551                    }
552                }
553            }
554            catch (Exception e)
555            {
556                throw new IllegalArgumentException("Unable to fetch file from URL '" + value + "', it will be ignored.", e);
557            }
558        }
559    }
560    
561    /**
562     * Set a RichText metadata.
563     * @param meta the metadata holder.
564     * @param domNode the metadata node.
565     * @param name the metadata name.
566     * @throws IOException if an error occurs.
567     */
568    protected void setRichTextMetadata(ModifiableCompositeMetadata meta, Node domNode, String name) throws IOException
569    {
570        NodeList docbookNodes = domNode.getChildNodes();
571        for (int i = 0; i < docbookNodes.getLength(); i++)
572        {
573            Node docbookNode = docbookNodes.item(i);
574            if (docbookNode.getNodeType() == Node.ELEMENT_NODE && "article".equals(docbookNode.getLocalName()))
575            {
576                try
577                {
578                    String docbook = serializeNode(docbookNode);
579                    ModifiableRichText richText = meta.getRichText(name, true);
580                    
581                    richText.setEncoding("UTF-8");
582                    richText.setLastModified(new Date());
583                    richText.setMimeType("text/xml");
584                    richText.setInputStream(new ByteArrayInputStream(docbook.getBytes("UTF-8")));
585                }
586                catch (TransformerException e)
587                {
588                    throw new IOException("Error serializing a docbook node.", e);
589                }
590            }
591        }
592    }
593    
594    /**
595     * Serialize a XML node as a String.
596     * @param node the node.
597     * @return the XML string.
598     * @throws TransformerException if an error occurs.
599     */
600    protected String serializeNode(Node node) throws TransformerException
601    {
602        Transformer transformer = TransformerFactory.newInstance().newTransformer();
603        
604        Properties format = new Properties();
605        format.put(OutputKeys.METHOD, "xml");
606        format.put(OutputKeys.ENCODING, "UTF-8");
607        
608        transformer.setOutputProperties(format);
609        
610        StringWriter writer = new StringWriter();
611        DOMSource domSource = new DOMSource(node);
612        StreamResult result = new StreamResult(writer);
613        
614        transformer.transform(domSource, result);
615        
616        return writer.toString();
617    }
618}