001/*
002 *  Copyright 2016 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.webcontentio.xml;
017
018import java.io.ByteArrayInputStream;
019import java.io.ByteArrayOutputStream;
020import java.io.File;
021import java.io.FileInputStream;
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.InputStreamReader;
026import java.io.Reader;
027import java.io.StringWriter;
028import java.io.UnsupportedEncodingException;
029import java.net.HttpURLConnection;
030import java.net.URL;
031import java.net.URLDecoder;
032import java.util.Date;
033import java.util.Map;
034import java.util.Properties;
035import java.util.regex.Matcher;
036import java.util.regex.Pattern;
037
038import javax.xml.transform.OutputKeys;
039import javax.xml.transform.Transformer;
040import javax.xml.transform.TransformerException;
041import javax.xml.transform.TransformerFactory;
042import javax.xml.transform.dom.DOMSource;
043import javax.xml.transform.stream.StreamResult;
044
045import org.apache.avalon.framework.logger.AbstractLogEnabled;
046import org.apache.avalon.framework.service.ServiceException;
047import org.apache.avalon.framework.service.ServiceManager;
048import org.apache.avalon.framework.service.Serviceable;
049import org.apache.commons.io.FilenameUtils;
050import org.apache.commons.io.IOUtils;
051import org.apache.commons.lang.StringUtils;
052import org.apache.excalibur.xml.dom.DOMParser;
053import org.apache.excalibur.xml.xpath.XPathProcessor;
054import org.joda.time.format.ISODateTimeFormat;
055import org.w3c.dom.Document;
056import org.w3c.dom.Node;
057import org.w3c.dom.NodeList;
058import org.xml.sax.InputSource;
059import org.xml.sax.SAXException;
060
061import org.ametys.cms.contenttype.ContentTypesHelper;
062import org.ametys.cms.contenttype.MetadataDefinition;
063import org.ametys.cms.contenttype.RepeaterDefinition;
064import org.ametys.cms.repository.Content;
065import org.ametys.plugins.repository.metadata.ModifiableBinaryMetadata;
066import org.ametys.plugins.repository.metadata.ModifiableCompositeMetadata;
067import org.ametys.plugins.repository.metadata.ModifiableRichText;
068import org.ametys.plugins.webcontentio.ContentImporter;
069import org.ametys.runtime.parameter.ParameterHelper;
070import org.ametys.runtime.parameter.ParameterHelper.ParameterType;
071import org.ametys.web.repository.content.ModifiableWebContent;
072import org.ametys.web.repository.page.ModifiablePage;
073
074/**
075 * Default XML content importer
076 */
077public class XmlContentImporter extends AbstractLogEnabled implements ContentImporter, Serviceable
078{
079    private DOMParser _domParser;
080    private XPathProcessor _xPathProcessor;
081    private ContentTypesHelper _cTypeHelper;
082
083    @Override
084    public void service(ServiceManager manager) throws ServiceException
085    {
086        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
087        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
088        _cTypeHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE);
089    }
090    
091    @Override
092    public void importContent(File file, ModifiableWebContent content, Map<String, String> params) throws IOException
093    {
094        Document doc = getXmlDocFromFile(file);
095
096        if (doc == null)
097        {
098            throw new IOException("Unable to retrieve the xml document from the file received.");
099        }
100        
101        Node xmlContent = _xPathProcessor.selectSingleNode(doc, "/content");
102        String contentType = _xPathProcessor.evaluateAsString(xmlContent, "@type");
103        
104        if (contentType == null)
105        {
106            throw new IOException("Invalid file content : no content type specified.");
107        }
108        content.setTypes(new String[] {contentType});
109        
110        Node title = _xPathProcessor.selectSingleNode(xmlContent, "title");
111        
112        if (title == null)
113        {
114            throw new IOException("Invalid file content : no title found, but it is mandatory.");
115        }
116
117        _importMetadata(content, xmlContent);
118    }
119    
120    @Override
121    public String[] getMimeTypes()
122    {
123        // handles xml mime-types
124        return new String[] {"application/xml", "text/xml"};
125    }
126    
127    @Override
128    public void postTreatment(ModifiablePage page, Content content, File file) throws IOException
129    {
130        // Nothing to do
131    }
132    
133    private Document getXmlDocFromFile(File file) throws FileNotFoundException, UnsupportedEncodingException, IOException
134    {
135        InputStream is = new FileInputStream(file);
136        Reader reader = new InputStreamReader(is, "UTF-8");
137        Document doc = null;
138        try
139        {
140            doc = _domParser.parseDocument(new InputSource(reader));
141        }
142        catch (SAXException e)
143        {
144            getLogger().error("[IMPORT] Unable to parse imported file " + file.getName(), e);
145        }
146        return doc;
147    }
148    
149    private void _importMetadata(ModifiableWebContent content, Node domNode) throws IOException
150    {
151        NodeList childNodes = domNode.getChildNodes();
152        for (int i = 0; i < childNodes.getLength(); i++)
153        {
154            Node metadataNode = childNodes.item(i);
155            
156            if (metadataNode.getNodeType() == Node.ELEMENT_NODE)
157            {
158                MetadataDefinition metaDef = _cTypeHelper.getMetadataDefinition(metadataNode.getLocalName(), content);
159                _importMetadata(content.getMetadataHolder(), metaDef, metadataNode);
160            }
161        }
162    }
163
164    private void _importMetadata(ModifiableCompositeMetadata contentNode, MetadataDefinition metaDef, Node domNode) throws IOException
165    {
166        if (metaDef != null)
167        {
168            if (metaDef instanceof RepeaterDefinition)
169            {
170                _setRepeater(contentNode, (RepeaterDefinition) metaDef, domNode);
171            }
172            else
173            {
174                _setMetadata(contentNode, metaDef, domNode);
175            }
176        }
177    }
178    
179    private void _setRepeater(ModifiableCompositeMetadata meta, RepeaterDefinition repeaterDef, Node domNode) throws IOException
180    {
181        ModifiableCompositeMetadata repeaterMeta = meta.getCompositeMetadata(repeaterDef.getName(), true);
182        
183        NodeList entryNodes = _xPathProcessor.selectNodeList(domNode, "entry");
184        for (int i = 0; i < entryNodes.getLength(); i++)
185        {
186            Node entryNode = entryNodes.item(i);
187            String entryName = Integer.toString(i + 1);
188            
189            ModifiableCompositeMetadata entryMeta = repeaterMeta.getCompositeMetadata(entryName, true);
190            NodeList subMetaNodes = entryNode.getChildNodes();
191            for (int j = 0; j < subMetaNodes.getLength(); j++)
192            {
193                Node subMetaNode = subMetaNodes.item(j);
194                if (subMetaNode.getNodeType() == Node.ELEMENT_NODE)
195                {
196                    String subMetaName = subMetaNode.getLocalName();
197                    MetadataDefinition childDef = repeaterDef.getMetadataDefinition(subMetaName);
198                    
199                    _importMetadata(entryMeta, childDef, subMetaNode);
200                }
201            }
202        }
203    }
204
205    private void _setMetadata(ModifiableCompositeMetadata contentNode, MetadataDefinition metaDef, Node domNode) throws IOException
206    {
207        switch (metaDef.getType())
208        {
209            case STRING:
210                setStringMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
211                break;
212            case BOOLEAN:
213                setBooleanMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
214                break;
215            case DOUBLE:
216                setDoubleMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
217                break;
218            case LONG:
219                setLongMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
220                break;
221            case COMPOSITE:
222                setCompositeMetadata(contentNode, metaDef, domNode);
223                break;
224            case DATE:
225            case DATETIME:
226                setDateMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
227                break;
228            case BINARY:
229            case FILE:
230                String[] values = getMetadataValues(metaDef, domNode);
231                if (values.length > 0)
232                {
233                    setBinaryMetadata(contentNode, metaDef, values[0]);
234                }
235                break;
236            case GEOCODE:
237                Node latitudeNode = _xPathProcessor.selectSingleNode(domNode, "latitude");
238                Node longitudeNode = _xPathProcessor.selectSingleNode(domNode, "longitude");
239                setGeocodeMetadata(contentNode, metaDef, latitudeNode.getTextContent(), longitudeNode.getTextContent());
240                break;
241            case REFERENCE:
242                break;
243            case RICH_TEXT:
244                setRichTextMetadata(contentNode, domNode, metaDef.getName());
245                break;
246            case USER:
247                break;
248            default:
249                break;
250        }
251    }
252
253    private String[] getMetadataValues(MetadataDefinition metaDef, Node domNode)
254    {
255        String[] values;
256        if (metaDef.isMultiple())
257        {
258            NodeList valuesNodeList = _xPathProcessor.selectNodeList(domNode, "value");
259            values = new String[valuesNodeList.getLength()];
260            for (int i = 0; i < valuesNodeList.getLength(); i++)
261            {
262                values[i] = valuesNodeList.item(i).getTextContent();
263            }
264        }
265        else
266        {
267            values = new String[1];
268            values[0] = domNode.getTextContent();
269        }
270        return values;
271    }
272
273    private void setStringMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
274    {
275        if (values != null)
276        {
277            if (metaDef.isMultiple())
278            {
279                meta.setMetadata(metaDef.getName(), values);
280            }
281            else
282            {
283                meta.setMetadata(metaDef.getName(), values[0]);
284            }
285        }
286    }
287    
288    /**
289     * Set a boolean metadata.
290     * @param meta the metadata holder.
291     * @param metaDef the metadata definition.
292     * @param values the metadata values.
293     */
294    protected void setBooleanMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
295    {
296        if (values != null)
297        {
298            if (metaDef.isMultiple())
299            {
300                boolean[] bValues = new boolean[values.length];
301                for (int i = 0; i < values.length; i++)
302                {
303                    bValues[i] = Boolean.parseBoolean(values[i]);
304                }
305                
306                meta.setMetadata(metaDef.getName(), bValues);
307            }
308            else
309            {
310                meta.setMetadata(metaDef.getName(), Boolean.parseBoolean(values[0]));
311            }
312        }
313    }
314    
315    /**
316     * Set a long metadata.
317     * @param meta the metadata holder.
318     * @param metaDef the metadata definition.
319     * @param values the metadata values.
320     */
321    protected void setLongMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
322    {
323        if (values != null)
324        {
325            if (metaDef.isMultiple())
326            {
327                long[] lValues = new long[values.length];
328                for (int i = 0; i < values.length; i++)
329                {
330                    lValues[i] = Long.parseLong(values[i]);
331                }
332                
333                meta.setMetadata(metaDef.getName(), lValues);
334            }
335            else
336            {
337                meta.setMetadata(metaDef.getName(), Long.parseLong(values[0]));
338            }
339        }
340    }
341    
342    /**
343     * Set a double metadata.
344     * @param meta the metadata holder.
345     * @param metaDef the metadata definition.
346     * @param values the metadata values.
347     */
348    protected void setDoubleMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
349    {
350        if (values != null)
351        {
352            if (metaDef.isMultiple())
353            {
354                double[] dValues = new double[values.length];
355                for (int i = 0; i < values.length; i++)
356                {
357                    dValues[i] = Double.parseDouble(values[i]);
358                }
359                
360                meta.setMetadata(metaDef.getName(), dValues);
361            }
362            else
363            {
364                meta.setMetadata(metaDef.getName(), Double.parseDouble(values[0]));
365            }
366        }
367    }
368    
369    /**
370     * Set a date or datetime metadata.
371     * @param meta the metadata holder.
372     * @param metaDef the metadata definition.
373     * @param values the metadata values.
374     */
375    protected void setDateMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
376    {
377        if (values != null)
378        {
379            if (metaDef.isMultiple())
380            {
381                Date[] dValues = new Date[values.length];
382                for (int i = 0; i < values.length; i++)
383                {
384                    dValues[i] = parseDate(values[i]);
385                }
386                
387                meta.setMetadata(metaDef.getName(), dValues);
388            }
389            else
390            {
391                meta.setMetadata(metaDef.getName(), parseDate(values[0]));
392            }
393        }
394    }
395    
396    /**
397     * Parse a String value as a Date.<br>
398     * Allowed formats:
399     * <ul>
400     *   <li>yyyy-MM-dd</li>
401     *   <li>yyyy-MM-dd'T'HH:mm:ss.SSSZZ</li>
402     * </ul>
403     * @param value the String value.
404     * @return the parsed Date or <code>null</code> if the value can't be parsed.
405     */
406    protected Date parseDate(String value)
407    {
408        return parseDate(value, false);
409    }
410    
411    /**
412     * Parse a String value as a Date.<br>
413     * Allowed formats:
414     * <ul>
415     *   <li>yyyy-MM-dd</li>
416     *   <li>yyyy-MM-dd'T'HH:mm:ss.SSSZZ</li>
417     * </ul>
418     * @param value the String value.
419     * @param throwException true to throw an exception if the value can't be parsed, false to return null.
420     * @return the parsed Date or <code>null</code> if the value can't be parsed and throwException is false.
421     */
422    protected Date parseDate(String value, boolean throwException)
423    {
424        Date dateValue = null;
425        
426        try
427        {
428            dateValue = ISODateTimeFormat.date().parseDateTime(value).toDate();
429        }
430        catch (Exception e)
431        {
432            dateValue = (Date) ParameterHelper.castValue(value, ParameterType.DATE);
433        }
434        
435        if (dateValue == null && throwException)
436        {
437            throw new IllegalArgumentException("'" + value + "' could not be cast as a Date.");
438        }
439        
440        return dateValue;
441    }
442    
443    /**
444     * Set a geocode metadata.
445     * @param meta the metadata holder.
446     * @param metaDef the metadata definition.
447     * @param latitude the geocode latitude as a String.
448     * @param longitude the geocode longitude as a String.
449     */
450    protected void setGeocodeMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String latitude, String longitude)
451    {
452        if (StringUtils.isNotEmpty(latitude) && StringUtils.isNotEmpty(longitude))
453        {
454            double dLong = Double.parseDouble(longitude);
455            double dLat = Double.parseDouble(latitude);
456            
457            ModifiableCompositeMetadata geoCode = meta.getCompositeMetadata(metaDef.getName(), true);
458            geoCode.setMetadata("longitude", dLong);
459            geoCode.setMetadata("latitude", dLat);
460        }
461        else
462        {
463            throw new IllegalArgumentException("Invalid geocode values: latitude='" + latitude + "', longitude='" + longitude + "'.");
464        }
465    }
466    
467    /**
468     * Set a composite metadata.
469     * @param meta the metadata holder.
470     * @param domNode the metadata DOM node.
471     * @param metaDef the metadata definition.
472     * @throws IOException If an error occurres 
473     */
474    protected void setCompositeMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, Node domNode) throws IOException
475    {
476        NodeList subMetaNodes = domNode.getChildNodes();
477        if (subMetaNodes.getLength() > 0)
478        {
479            ModifiableCompositeMetadata composite = meta.getCompositeMetadata(metaDef.getName(), true);
480            for (int i = 0; i < subMetaNodes.getLength(); i++)
481            {
482                Node subMetaNode = subMetaNodes.item(i);
483                if (subMetaNode.getNodeType() == Node.ELEMENT_NODE)
484                {
485                    MetadataDefinition childDef = metaDef.getMetadataDefinition(subMetaNode.getLocalName());
486                    
487                    _importMetadata(composite, childDef, subMetaNode);
488                }
489            }
490        }
491    }
492    
493    /**
494     * Set a file metadata.
495     * @param meta the metadata holder.
496     * @param metaDef The metadata definition
497     * @param value The value
498     */
499    protected void setBinaryMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String value)
500    {
501        if (StringUtils.isNotEmpty(value))
502        {
503            
504            try
505            {
506                Pattern pattern = Pattern.compile("filename=\"([^\"]+)\"");
507                
508                URL url = new URL(value);
509                HttpURLConnection connection = (HttpURLConnection) url.openConnection();
510                connection.setConnectTimeout(1000);
511                connection.setReadTimeout(2000);
512                
513                String contentType = StringUtils.defaultString(connection.getContentType(), "application/unknown");
514                String contentEncoding = StringUtils.defaultString(connection.getContentEncoding(), "");
515                String contentDisposition = StringUtils.defaultString(connection.getHeaderField("Content-Disposition"), "");
516                String filename = URLDecoder.decode(FilenameUtils.getName(connection.getURL().getPath()), "UTF-8");
517                if (StringUtils.isEmpty(filename))
518                {
519                    Matcher matcher = pattern.matcher(contentDisposition);
520                    if (matcher.matches())
521                    {
522                        filename = matcher.group(1);
523                    }
524                    else
525                    {
526                        filename = "unknown";
527                    }
528                }
529                
530                try (InputStream is = connection.getInputStream())
531                {
532                    ByteArrayOutputStream bos = new ByteArrayOutputStream();
533                    IOUtils.copy(is, bos);
534                    
535                    ModifiableBinaryMetadata binaryMeta = meta.getBinaryMetadata(metaDef.getName(), true);
536                    binaryMeta.setLastModified(new Date());
537                    binaryMeta.setInputStream(new ByteArrayInputStream(bos.toByteArray()));
538                    
539                    if (StringUtils.isNotEmpty(filename))
540                    {
541                        binaryMeta.setFilename(filename);
542                    }
543                    if (StringUtils.isNotEmpty(contentType))
544                    {
545                        binaryMeta.setMimeType(contentType);
546                    }
547                    if (StringUtils.isNotEmpty(contentEncoding))
548                    {
549                        binaryMeta.setEncoding(contentEncoding);
550                    }
551                }
552            }
553            catch (Exception e)
554            {
555                throw new IllegalArgumentException("Unable to fetch file from URL '" + value + "', it will be ignored.", e);
556            }
557        }
558    }
559    
560    /**
561     * Set a RichText metadata.
562     * @param meta the metadata holder.
563     * @param domNode the metadata node.
564     * @param name the metadata name.
565     * @throws IOException if an error occurs.
566     */
567    protected void setRichTextMetadata(ModifiableCompositeMetadata meta, Node domNode, String name) throws IOException
568    {
569        NodeList docbookNodes = domNode.getChildNodes();
570        for (int i = 0; i < docbookNodes.getLength(); i++)
571        {
572            Node docbookNode = docbookNodes.item(i);
573            if (docbookNode.getNodeType() == Node.ELEMENT_NODE && "article".equals(docbookNode.getLocalName()))
574            {
575                try
576                {
577                    String docbook = serializeNode(docbookNode);
578                    ModifiableRichText richText = meta.getRichText(name, true);
579                    
580                    richText.setEncoding("UTF-8");
581                    richText.setLastModified(new Date());
582                    richText.setMimeType("text/xml");
583                    richText.setInputStream(new ByteArrayInputStream(docbook.getBytes("UTF-8")));
584                }
585                catch (TransformerException e)
586                {
587                    throw new IOException("Error serializing a docbook node.", e);
588                }
589            }
590        }
591    }
592    
593    /**
594     * Serialize a XML node as a String.
595     * @param node the node.
596     * @return the XML string.
597     * @throws TransformerException if an error occurs.
598     */
599    protected String serializeNode(Node node) throws TransformerException
600    {
601        Transformer transformer = TransformerFactory.newInstance().newTransformer();
602        
603        Properties format = new Properties();
604        format.put(OutputKeys.METHOD, "xml");
605        format.put(OutputKeys.ENCODING, "UTF-8");
606        
607        transformer.setOutputProperties(format);
608        
609        StringWriter writer = new StringWriter();
610        DOMSource domSource = new DOMSource(node);
611        StreamResult result = new StreamResult(writer);
612        
613        transformer.transform(domSource, result);
614        
615        return writer.toString();
616    }
617}