001/*
002 *  Copyright 2015 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.content.indexing.solr;
017
018import java.io.IOException;
019import java.io.InputStream;
020import java.util.Arrays;
021import java.util.Collection;
022import java.util.Date;
023import java.util.HashMap;
024import java.util.Iterator;
025import java.util.List;
026import java.util.Locale;
027import java.util.Map;
028import java.util.Map.Entry;
029import java.util.Optional;
030import java.util.Set;
031import java.util.stream.Collectors;
032
033import org.apache.avalon.framework.component.Component;
034import org.apache.avalon.framework.service.ServiceException;
035import org.apache.avalon.framework.service.ServiceManager;
036import org.apache.avalon.framework.service.Serviceable;
037import org.apache.commons.lang3.ArrayUtils;
038import org.apache.commons.lang3.StringUtils;
039import org.apache.excalibur.xml.sax.SAXParser;
040import org.apache.solr.common.SolrInputDocument;
041import org.apache.tika.Tika;
042import org.apache.tika.exception.TikaException;
043import org.xml.sax.InputSource;
044import org.xml.sax.SAXException;
045
046import org.ametys.cms.content.ContentHelper;
047import org.ametys.cms.content.RichTextHandler;
048import org.ametys.cms.content.indexing.solr.content.attachment.ContentVisibleAttachmentIndexerExtensionPoint;
049import org.ametys.cms.content.references.OutgoingReferences;
050import org.ametys.cms.content.references.OutgoingReferencesExtractor;
051import org.ametys.cms.contenttype.ContentConstants;
052import org.ametys.cms.contenttype.ContentType;
053import org.ametys.cms.contenttype.ContentTypeExtensionPoint;
054import org.ametys.cms.contenttype.ContentTypesHelper;
055import org.ametys.cms.contenttype.MetadataDefinition;
056import org.ametys.cms.contenttype.MetadataManager;
057import org.ametys.cms.contenttype.MetadataType;
058import org.ametys.cms.contenttype.RepeaterDefinition;
059import org.ametys.cms.contenttype.indexing.CustomIndexingField;
060import org.ametys.cms.contenttype.indexing.CustomMetadataIndexingField;
061import org.ametys.cms.contenttype.indexing.IndexingField;
062import org.ametys.cms.contenttype.indexing.IndexingModel;
063import org.ametys.cms.contenttype.indexing.MetadataIndexingField;
064import org.ametys.cms.languages.Language;
065import org.ametys.cms.languages.LanguagesManager;
066import org.ametys.cms.repository.Content;
067import org.ametys.cms.repository.DefaultContent;
068import org.ametys.cms.search.model.SystemProperty;
069import org.ametys.cms.search.model.SystemPropertyExtensionPoint;
070import org.ametys.core.user.UserIdentity;
071import org.ametys.plugins.core.user.UserHelper;
072import org.ametys.plugins.explorer.resources.Resource;
073import org.ametys.plugins.explorer.resources.metadata.TikaProvider;
074import org.ametys.plugins.repository.AmetysObject;
075import org.ametys.plugins.repository.AmetysObjectIterable;
076import org.ametys.plugins.repository.AmetysObjectResolver;
077import org.ametys.plugins.repository.AmetysRepositoryException;
078import org.ametys.plugins.repository.TraversableAmetysObject;
079import org.ametys.plugins.repository.UnknownAmetysObjectException;
080import org.ametys.plugins.repository.metadata.BinaryMetadata;
081import org.ametys.plugins.repository.metadata.CompositeMetadata;
082import org.ametys.plugins.repository.metadata.MultilingualString;
083import org.ametys.plugins.repository.metadata.MultilingualStringHelper;
084import org.ametys.plugins.repository.metadata.RichText;
085import org.ametys.runtime.plugin.component.AbstractLogEnabled;
086
087/**
088 * Component for {@link Content} indexing into a Solr server.
089 */
090public class SolrContentIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrFieldNames
091{
092    /** The component role. */
093    public static final String ROLE = SolrContentIndexer.class.getName();
094    
    /** The Ametys object resolver */
096    protected AmetysObjectResolver _resolver;
097    /** The content type extension point */
098    protected ContentTypeExtensionPoint _cTypeEP;
099    /** The content type helper */
100    protected ContentTypesHelper _cTypesHelper;
    /** The user helper */
102    protected UserHelper _userHelper;
103    /** The Tika instance */
104    protected Tika _tika;
105    /** The resource indexer */
106    protected SolrResourceIndexer _resourceIndexer;
107    /** The system property extension point. */
108    protected SystemPropertyExtensionPoint _systemPropEP;
109    /** The content helper */
110    protected ContentHelper _contentHelper;
111    /** The outgoing references extractor */
112    protected OutgoingReferencesExtractor _outgoingReferencesExtractor;
113    /** The extension point for ContentVisibleAttachmentIndexers */
114    protected ContentVisibleAttachmentIndexerExtensionPoint _contentVisibleAttachmentIndexerEP;
115    /** The manager for languages */
116    protected LanguagesManager _languagesManager;
117    /** Avalon service manager */
118    protected ServiceManager _manager;
119    
120    @Override
121    public void service(ServiceManager manager) throws ServiceException
122    {
123        _manager = manager;
124        _resolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE);
125        _resourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE);
126        _cTypeEP = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE);
127        _contentHelper = (ContentHelper) manager.lookup(ContentHelper.ROLE);
128        _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE);
129        _userHelper = (UserHelper) manager.lookup(UserHelper.ROLE);
130        TikaProvider tikaProvider = (TikaProvider) manager.lookup(TikaProvider.ROLE);
131        _tika = tikaProvider.getTika();
132        _systemPropEP = (SystemPropertyExtensionPoint) manager.lookup(SystemPropertyExtensionPoint.ROLE);
133        _outgoingReferencesExtractor = (OutgoingReferencesExtractor) manager.lookup(OutgoingReferencesExtractor.ROLE);
134        _contentVisibleAttachmentIndexerEP = (ContentVisibleAttachmentIndexerExtensionPoint) manager.lookup(ContentVisibleAttachmentIndexerExtensionPoint.ROLE);
135        _languagesManager = (LanguagesManager) manager.lookup(LanguagesManager.ROLE);
136    }
137    
138    /**
139     * Populate a solr input document by adding fields to index into it.
140     * @param content The content to index
141     * @param document The main solr document to index into
142     * @param additionalDocuments The additional documents for repeater instances
143     * @throws Exception if an error occurred while indexing
144     */
145    public void indexContent(Content content, SolrInputDocument document, List<SolrInputDocument> additionalDocuments) throws Exception
146    {
147        // Properties specific to a stand-alone indexation.
148        String contentId = content.getId();
149        document.addField(ID, contentId);
150        document.addField(DOCUMENT_TYPE, TYPE_CONTENT);
151        
152        indexContentTitle(content, document); 
153       
154        document.addField(CONTENT_NAME, SolrIndexer.truncateUtf8StringValue(content.getName(), getLogger(), contentId, CONTENT_NAME));
155        _indexOutgoingReferences(content, document);
156        _indexVisibleAttachments(content, document);
157        
158        document.addField(WORKFLOW_REF_DV, contentId + "#workflow");
159        
160        // Index content system properties.
161        indexSystemProperties(content, document);
162        
163        // Index the fields specified in the indexation model.
164        indexModelFields(content, document, additionalDocuments);
165    }
166    
167    private void _indexOutgoingReferences(Content content, SolrInputDocument document)
168    {
169        // Found by the extractor (resource references found in all metadata of the content)
170        _outgoingReferencesExtractor.getOutgoingReferences(content).values() // key is the metadata,we do not care what metadata it comes from
171                .parallelStream()
172                .map(OutgoingReferences::entrySet)
173                .flatMap(Set::parallelStream)
174                .filter(outgoingRefs -> outgoingRefs.getKey().equals("explorer")) // only references of the resource explorer
175                .map(Entry::getValue)
176                .flatMap(List::parallelStream) // flat the resource ids
177                .forEach(resourceId -> document.addField(CONTENT_OUTGOING_REFEERENCES_RESOURCE_IDS, resourceId));
178        
179        // Attachments of the content (just the root folder)
180        Optional.ofNullable(content.getRootAttachments())
181                .map(AmetysObject::getId)
182                .ifPresent(id -> document.addField(CONTENT_OUTGOING_REFEERENCES_RESOURCE_IDS, id));
183    }
184    
185    private void _indexVisibleAttachments(Content content, SolrInputDocument document)
186    {
187        Collection<String> values = _contentVisibleAttachmentIndexerEP.getExtensionsIds()
188                .stream()
189                .map(_contentVisibleAttachmentIndexerEP::getExtension)
190                .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(content))
191                .flatMap(Collection::stream)
192                .collect(Collectors.toList());
193        document.addField(CONTENT_VISIBLE_ATTACHMENT_RESOURCE_IDS, values);
194    }
195    
196    /**
197     * Index the content title
198     * @param content The title
199     * @param document The main solr document to index into
200     */
201    protected void indexContentTitle(Content content, SolrInputDocument document)
202    {
203        if (content.getMetadataHolder().getType("title") == org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.MULTILINGUAL_STRING)    
204        {
205            MultilingualString value = content.getMetadataHolder().getMultilingualString(DefaultContent.METADATA_TITLE);
206            indexMultilingualStringValues(value, content.getId(), document, null, TITLE);
207        }
208        else
209        {
210            String title = _contentHelper.getTitle(content);
211            document.addField(TITLE, SolrIndexer.truncateUtf8StringValue(title, getLogger(), content.getId(), TITLE));
212            document.addField(TITLE_SORT, title);
213        }
214    }
215    
216    /**
217     * Index the system properties of a content.
218     * @param content The content to index.
219     * @param document The solr document to index into.
220     */
221    protected void indexSystemProperties(Content content, SolrInputDocument document)
222    {
223        for (String sysPropId : _systemPropEP.getExtensionsIds())
224        {
225            SystemProperty sysProp = _systemPropEP.getExtension(sysPropId);
226            
227            sysProp.index(content, document);
228        }
229    }
230    
231    /**
232     * Index the content type and all its supertypes in the given document (recursively).
233     * @param cTypeId The ID of the content type to index.
234     * @param document The solr document to index into.
235     * @param fieldName The field name.
236     */
237    protected void indexAllContentTypes(String cTypeId, SolrInputDocument document, String fieldName)
238    {
239        document.addField(fieldName, cTypeId);
240        
241        if (_cTypeEP.hasExtension(cTypeId))
242        {
243            ContentType contentType = _cTypeEP.getExtension(cTypeId);
244            for (String supertypeId : contentType.getSupertypeIds())
245            {
246                indexAllContentTypes(supertypeId, document, fieldName);
247            }
248        }
249    }
250    
251    /**
252     * Index the fields specified in the indexation model.
253     * @param content The content to index.
254     * @param document The main content solr document.
255     * @param additionalDocuments The additional documents for repeater instances.
256     */
257    protected void indexModelFields(Content content, SolrInputDocument document, List<SolrInputDocument> additionalDocuments)
258    {
259        IndexingModel indexingModel = null;
260        try
261        {
262            indexingModel = _cTypesHelper.getIndexingModel(content);
263        }
264        catch (RuntimeException e)
265        {
266            getLogger().error("indexContent > Error getting the indexing model of content " + content.getId(), e);
267            throw e;
268        }
269        
270        for (IndexingField field : indexingModel.getFields())
271        {
272            if (field instanceof CustomIndexingField)
273            {
274                Object[] values = ((CustomIndexingField) field).getValues(content);
275                indexValues(content, field.getName(), field.getType(), values, document, null);
276            }
277            else if (field instanceof MetadataIndexingField)
278            {
279                String metadataPath = ((MetadataIndexingField) field).getMetadataPath();
280                String[] pathSegments = metadataPath.split(ContentConstants.METADATA_PATH_SEPARATOR);
281                
282                MetadataDefinition definition = _cTypesHelper.getMetadataDefinition(pathSegments[0], content.getTypes(), content.getMixinTypes());
283                if (definition != null)
284                {
285                    findAndIndexMetadata(content, pathSegments, content.getMetadataHolder(), definition, field, field.getName(), document, null, additionalDocuments);
286                }
287            }
288        }
289    }
290    
291    /**
292     * Populate a Solr input document by adding fields for a single system property.
293     * @param content The content to index
294     * @param propertyId The system property ID.
295     * @param document The solr document
296     * @return true if there are partial update to apply
297     * @throws Exception if an error occurred
298     */
299    public boolean indexPartialSystemProperty(Content content, String propertyId, SolrInputDocument document) throws Exception
300    {
301        if (!_systemPropEP.hasExtension(propertyId))
302        {
303            throw new IllegalStateException("The property '" + propertyId + "' can't be indexed as it does not exist.");
304        }
305        
306        SolrInputDocument tempDocument = new SolrInputDocument();
307        
308        SystemProperty property = _systemPropEP.getExtension(propertyId);
309        property.index(content, tempDocument);
310        
311        if (tempDocument.isEmpty())
312        {
313            // Does not have any partial update to apply, avoid to erase all the existing fields on the Solr document corresponding to this content (it would be lost)
314            return false;
315        }
316        
317        // Copy the indexed values as partial updates.
318        for (String fieldName : tempDocument.getFieldNames())
319        {
320            Collection<Object> fieldValues = tempDocument.getFieldValues(fieldName);
321            
322            Map<String, Object> partialUpdate = new HashMap<>();
323            partialUpdate.put("set", fieldValues);
324            document.addField(fieldName, partialUpdate);
325        }
326        
327        document.addField("id", content.getId());
328        
329        return true;
330    }
331    
332    /**
333     * Find the metadata to index from its path
334     * @param content the content currently being traversed.
335     * @param pathSegments The segments of path of metadata to index
336     * @param metadata The parent composite metadata
337     * @param definition The metadata definition
338     * @param field the current indexing field.
339     * @param fieldName the name of the field to index.
340     * @param document The main solr document to index into
341     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
342     * @param additionalDocuments The additional documents
343     */
344    protected void findAndIndexMetadata(Content content, String[] pathSegments, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments)
345    {
346        String currentFieldName = pathSegments[0];
347        
348        IndexingModel indexingModel = null;
349        try
350        {
351            indexingModel = _cTypesHelper.getIndexingModel(content);
352        }
353        catch (RuntimeException e)
354        {
355            if (content != null)
356            {
357                getLogger().error("findAndIndexMetadata > Error while indexing content " + content.getId() + " metadata", e);
358            }
359            else
360            {
361                getLogger().error("findAndIndexMetadata > Error while indexing null content metadata");
362            }
363            throw e;
364        }
365        
366        IndexingField refField = indexingModel.getField(currentFieldName);
367        if (refField != null && refField instanceof CustomMetadataIndexingField)
368        {
369            CustomMetadataIndexingField overridingField = (CustomMetadataIndexingField) refField;
370            findAndIndexOverridingField(content, indexingModel, overridingField, fieldName, definition, pathSegments, document, contentDoc, additionalDocuments);
371        }
372        else
373        {
374            if (metadata.hasMetadata(currentFieldName))
375            {
376                findAndIndexMetadataField(content, pathSegments, metadata, definition, field, fieldName, document, contentDoc, additionalDocuments);
377            }
378        }
379    }
380
381    /**
382     * Find and index a metadata.
383     * @param content the current content being traversed.
384     * @param pathSegments the full metadata path segments.
385     * @param metadata the current metadata holder.
386     * @param definition the current metadata definition.
387     * @param field the current indexing field.
388     * @param fieldName the name of the field to index.
389     * @param document the solr main document.
390     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 
391     * @param additionalDocuments the solr additional documents.
392     */
393    protected void findAndIndexMetadataField(Content content, String[] pathSegments, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments)
394    {
395        String currentFieldName = pathSegments[0];
396        
397        if (pathSegments.length == 1)
398        {
399            indexMetadata(content, currentFieldName, metadata, document, contentDoc, additionalDocuments, fieldName, definition);
400            return;
401        }
402        
403        String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length);
404        
405        switch (definition.getType())
406        {
407            case BINARY:
408            case BOOLEAN:
409            case STRING:
410            case MULTILINGUAL_STRING:
411            case USER:
412            case LONG:
413            case DOUBLE:
414            case DATE:
415            case DATETIME:
416            case REFERENCE:
417            case RICH_TEXT:
418            case FILE:
419            case GEOCODE:
420                getLogger().warn("The metadata '{}' of type {} can not be a part of a path to index : {}", currentFieldName, definition.getType().toString(), StringUtils.join(pathSegments, ContentConstants.METADATA_PATH_SEPARATOR));
421                break;
422            case CONTENT:
423                String[] contentIds = metadata.getStringArray(currentFieldName, new String[0]);
424                for (String contentId : contentIds)
425                {
426                    try
427                    {
428                        Content refContent = _resolver.resolveById(contentId);
429                        MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], refContent.getTypes(), refContent.getMixinTypes());
430                        findAndIndexMetadata(refContent, followingSegments, refContent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments);
431                    }
432                    catch (UnknownAmetysObjectException e)
433                    {
434                        // Nothing to index
435                    }
436                }
437                break;
438            case SUB_CONTENT:
439                TraversableAmetysObject objectCollection = metadata.getObjectCollection(currentFieldName);
440                AmetysObjectIterable<Content> subcontents = objectCollection.getChildren();
441                for (Content subcontent : subcontents)
442                {
443                    MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], subcontent.getTypes(), subcontent.getMixinTypes());
444                    findAndIndexMetadata(subcontent, followingSegments, subcontent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments);
445                }
446                break;
447            case COMPOSITE:
448                CompositeMetadata composite = metadata.getCompositeMetadata(currentFieldName);
449   
450                if (definition instanceof RepeaterDefinition)
451                {
452                    String[] entries = composite.getMetadataNames();
453                    for (String entry : entries)
454                    {
455                        findAndIndexMetadata(content, followingSegments, composite.getCompositeMetadata(entry), definition.getMetadataDefinition(followingSegments[0]), field, fieldName, document, contentDoc, additionalDocuments);
456                    }
457                }
458                else
459                {
460                    findAndIndexMetadata(content, followingSegments, composite, definition.getMetadataDefinition(followingSegments[0]), field, fieldName, document, contentDoc, additionalDocuments);
461                }
462                break;
463            default:
464                break;
465            
466        }
467    }
468    
469    /**
470     * Find and index a property represented by an overriding field.
471     * @param content the current content being traversed.
472     * @param indexingModel the current indexing model.
473     * @param pathSegments the full metadata path segments.
474     * @param definition the current metadata definition.
475     * @param field the current indexing field.
476     * @param fieldName the name of the field to index.
477     * @param document the solr main document.
478     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
479     * @param additionalDocuments the solr additional documents.
480     */
481    protected void findAndIndexOverridingField(Content content, IndexingModel indexingModel, CustomMetadataIndexingField field, String fieldName, MetadataDefinition definition, String[] pathSegments, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments)
482    {
483        String currentFieldName = field.getName();
484        
485        if (pathSegments.length == 1)
486        {
487            indexOverridingField(field, content, fieldName, document, contentDoc, additionalDocuments);
488            return;
489        }
490        
491        String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length);
492        MetadataType type = definition.getType();
493        
494        switch (type)
495        {
496            case BINARY:
497            case BOOLEAN:
498            case STRING:
499            case MULTILINGUAL_STRING:
500            case USER:
501            case LONG:
502            case DOUBLE:
503            case DATE:
504            case DATETIME:
505            case REFERENCE:
506            case RICH_TEXT:
507            case FILE:
508            case GEOCODE:
509                getLogger().warn("The field '{}' of type {} can not be a part of a path to index : {}", currentFieldName, type.toString(), StringUtils.join(pathSegments, ContentConstants.METADATA_PATH_SEPARATOR));
510                break;
511            case COMPOSITE:
512                getLogger().warn("The type {} is invalid for the overriding field '{}'.", type.toString(), currentFieldName);
513                break;
514            case CONTENT:
515            case SUB_CONTENT:
516                String[] contentIds = (String[]) field.getValues(content);
517                for (String contentId : contentIds)
518                {
519                    Content refContent = _resolver.resolveById(contentId);
520                    MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], refContent.getTypes(), refContent.getMixinTypes());
521                    findAndIndexMetadata(refContent, followingSegments, refContent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments);
522                }
523                break;
524            default:
525                break;
526        }
527    }
528    
529    /**
530     * Index a content metadata.
531     * @param content the current content being traversed.
532     * @param metadataName The name of metadata to index
533     * @param metadata The parent composite metadata
534     * @param document the solr document to index into.
535     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
536     * @param additionalDocuments The solr additional documents used for repeater instance
537     * @param fieldName the name of the indexed field.
538     * @param definition the metadata definition.
539     */
540    public void indexMetadata(Content content, String metadataName, CompositeMetadata metadata, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments, String fieldName, MetadataDefinition definition)
541    {
542        String language = content.getLanguage();
543        
544        switch (definition.getType())
545        {
546            case STRING:
547                indexStringMetadata(metadata, metadataName, content.getId(), document, contentDoc, fieldName, language, definition);
548                break;
549            case MULTILINGUAL_STRING:
550                indexMultilingualStringMetadata(metadata, metadataName, content.getId(), document, contentDoc, fieldName, definition);
551                break;
552            case USER:
553                indexUserMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition);
554                break;
555            case GEOCODE:
556                indexGeoCodeMetadata(metadata, metadataName, document, fieldName, definition);
557                break;
558            case BINARY:
559                indexBinaryMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition);
560                break;
561            case FILE:
562                indexFileMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition);
563                break;
564            case DATE:
565                indexDateMetadata(metadata, metadataName, document, fieldName, definition);
566                break;
567            case DATETIME:
568                indexDateTimeMetadata(metadata, metadataName, document, fieldName, definition);
569                break;
570            case CONTENT:
571                indexContentMetadata(metadata, metadataName, document, fieldName, definition);
572                break;
573            case SUB_CONTENT:
574                indexSubContentMetadata(metadata, metadataName, document, fieldName, definition);
575                break;
576            case LONG:
577                indexLongMetadata(metadata, metadataName, document, fieldName, definition);
578                break;
579            case DOUBLE:
580                indexDoubleMetadata(metadata, metadataName, document, fieldName, definition);
581                break;
582            case BOOLEAN:
583                indexBooleanMetadata(metadata, metadataName, document, fieldName, definition);
584                break;
585            case RICH_TEXT:
586                indexRichtextMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition);
587                break;
588            case COMPOSITE:
589                if (definition instanceof RepeaterDefinition)
590                {
591                    indexRepeaterMetadata(content, metadata, metadataName, document, contentDoc, fieldName, definition, additionalDocuments);
592                }
593                else
594                {
595                    indexCompositeMetadata(content, metadata, metadataName, document, contentDoc, fieldName, definition, additionalDocuments);
596                }
597                break;
598            case REFERENCE:
599                // TODO reference -> to be indexed? https://issues.ametys.org/browse/CMS-8623
600                break;
601            default:
602                break;
603        }
604    }
605    
606    /**
607     * Index a property represented by an overriding field.
608     * @param field The overriding field.
609     * @param content The content of which to get the property.
610     * @param fieldName The name of the field to index.
611     * @param document the solr document to index into.
612     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
613     * @param additionalDocuments The solr additional documents used for repeater instance
614     */
615    public void indexOverridingField(CustomMetadataIndexingField field, Content content, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments)
616    {
617        Object[] values = field.getValues(content);
618        MetadataDefinition definition = field.getMetadataDefinition();
619        boolean isFacetable = definition.getEnumerator() != null;
620        String language = content.getLanguage();
621        
622        switch (definition.getType())
623        {
624            case STRING:
625                indexStringValues((String[]) values, content.getId(), document, contentDoc, fieldName, language, isFacetable);
626                break;
627            case MULTILINGUAL_STRING:
628                indexMultilingualStringValues((MultilingualString) values[0], content.getId(), document, contentDoc, fieldName);
629                break;
630            case USER:
631                UserIdentity[] users = new UserIdentity[values.length];
632                for (int i = 0; i < values.length; i++)
633                {
634                    users[i] = UserIdentity.stringToUserIdentity((String) values[i]);
635                }
636                indexUserValues(users, document, contentDoc, fieldName, language);
637                break;
638            case GEOCODE:
639                if (values.length > 1)
640                {
641                    indexGeocodeValue((double) values[0], (double) values[1], document, fieldName);
642                }
643                break;
644            case BINARY:
645                if (values.length > 0)
646                {
647                    indexFullTextBinaryValue((InputStream) values[0], document, contentDoc, fieldName, language);
648                }
649                break;
650            case FILE:
651                indexFileValue(values, document, contentDoc, fieldName, language);
652                break;
653            case DATE:
654                indexDateValues((Date[]) values, document, fieldName);
655                break;
656            case DATETIME:
657                indexDateTimeValues((Date[]) values, document, fieldName);
658                break;
659            case CONTENT:
660                indexContentValues((String[]) values, document, fieldName);
661                break;
662            case SUB_CONTENT:
663                indexContentValues((String[]) values, document, fieldName);
664                break;
665            case LONG:
666                indexLongValues((Long[]) values, document, fieldName, isFacetable);
667                break;
668            case DOUBLE:
669                indexDoubleValues((Double[]) values, document, fieldName, isFacetable);
670                break;
671            case BOOLEAN:
672                indexBooleanValues((Boolean[]) values, document, fieldName);
673                break;
674            case RICH_TEXT:
675                if (values.length > 0)
676                {
677                    indexRichtextValue((RichText) values[0], document, contentDoc, fieldName, language);
678                }
679                break;
680            case COMPOSITE:
681                break;
682            case REFERENCE:
683                // TODO reference -> to be indexed? https://issues.ametys.org/browse/CMS-8623
684                break;
685            default:
686                break;
687        }
688    }
689    
690    /**
691     * Index values 
692     * @param content The content being indexed.
693     * @param fieldName The Solr field's name
694     * @param type the type of values to index
695     * @param values the values
696     * @param document the Solr document
697     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
698     */
699    public void indexValues(Content content, String fieldName, MetadataType type, Object[] values, SolrInputDocument document, SolrInputDocument contentDoc)
700    {
701        String language = content.getLanguage();
702        
703        switch (type)
704        {
705            case STRING:
706                indexStringValues((String[]) values, content.getId(), document, contentDoc, fieldName, language, false);
707                break;
708            case MULTILINGUAL_STRING:
709                if (values.length > 0)
710                {
711                    indexMultilingualStringValues((MultilingualString) values[0], content.getId(), document, contentDoc, fieldName);
712                }
713                break;
714            case LONG:
715                indexLongValues((Long[]) values, document, fieldName, false);
716                break;
717            case DOUBLE:
718                indexDoubleValues((Double[]) values, document, fieldName, false);
719                break;
720            case DATE:
721                indexDateValues((Date[]) values, document, fieldName);
722                break;
723            case DATETIME:
724                indexDateTimeValues((Date[]) values, document, fieldName);
725                break;
726            case CONTENT:
727                indexContentValues((String[]) values, document, fieldName);
728                break;
729            case BOOLEAN:
730                indexBooleanValues((Boolean[]) values, document, fieldName);
731                break;
732            case USER:
733                UserIdentity[] users = new UserIdentity[values.length];
734                for (int i = 0; i < values.length; i++)
735                {
736                    users[i] = UserIdentity.stringToUserIdentity((String) values[i]);
737                }
738                indexUserValues(users, document, contentDoc, fieldName, language);
739                break;
740            case RICH_TEXT:
741                indexRichtextValue((RichText) values[0], document, contentDoc, fieldName, language);
742                break;
743            case BINARY:
744            case FILE:
745            case COMPOSITE:
746            case REFERENCE:
747            case SUB_CONTENT:
748            case GEOCODE:
749                getLogger().warn("Only primitive type is allowed on a custom indexing field");
750                break;
751            default:
752                break;
753        }
754    }
755    
756    
757    /**
758     * Index a 'string' metadata
759     * @param metadata The parent composite metadata
760     * @param metadataName The name of metadata to index
761     * @param contentId The content id. For logging purposes
762     * @param document The solr document to index into
763     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
764     * @param fieldName The index field name
765     * @param language The content language.
766     * @param definition The metadata definition
767     */
768    public void indexStringMetadata(CompositeMetadata metadata, String metadataName, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition)
769    {
770        String[] strValues = metadata.getStringArray(metadataName, new String[0]);
771        indexStringValues(strValues, contentId, document, contentDoc, fieldName, language, definition.getEnumerator() != null);
772    }
773    
774    /**
775     * Index a multilingual string metadata
776     * @param metadata The parent composite metadata
777     * @param metadataName The name of metadata to index
778     * @param contentId The content id. For logging purposes
779     * @param document The solr document to index into
780     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
781     * @param fieldName The index field name
782     * @param definition The metadata definition
783     */
784    public void indexMultilingualStringMetadata(CompositeMetadata metadata, String metadataName, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition)
785    {
786        MultilingualString multilingualString = metadata.getMultilingualString(metadataName);
787        indexMultilingualStringValues(multilingualString, contentId, document, contentDoc, fieldName);
788    }
789    
790    /**
791     * Index a multilingual string values
792     * @param value The multilingual string
793     * @param contentId The content id. For logging purposes
794     * @param document The solr document to index into
795     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
796     * @param fieldName The index field name
797     */
798    public void indexMultilingualStringValues(MultilingualString value, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName)
799    {
800        Set<Locale> metaLocales = value.getLocales();
801        List<String> appLanguages = _languagesManager.getAvailableLanguages()
802                                                     .values()
803                                                     .stream()
804                                                     .map(Language::getCode)
805                                                     .collect(Collectors.toList());
806        for (String appLanguageCode : appLanguages)
807        {
808            Locale appLocale = new Locale(appLanguageCode);
809            if (metaLocales.contains(appLocale))
810            {
811                String str = value.getValue(appLocale);
812                indexMultilingualStringValues(new String[] {str}, contentId, document, contentDoc, fieldName, appLocale.getLanguage());
813            }
814            
815            // Need to index sort field for every language of application, even if metadata does not have value for the given language
816            String sortValue = MultilingualStringHelper.getValue(value, appLocale);
817            indexMultilingualStringValuesForSorting(sortValue, document, fieldName, appLocale.getLanguage());
818        }
819    }
820    
821    /**
822     * Index multilingual 'string' values
823     * @param values The values
824     * @param contentId The content id. For logging purposes
825     * @param document The solr document to index into
826     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
827     * @param fieldName The index field name
828     * @param language The language for values.
829     */
830    public void indexMultilingualStringValues(String[] values, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language)
831    {
832        for (String value : values)
833        {
834            document.addField(fieldName + "_txt_" + language, value);
835            document.addField(fieldName + "_txt_stemmed_" + language, value);
836            document.addField(fieldName + "_txt_ws_" + language, value);
837            
838            // Index without analyzing.
839            String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(value, getLogger(), contentId, fieldName);
840            document.addField(fieldName + "_" + language + "_s", possiblyTruncatedValue);
841            
842            // Index without analyzing but lower-case (for wildcard queries).
843            document.addField(fieldName + "_" + language + "_s_lower", possiblyTruncatedValue.toLowerCase());
844            
845            // Exact words tokenized by whitespace.
846            document.addField(fieldName + "_" + language + "_s_ws", value.toLowerCase());
847            
848            // Index with analyze (full-text search).
849            document.addField(fieldName + "_" + language + "_txt", value);
850            
851            indexFulltextValue(document, contentDoc, value, language);
852        }
853    }
854    
855    /**
856     * Index multilingual 'string' value in sort field
857     * @param value The value
858     * @param document The solr document to index into
859     * @param fieldName The index field name
860     * @param language The language
861     */
862    public void indexMultilingualStringValuesForSorting(String value, SolrInputDocument document, String fieldName, String language)
863    {
864        String sortField = fieldName + "_" + language + SolrFieldHelper.getSortFieldSuffix(MetadataType.MULTILINGUAL_STRING);
865        if (StringUtils.isNotEmpty(value) && !document.containsKey(sortField))
866        {
867            document.addField(sortField, SolrFieldHelper.getSortValue(value));
868        }
869    }
870    
871    /**
872     * Index 'string' values
873     * @param values The values
874     * @param contentId The content id. For logging purposes
875     * @param document The solr document to index into
876     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
877     * @param fieldName The index field name
878     * @param language The content language.
879     * @param isFacetable true if the field can be used as a facet.
880     */
881    public void indexStringValues(String[] values, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, boolean isFacetable)
882    {
883        for (String value : values)
884        {
885            if (!isFacetable)
886            {
887                if (language != null) // Language can be null for multilingual content
888                {
889                    // No enumerator: index as full-text.
890                    document.addField(fieldName + "_txt_" + language, value);
891                    document.addField(fieldName + "_txt_stemmed_" + language, value);
892                    document.addField(fieldName + "_txt_ws_" + language, value);
893                }
894            }
895            else
896            {
897                // Facets (enumeration only)
898                document.addField(fieldName + "_s_dv", value);
899            }
900            
901            // Index without analyzing.
902            String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(value, getLogger(), contentId, fieldName);
903            document.addField(fieldName + "_s", possiblyTruncatedValue);
904            
905            // Index without analyzing but lower-case (for wildcard queries).
906            document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase());
907            
908            // Exact words tokenized by whitespace.
909            document.addField(fieldName + "_s_ws", value.toLowerCase());
910            
911            // Index with analyze (full-text search).
912            document.addField(fieldName + "_txt", value);
913            
914            indexFulltextValue(document, contentDoc, value, language);
915        }
916        
917        String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName);
918        if (values.length > 0 && !document.containsKey(sortField))
919        {
920            // FIXME Si la meta est enumerée, indexer le label ? dans quelle langue ?
921            document.addField(sortField, SolrFieldHelper.getSortValue(values[0]));
922        }
923    }
924    
925    /**
926     * Index a 'date' metadata
927     * @param metadata The parent composite metadata
928     * @param metadataName The name of metadata to index
929     * @param document The solr document to index into
930     * @param fieldName The index field name
931     * @param definition The metadata definition
932     */
933    public void indexDateMetadata (CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
934    {
935        Date[] dateValues = metadata.getDateArray(metadataName, new Date[0]);
936        indexDateValues (dateValues, document, fieldName);
937    }
938    
939    /**
940     * Index 'date' values
941     * @param values The values
942     * @param document The solr document to index into
943     * @param fieldName The index field name
944     */
945    public void indexDateValues (Date[] values, SolrInputDocument document, String fieldName)
946    {
947        for (Date value : values)
948        {
949            document.addField(fieldName + "_dt", SolrIndexer.dateFormat().format(value));
950        }
951        
952        String sortField = fieldName + "_dt_sort";
953        if (values.length > 0 && !document.containsKey(sortField))
954        {
955            document.addField(sortField, SolrIndexer.dateFormat().format(values[0]));
956        }
957    }
958    
959    /**
960     * Index a 'datetime' metadata
961     * @param metadata The parent composite metadata
962     * @param metadataName The name of metadata to index
963     * @param document The solr document to index into
964     * @param fieldName The index field name
965     * @param definition The metadata definition
966     */
967    public void indexDateTimeMetadata (CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
968    {
969        Date[] dateValues = metadata.getDateArray(metadataName, new Date[0]);
970        indexDateTimeValues(dateValues, document, fieldName);
971    }
972    
973    /**
974     * Index 'datetime' values
975     * @param values The values
976     * @param document The solr document to index into
977     * @param fieldName The index field name
978     */
979    public void indexDateTimeValues (Date[] values, SolrInputDocument document, String fieldName)
980    {
981        for (Date value : values)
982        {
983            document.addField(fieldName + "_dt", SolrIndexer.dateFormat().format(value));
984        }
985        
986        String sortField = fieldName + "_dt_sort";
987        if (values.length > 0 && !document.containsKey(sortField))
988        {
989            document.addField(sortField, SolrIndexer.dateFormat().format(values[0]));
990        }
991    }
992    
993    /**
994     * Index a 'double' metadata
995     * @param metadata The parent composite metadata
996     * @param metadataName The name of metadata to index
997     * @param document The solr document to index into
998     * @param fieldName The index field name
999     * @param definition The metadata definition
1000     */
1001    public void indexDoubleMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
1002    {
1003        boolean isFacetable = definition.getEnumerator() != null;
1004        double[] values = metadata.getDoubleArray(metadataName, new double[0]);
1005        indexDoubleValues (ArrayUtils.toObject(values), document, fieldName, isFacetable);
1006    }
1007    
1008    /**
1009     * Index 'double' values
1010     * @param values The values
1011     * @param document The solr document to index into
1012     * @param fieldName The index field name
1013     * @param isFacetable true if the field can be used as a facet.
1014     */
1015    public void indexDoubleValues(Double[] values, SolrInputDocument document, String fieldName, boolean isFacetable)
1016    {
1017        for (Double value : values)
1018        {
1019            document.addField(fieldName + "_d", value);
1020            if (isFacetable)
1021            {
1022                document.addField(fieldName + "_d_dv", value);
1023            }
1024        }
1025        
1026        String sortField = fieldName + "_d_sort";
1027        if (values.length > 0 && !document.containsKey(sortField))
1028        {
1029            document.addField(sortField, values[0]);
1030        }
1031    }
1032    
1033    /**
1034     * Index a 'long' metadata
1035     * @param metadata The parent composite metadata
1036     * @param metadataName The name of metadata to index
1037     * @param document The solr document to index into
1038     * @param fieldName The index field name
1039     * @param definition The metadata definition
1040     */
1041    public void indexLongMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
1042    {
1043        boolean isFacetable = definition.getEnumerator() != null;
1044        long[] values = metadata.getLongArray(metadataName, new long[0]);
1045        indexLongValues(ArrayUtils.toObject(values), document, fieldName, isFacetable);
1046    }
1047    
1048    /**
1049     * Index 'long' values
1050     * @param values The values
1051     * @param document The solr document to index into
1052     * @param fieldName The index field name
1053     * @param isFacetable true if the field can be used as a facet. 
1054     */
1055    public void indexLongValues(Long[] values, SolrInputDocument document, String fieldName, boolean isFacetable)
1056    {
1057        for (Long value : values)
1058        {
1059            document.addField(fieldName + "_l", value);
1060            if (isFacetable)
1061            {
1062                document.addField(fieldName + "_l_dv", value);
1063            }
1064        }
1065        
1066        String sortField = fieldName + "_l_sort";
1067        if (values.length > 0 && !document.containsKey(sortField))
1068        {
1069            document.addField(sortField, values[0]);
1070        }
1071    }
1072    
1073    /**
1074     * Index a 'user' metadata
1075     * @param metadata The parent composite metadata
1076     * @param metadataName The name of metadata to index
1077     * @param document The solr document to index into
1078     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1079     * @param fieldName The index field name
1080     * @param language The content language.
1081     * @param definition The metadata definition
1082     */
1083    public void indexUserMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition)
1084    {
1085        UserIdentity[] users = metadata.getUserArray(metadataName);
1086        indexUserValues(users, document, contentDoc, fieldName, language);
1087    }
1088    
1089    /**
1090     * Index 'user' values
1091     * @param users The users
1092     * @param document The solr document to index into
1093     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1094     * @param fieldName The index field name
1095     * @param language The content language.
1096     */
1097    public void indexUserValues(UserIdentity[] users, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language)
1098    {
1099        int count = 0;
1100        for (UserIdentity userIdentity : users)
1101        {
1102            String fullName = _userHelper.getUserFullName(userIdentity);
1103            String sortableName = _userHelper.getUserSortableName(userIdentity);
1104            String identityAsString = UserIdentity.userIdentityToString(userIdentity);
1105            
1106            indexFulltextValue(document, contentDoc, identityAsString, language);
1107            
1108            // Facets
1109            document.addField(fieldName + "_s_dv", identityAsString);
1110            
1111            // Dynamic fields
1112            document.addField(fieldName + "_s", identityAsString);
1113            
1114            if (StringUtils.isNotEmpty(fullName))
1115            {
1116                document.addField(fieldName + "_txt", fullName);
1117                
1118                indexFulltextValue(document, contentDoc, fullName, language);
1119            }
1120            
1121            String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName);
1122            if (count == 0 && StringUtils.isNotEmpty(sortableName) && !document.containsKey(sortField))
1123            {
1124                // Index only first user for sorting
1125                document.addField(sortField, SolrFieldHelper.getSortValue(sortableName));
1126            }
1127            count++;
1128        }
1129    }
1130    
1131    /**
1132     * Index a 'boolean' metadata
1133     * @param metadata The parent composite metadata
1134     * @param metadataName The name of metadata to index
1135     * @param document The solr document to index into
1136     * @param fieldName The index field name
1137     * @param definition The metadata definition
1138     */
1139    public void indexBooleanMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
1140    {
1141        boolean[] values = metadata.getBooleanArray(metadataName, new boolean[0]);
1142        indexBooleanValues(ArrayUtils.toObject(values), document, fieldName);
1143    }
1144    
1145    /**
1146     * Index 'boolean' values
1147     * @param values The values
1148     * @param document The solr document to index into
1149     * @param fieldName The index field name
1150     */
1151    public void indexBooleanValues(Boolean[] values, SolrInputDocument document, String fieldName)
1152    {
1153        for (Boolean value : values)
1154        {
1155            document.addField(fieldName + "_b", value);
1156            document.addField(fieldName + "_b_dv", value);
1157        }
1158        
1159        String sortField = fieldName + "_b_sort";
1160        if (values.length > 0 && !document.containsKey(sortField))
1161        {
1162            document.addField(sortField, values[0]);
1163        }
1164    }
1165    
1166    /**
1167     * Index a 'richtext' metadata
1168     * @param metadata The parent composite metadata
1169     * @param metadataName The name of metadata to index
1170     * @param document The solr document to index into
1171     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1172     * @param fieldName The index field name
1173     * @param language The content language.
1174     * @param definition The metadata definition
1175     */
1176    public void indexRichtextMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition)
1177    {
1178        indexRichtextValue(metadata.getRichText(metadataName), document, contentDoc, fieldName, language);
1179    }
1180    
1181    /**
1182     * Index 'richtext' values
1183     * @param richText The rich text to index.
1184     * @param document The solr document to index into
1185     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1186     * @param fieldName The index field name.
1187     * @param language The content language.
1188     */
1189    public void indexRichtextValue(RichText richText, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language)
1190    {
1191        try (InputStream is = richText.getInputStream())
1192        {
1193            String value = _richTextToString(is);
1194            
1195            if (language != null) // language can be null for multilingual content
1196            {
1197                // Index as a text field.
1198                document.addField(fieldName + "_txt_" + language, value);
1199                document.addField(fieldName + "_txt_stemmed_" + language, value);
1200                document.addField(fieldName + "_txt_ws_" + language, value);
1201            }
1202            
1203            // Index in the full-text value.
1204            SolrContentIndexer.indexFulltextValue(document, value, language);
1205            
1206            if (contentDoc != null)
1207            {
1208                SolrContentIndexer.indexFulltextValue(contentDoc, value, language);
1209            }
1210        }
1211        catch (Exception e)
1212        {
1213            getLogger().warn("Failed to index RICH_TEXT '" + fieldName + "'", e);
1214        }
1215    }
1216    
1217    /**
1218     * Gets a XML as a string and extract the text only
1219     * @param is The inputstream of XML
1220     * @return The text or null if the XML is not well formed
1221     */
1222    protected String _richTextToString(InputStream is)
1223    {
1224        SAXParser saxParser = null;
1225        try
1226        {
1227            RichTextHandler txtHandler = new RichTextHandler();
1228            saxParser = (SAXParser) _manager.lookup(SAXParser.ROLE);
1229            saxParser.parse(new InputSource(is), txtHandler);
1230            return txtHandler.getValue().trim();
1231        }
1232        catch (ServiceException e)
1233        {
1234            getLogger().error("Unable to get a SAX parser", e);
1235            return null;
1236        }
1237        catch (IOException | SAXException e)
1238        {
1239            getLogger().error("Cannot parse inputstream", e);
1240            return null;
1241        }
1242        finally
1243        {
1244            _manager.release(saxParser);
1245        }
1246    }
1247    
1248    
1249   
1250    /**
1251     * Index a 'binary' metadata
1252     * @param metadata The parent composite metadata
1253     * @param metadataName The name of metadata to index
1254     * @param document The solr document to index into
1255     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1256     * @param fieldName The index field name
1257     * @param language The content language.
1258     * @param definition The metadata definition
1259     */
1260    public void indexBinaryMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition)
1261    {
1262        // Index file name.
1263        BinaryMetadata binary = metadata.getBinaryMetadata(metadataName);
1264        document.addField(fieldName + "_txt", binary.getFilename());
1265        
1266        // Index the contents.
1267        indexFullTextBinary(metadata, metadataName, document, contentDoc, fieldName, language, definition);
1268    }
1269    
1270    /**
1271     * Index a 'file' metadata
1272     * @param metadata The parent composite metadata
1273     * @param metadataName The name of metadata to index
1274     * @param document The solr document to index into
1275     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1276     * @param fieldName The index field name
1277     * @param language The content language.
1278     * @param definition The metadata definition
1279     */
1280    public void indexFileMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition)
1281    {
1282        if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.BINARY.equals(metadata.getType(metadataName)))
1283        {
1284            indexBinaryMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition);
1285        }
1286        else
1287        {
1288            // Resource from the explorer.
1289            String value = metadata.getString(metadataName);
1290            
1291            try
1292            {
1293                Resource resource = (Resource) _resolver.resolveById(value);
1294                
1295                // Index file name.
1296                document.addField(fieldName + "_txt", resource.getName());
1297                
1298                // Index the contents.
1299                indexResourceContent(resource, document, contentDoc, language);
1300                
1301//                document.addField(prefix + fieldName + "$path", resource.getId());
1302//                document.addField(prefix + fieldName + "$type", "explorer");
1303//                document.addField(prefix + fieldName + "$mime-type", resource.getMimeType());
1304//                document.addField(prefix + fieldName + "$filename", filename);
1305//                document.addField(prefix + fieldName + "$lastModified", resource.getLastModified());
1306//                document.addField(prefix + fieldName + "$size", resource.getLength());
1307//                
1308//                String viewUrl = "/plugins/explorer/resource?id=" + resource.getId();
1309//                document.addField(prefix + fieldName + "$viewUrl", viewUrl);
1310//                document.addField(prefix + fieldName + "$downloadUrl", viewUrl + "&download=true");
1311            }
1312            catch (AmetysRepositoryException e)
1313            {
1314                getLogger().warn(String.format("Unable to index the resource of id '%s' : resource does not exist.", value), e);
1315            }
1316        }
1317    }
1318    
1319    /**
1320     * Index a 'file' metadata
1321     * @param values The values.
1322     * @param document The solr document to index into
1323     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1324     * @param fieldName The index field name
1325     * @param language The content language.
1326     */
1327    public void indexFileValue(Object[] values, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language)
1328    {
1329        String type = (String) values[0];
1330        if (StringUtils.equalsIgnoreCase(org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.BINARY.name(), type))
1331        {
1332            indexFullTextBinaryValue((InputStream) values[1], document, contentDoc, fieldName, language);
1333        }
1334        else
1335        {
1336            indexResourceContent((Resource) values[1], document, contentDoc, language);
1337        }
1338    }
1339    
1340    /**
1341     * Index a 'binary' metadata
1342     * @param metadata The parent composite metadata
1343     * @param metadataName The name of metadata to index
1344     * @param document The solr document to index into
1345     * @param contentDoc The content document.
1346     * @param fieldName The index field name
1347     * @param language The content language.
1348     * @param definition The metadata definition
1349     */
1350    protected void indexFullTextBinary(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition)
1351    {
1352        try (InputStream is = metadata.getBinaryMetadata(metadataName).getInputStream())
1353        {
1354            indexFullTextBinaryValue(is, document, contentDoc, fieldName, language);
1355        }
1356        catch (IOException e)
1357        {
1358            throw new RuntimeException(e);
1359        }
1360    }
1361    
1362    /**
1363     * Index a 'binary' value
1364     * @param is An InputStream on the binary data.
1365     * @param document The solr document to index into
1366     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1367     * @param fieldName The index field name
1368     * @param language The content language.
1369     */
1370    protected void indexFullTextBinaryValue(InputStream is, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language)
1371    {
1372        try
1373        {
1374            String text = _tika.parseToString(is);
1375            
1376            indexFulltextValue(document, contentDoc, text, language);
1377        }
1378        catch (Throwable e)
1379        {
1380            getLogger().warn(String.format("Failed to index binary field '%s'", fieldName), e);
1381        }
1382    }
1383    
1384    /**
1385     * Index a 'content' metadata
1386     * @param metadata The parent composite metadata
1387     * @param metadataName The name of metadata to index
1388     * @param document The solr document to index into
1389     * @param fieldName The index field name
1390     * @param definition The metadata definition
1391     */
1392    public void indexContentMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
1393    {
1394        String[] contentIds = metadata.getStringArray(metadataName, new String[0]);
1395        indexContentValues(contentIds, document, fieldName);
1396    }
1397    
1398    /**
1399     * Index content values.
1400     * @param contentIds The ID of the contents to index.
1401     * @param document The solr document to index into.
1402     * @param fieldName the field name.
1403     */
1404    public void indexContentValues (String[] contentIds, SolrInputDocument document, String fieldName)
1405    {
1406        for (String contentId : contentIds)
1407        {
1408            document.addField(fieldName + "_s", contentId);
1409            // Facets
1410            document.addField(fieldName + "_s_dv", contentId);
1411        }
1412        
1413        String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName);
1414        if (contentIds.length > 0 && !document.containsKey(sortField))
1415        {
1416            try
1417            {
1418                // TODO Est-ce qu'on peut faire autrement qu'un resolve ?
1419                Content content = _resolver.resolveById(contentIds[0]);
1420                CompositeMetadata metadataHolder = content.getMetadataHolder();
1421                if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.MULTILINGUAL_STRING.equals(metadataHolder.getType(DefaultContent.METADATA_TITLE)))
1422                {
1423                    MultilingualString value = metadataHolder.getMultilingualString(DefaultContent.METADATA_TITLE);
1424                    for (Locale locale : value.getLocales())
1425                    {
1426                        String str = value.getValue(locale);
1427                        document.addField(sortField + "_" + locale.getLanguage(), SolrFieldHelper.getSortValue(str));
1428                    }
1429                }
1430                else
1431                {
1432                    document.addField(sortField, SolrFieldHelper.getSortValue(_contentHelper.getTitle(content)));
1433                }
1434            }
1435            catch (AmetysRepositoryException e)
1436            {
1437                // Do not index
1438            }
1439        }
1440    }
1441    
1442    /**
1443     * Index a 'sub_content' metadata
1444     * @param metadata The parent composite metadata
1445     * @param metadataName The name of metadata to index
1446     * @param document The solr document to index into
1447     * @param fieldName The index field name
1448     * @param definition The metadata definition
1449     */
1450    public void indexSubContentMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
1451    {
1452        TraversableAmetysObject objectCollection = metadata.getObjectCollection(metadataName);
1453        AmetysObjectIterable<Content> subcontents = objectCollection.getChildren();
1454        for (Content subcontent : subcontents)
1455        {
1456            document.addField(fieldName + "_s", subcontent.getId());
1457            // Facets
1458            document.addField(fieldName + "_s_dv", subcontent.getId());
1459        }
1460        
1461//        String sortField = fieldName + "_s_sort";
1462        String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName);
1463        subcontents = objectCollection.getChildren();
1464        Iterator<Content> it = subcontents.iterator();
1465        
1466        if (it.hasNext() && !document.containsKey(sortField))
1467        {
1468            Content subcontent = it.next();
1469            CompositeMetadata metadataHolder = subcontent.getMetadataHolder();
1470            if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.MULTILINGUAL_STRING.equals(metadataHolder.getType(DefaultContent.METADATA_TITLE)))
1471            {
1472                MultilingualString value = metadataHolder.getMultilingualString(DefaultContent.METADATA_TITLE);
1473                for (Locale locale : value.getLocales())
1474                {
1475                    String str = value.getValue(locale);
1476                    document.addField(sortField + "_" + locale.getLanguage(), SolrFieldHelper.getSortValue(str));
1477                }
1478            }
1479            else
1480            {
1481                document.addField(sortField, SolrFieldHelper.getSortValue(_contentHelper.getTitle(subcontent)));
1482            }
1483        }
1484    }
1485    
1486    /**
1487     * Index a 'geocode' metadata
1488     * @param metadata The parent composite metadata
1489     * @param metadataName The name of metadata to index
1490     * @param document The solr document to index into
1491     * @param fieldName The index field name
1492     * @param definition The metadata definition
1493     */
1494    public void indexGeoCodeMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
1495    {
1496        CompositeMetadata geoCodeMetadata = metadata.getCompositeMetadata(metadataName);
1497        if (geoCodeMetadata.hasMetadata("longitude") && geoCodeMetadata.hasMetadata("latitude"))
1498        {
1499            double longitude = geoCodeMetadata.getDouble("longitude");
1500            double latitude = geoCodeMetadata.getDouble("latitude");
1501            
1502            indexGeocodeValue(latitude, longitude, document, fieldName);
1503        }
1504    }
1505    
1506    /**
1507     * Index a 'geocode' metadata
1508     * @param latitude the coord latitude.
1509     * @param longitude the coord longitude.
1510     * @param document The solr document to index into
1511     * @param fieldName The index field name
1512     */
1513    public void indexGeocodeValue(double latitude, double longitude, SolrInputDocument document, String fieldName)
1514    {
1515        document.addField(fieldName + "$longitude_d", longitude);
1516        document.addField(fieldName + "$latitude_d", latitude);
1517        
1518        String geoFieldName = SolrFieldHelper.getIndexingFieldName(MetadataType.GEOCODE, fieldName);
1519        document.addField(geoFieldName, longitude + " " + latitude);
1520    }
1521    
1522    /**
1523     * Index a composite metadata, i.e. browse and index the sub-metadatas.
1524     * @param content The content being indexed.
1525     * @param metadata The parent metadata.
1526     * @param metadataName The composite metadata name.
1527     * @param document The solr document to index into.
1528     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1529     * @param fieldName The field name.
1530     * @param definition The composite metadata definition.
1531     * @param additionalDocuments The solr additional documents used for repeater instance
1532     */
1533    public void indexCompositeMetadata(Content content, CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition, List<SolrInputDocument> additionalDocuments)
1534    {
1535        CompositeMetadata compositeMetadata = metadata.getCompositeMetadata(metadataName);
1536        
1537        // Index recursively
1538        Set<String> subMetadataNames = definition.getMetadataNames();
1539        for (String subMetadataName : subMetadataNames)
1540        {
1541            if (compositeMetadata.hasMetadata(subMetadataName))
1542            {
1543                indexMetadata(content, subMetadataName, compositeMetadata, document, contentDoc, additionalDocuments, fieldName + ContentConstants.METADATA_PATH_SEPARATOR + subMetadataName, definition.getMetadataDefinition(subMetadataName));
1544            }
1545        }
1546    }
1547    
1548    /**
1549     * Index a repeater metadata, i.e. browse and index the entries.
1550     * @param content The content being indexed.
1551     * @param metadata The parent metadata.
1552     * @param metadataName The repeater metadata name.
1553     * @param document The solr document to index into.
1554     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1555     * @param fieldName The field name.
1556     * @param definition The repeater metadata definition.
1557     * @param additionalDocuments The solr additional documents used for repeater instance
1558     */
1559    public void indexRepeaterMetadata(Content content, CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition, List<SolrInputDocument> additionalDocuments)
1560    {
1561        CompositeMetadata compositeMetadata = metadata.getCompositeMetadata(metadataName);
1562        
1563        // Get and sort the entry names.
1564        String[] entries = compositeMetadata.getMetadataNames();
1565        Arrays.sort(entries, MetadataManager.REPEATER_ENTRY_COMPARATOR);
1566        
1567        for (int i = 0; i < entries.length; i++)
1568        {
1569            String entryName = entries[i];
1570            int position = i + 1;
1571            
1572            CompositeMetadata entry = compositeMetadata.getCompositeMetadata(entryName);
1573            
1574            String repeaterID = document.getField("id").getFirstValue().toString() + "/" + fieldName + "/" + entryName;
1575            
1576            // Creates a new Solr document for each entry
1577            SolrInputDocument repDocument = new SolrInputDocument();
1578            repDocument.addField("id", repeaterID);
1579            document.addField(fieldName + "_s_dv", repeaterID);
1580            
1581            repDocument.addField(DOCUMENT_TYPE, TYPE_REPEATER);
1582            repDocument.addField(REPEATER_ENTRY_POSITION, position);
1583            // Add the created document to additional documents
1584            additionalDocuments.add(repDocument);
1585            
1586            SolrInputDocument parentContentDoc = contentDoc != null ? contentDoc : document; 
1587            
1588            Set<String> subMetadataNames = definition.getMetadataNames();
1589            for (String subMetadataName : subMetadataNames)
1590            {
1591                if (entry.hasMetadata(subMetadataName))
1592                {
1593                    // Created document is now the main document
1594                    indexMetadata(content, subMetadataName, entry, repDocument, parentContentDoc, additionalDocuments, subMetadataName, definition.getMetadataDefinition(subMetadataName));
1595                }
1596            }
1597        }
1598    }
1599    
1600    /**
1601     * Index the content of a resource.
1602     * @param resource The resource
1603     * @param document The solr document to index into
1604     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1605     * @param language The content language.
1606     */
1607    protected void indexResourceContent(Resource resource, SolrInputDocument document, SolrInputDocument contentDoc, String language)
1608    {
1609        try (InputStream is = resource.getInputStream())
1610        {
1611            indexResourceContentValue(is, resource.getDCSubject(), resource.getDCDescription(), language, document, contentDoc);
1612            
1613            // TODO Declare and index DC metadata?
1614            // DC meta
1615//            _resourceIndexer.indexDublinCoreMetadata(resource, document);
1616        }
1617        catch (Exception e)
1618        {
1619            getLogger().error("Unable to index resource at " + resource.getPath(), e);
1620        }
1621    }
1622    
1623    /**
1624     * Index the content of a resource.
1625     * @param is An input stream on the resource content.
1626     * @param keywords The resource keywords.
1627     * @param description The resource description.
1628     * @param language The content language.
1629     * @param document The solr document to index into
1630     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1631     * @throws TikaException If an error occurs extracting the document's text content.
1632     * @throws IOException If an error occurs reading the document's text content.
1633     */
1634    protected void indexResourceContentValue(InputStream is, String[] keywords, String description, String language, SolrInputDocument document, SolrInputDocument contentDoc) throws IOException, TikaException
1635    {
1636        String value = _tika.parseToString(is);
1637        
1638        indexFulltextValue(document, contentDoc, value, language);
1639        
1640        for (String keyword : keywords)
1641        {
1642            indexFulltextValue(document, contentDoc, keyword, language);
1643        }
1644        
1645        if (description != null)
1646        {
1647            indexFulltextValue(document, contentDoc, description, language);
1648        }
1649    }
1650    
1651    /**
1652     * Index a full-text value.
1653     * @param mainDocument The document being used, can be either the content document itself or a repeater document.
1654     * @param contentDoc The parent content document. If the mainDocument is the content document, this will be null.
1655     * @param text The text to index.
1656     * @param language The content language.
1657     */
1658    protected void indexFulltextValue(SolrInputDocument mainDocument, SolrInputDocument contentDoc, String text, String language)
1659    {
1660        indexFulltextValue(mainDocument, text, language);
1661        
1662        // The content doc is null if the main document is the content doc (to prevent indexing the data twice).
1663        if (contentDoc != null)
1664        {
1665            indexFulltextValue(contentDoc, text, language);
1666        }
1667    }
1668    
1669    /**
1670     * Index a full-text value.
1671     * @param document The document to index into.
1672     * @param text The text to index.
1673     * @param language The content language.
1674     */
1675    public static void indexFulltextValue(SolrInputDocument document, String text, String language)
1676    {
1677        if (StringUtils.isNotBlank(text))
1678        {
1679            document.addField(FULL_GENERAL, text);
1680            document.addField(FULL_EXACT_WS, text);
1681            
1682            if (StringUtils.isNotEmpty(language))
1683            {
1684                indexLanguageFulltextValue(document, text, language);
1685            }
1686        }
1687    }
1688    
1689    /**
1690     * Index a full-text value.
1691     * @param document The document to index into.
1692     * @param text The text to index.
1693     * @param languages The languages.
1694     */
1695    public static void indexFulltextValue(SolrInputDocument document, String text, Collection<String> languages)
1696    {
1697        if (StringUtils.isNotBlank(text))
1698        {
1699            document.addField(FULL_GENERAL, text);
1700            document.addField(FULL_EXACT_WS, text);
1701            
1702            for (String language : languages)
1703            {
1704                indexLanguageFulltextValue(document, text, language);
1705            }
1706        }
1707    }
1708    
1709    /**
1710     * Index a full-text value in the language-specific fields.
1711     * @param document The document to index into.
1712     * @param text The text to index.
1713     * @param language The content language.
1714     */
1715    protected static void indexLanguageFulltextValue(SolrInputDocument document, String text, String language)
1716    {
1717        document.addField(FULL_PREFIX + language, text);
1718        document.addField(FULL_STEMMED_PREFIX + language, text);
1719    }
1720}