001/*
002 *  Copyright 2015 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.content.indexing.solr;
017
018import java.io.IOException;
019import java.io.InputStream;
020import java.nio.ByteBuffer;
021import java.nio.CharBuffer;
022import java.nio.charset.CharsetDecoder;
023import java.nio.charset.CodingErrorAction;
024import java.nio.charset.StandardCharsets;
025import java.util.Arrays;
026import java.util.Collection;
027import java.util.Date;
028import java.util.HashMap;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Map;
032import java.util.Set;
033
034import org.apache.avalon.framework.component.Component;
035import org.apache.avalon.framework.service.ServiceException;
036import org.apache.avalon.framework.service.ServiceManager;
037import org.apache.avalon.framework.service.Serviceable;
038import org.apache.commons.lang3.ArrayUtils;
039import org.apache.commons.lang3.StringUtils;
040import org.apache.excalibur.xml.sax.SAXParser;
041import org.apache.solr.common.SolrInputDocument;
042import org.apache.tika.Tika;
043import org.apache.tika.exception.TikaException;
044import org.xml.sax.InputSource;
045import org.xml.sax.SAXException;
046
047import org.ametys.cms.content.RichTextHandler;
048import org.ametys.cms.contenttype.ContentConstants;
049import org.ametys.cms.contenttype.ContentType;
050import org.ametys.cms.contenttype.ContentTypeExtensionPoint;
051import org.ametys.cms.contenttype.ContentTypesHelper;
052import org.ametys.cms.contenttype.MetadataDefinition;
053import org.ametys.cms.contenttype.MetadataManager;
054import org.ametys.cms.contenttype.MetadataType;
055import org.ametys.cms.contenttype.RepeaterDefinition;
056import org.ametys.cms.contenttype.indexing.CustomIndexingField;
057import org.ametys.cms.contenttype.indexing.CustomMetadataIndexingField;
058import org.ametys.cms.contenttype.indexing.IndexingField;
059import org.ametys.cms.contenttype.indexing.IndexingModel;
060import org.ametys.cms.contenttype.indexing.MetadataIndexingField;
061import org.ametys.cms.repository.Content;
062import org.ametys.cms.search.model.SystemProperty;
063import org.ametys.cms.search.model.SystemPropertyExtensionPoint;
064import org.ametys.core.user.UserIdentity;
065import org.ametys.plugins.core.user.UserHelper;
066import org.ametys.plugins.explorer.resources.Resource;
067import org.ametys.plugins.explorer.resources.metadata.TikaProvider;
068import org.ametys.plugins.repository.AmetysObjectIterable;
069import org.ametys.plugins.repository.AmetysObjectResolver;
070import org.ametys.plugins.repository.AmetysRepositoryException;
071import org.ametys.plugins.repository.TraversableAmetysObject;
072import org.ametys.plugins.repository.UnknownAmetysObjectException;
073import org.ametys.plugins.repository.metadata.BinaryMetadata;
074import org.ametys.plugins.repository.metadata.CompositeMetadata;
075import org.ametys.plugins.repository.metadata.RichText;
076import org.ametys.runtime.plugin.component.AbstractLogEnabled;
077
078/**
079 * Component for {@link Content} indexing into a Solr server.
080 */
081public class SolrContentIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrFieldNames
082{
083    /** The component role. */
084    public static final String ROLE = SolrContentIndexer.class.getName();
085    
086    private static final int __SOLR_STRING_NB_BYTES_LIMIT = 32766; 
087    
088    /** The Ametys object resolver */
089    protected AmetysObjectResolver _resolver;
090    /** The content type extension point */
091    protected ContentTypeExtensionPoint _cTypeEP;
092    /** The content type helper */
093    protected ContentTypesHelper _cTypesHelper;
094    /** The users manager */
095    protected UserHelper _userHelper;
096    /** The Tika instance */
097    protected Tika _tika;
098    /** The resource indexer */
099    protected SolrResourceIndexer _resourceIndexer;
100    /** The sax parser */
101    protected SAXParser _parser;
102    /** The system property extension point. */
103    protected SystemPropertyExtensionPoint _systemPropEP;
104    
105    @Override
106    public void service(ServiceManager manager) throws ServiceException
107    {
108        _resolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE);
109        _resourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE);
110        _cTypeEP = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE);
111        _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE);
112        _userHelper = (UserHelper) manager.lookup(UserHelper.ROLE);
113        TikaProvider tikaProvider = (TikaProvider) manager.lookup(TikaProvider.ROLE);
114        _tika = tikaProvider.getTika();
115        _parser = (SAXParser) manager.lookup(SAXParser.ROLE);
116        _systemPropEP = (SystemPropertyExtensionPoint) manager.lookup(SystemPropertyExtensionPoint.ROLE);
117    }
118    
119    /**
120     * Populate a solr input document by adding fields to index into it.
121     * @param content The content to index
122     * @param document The main solr document to index into
123     * @param additionalDocuments The additional documents for repeater instances
124     * @throws Exception if an error occurred while indexing
125     */
126    public void indexContent(Content content, SolrInputDocument document, List<SolrInputDocument> additionalDocuments) throws Exception
127    {
128        // Properties specific to a stand-alone indexation.
129        String contentId = content.getId();
130        document.addField(ID, contentId);
131        document.addField(DOCUMENT_TYPE, TYPE_CONTENT);
132        document.addField(TITLE, _truncateUtf8StringValue(content.getTitle(), contentId, TITLE));
133        document.addField(CONTENT_NAME, _truncateUtf8StringValue(content.getName(), contentId, CONTENT_NAME));
134        
135        document.addField(WORKFLOW_REF_DV, contentId + "#workflow");
136        
137        // Index content system properties.
138        indexSystemProperties(content, document);
139        
140        // Index the fields specified in the indexation model.
141        indexModelFields(content, document, additionalDocuments);
142    }
143    
144    /**
145     * Index the system properties of a content.
146     * @param content The content to index.
147     * @param document The solr document to index into.
148     */
149    protected void indexSystemProperties(Content content, SolrInputDocument document)
150    {
151        for (String sysPropId : _systemPropEP.getExtensionsIds())
152        {
153            SystemProperty sysProp = _systemPropEP.getExtension(sysPropId);
154            
155            sysProp.index(content, document);
156        }
157    }
158    
159    /**
160     * Index the content type and all its supertypes in the given document (recursively).
161     * @param cTypeId The ID of the content type to index.
162     * @param document The solr document to index into.
163     * @param fieldName The field name.
164     */
165    protected void indexAllContentTypes(String cTypeId, SolrInputDocument document, String fieldName)
166    {
167        document.addField(fieldName, cTypeId);
168        
169        if (_cTypeEP.hasExtension(cTypeId))
170        {
171            ContentType contentType = _cTypeEP.getExtension(cTypeId);
172            for (String supertypeId : contentType.getSupertypeIds())
173            {
174                indexAllContentTypes(supertypeId, document, fieldName);
175            }
176        }
177    }
178    
179    /**
180     * Index the fields specified in the indexation model.
181     * @param content The content to index.
182     * @param document The main content solr document.
183     * @param additionalDocuments The additional documents for repeater instances.
184     */
185    protected void indexModelFields(Content content, SolrInputDocument document, List<SolrInputDocument> additionalDocuments)
186    {
187        IndexingModel indexingModel = null;
188        try
189        {
190            indexingModel = _cTypesHelper.getIndexingModel(content);
191        }
192        catch (RuntimeException e)
193        {
194            getLogger().error("indexContent > Error getting the indexing model of content " + content.getId(), e);
195            throw e;
196        }
197        
198        for (IndexingField field : indexingModel.getFields())
199        {
200            if (field instanceof CustomIndexingField)
201            {
202                Object[] values = ((CustomIndexingField) field).getValues(content);
203                indexValues(content, field.getName(), field.getType(), values, document, null);
204            }
205            else if (field instanceof MetadataIndexingField)
206            {
207                String metadataPath = ((MetadataIndexingField) field).getMetadataPath();
208                String[] pathSegments = metadataPath.split(ContentConstants.METADATA_PATH_SEPARATOR);
209                
210                MetadataDefinition definition = _cTypesHelper.getMetadataDefinition(pathSegments[0], content.getTypes(), content.getMixinTypes());
211                if (definition != null)
212                {
213                    findAndIndexMetadata(content, pathSegments, content.getMetadataHolder(), definition, field, field.getName(), document, null, additionalDocuments);
214                }
215            }
216        }
217    }
218    
219    /**
220     * Populate a Solr input document by adding fields for a single system property.
221     * @param content The content to index
222     * @param propertyId The system property ID.
223     * @param document The solr document
224     * @throws Exception if an error occurred
225     */
226    public void indexPartialSystemProperty(Content content, String propertyId, SolrInputDocument document) throws Exception
227    {
228        if (!_systemPropEP.hasExtension(propertyId))
229        {
230            throw new IllegalStateException("The property '" + propertyId + "' can't be indexed as it does not exist.");
231        }
232        
233        SolrInputDocument tempDocument = new SolrInputDocument();
234        
235        SystemProperty property = _systemPropEP.getExtension(propertyId);
236        property.index(content, tempDocument);
237        
238        // Copy the indexed values as partial updates.
239        for (String fieldName : tempDocument.getFieldNames())
240        {
241            Collection<Object> fieldValues = tempDocument.getFieldValues(fieldName);
242            
243            Map<String, Object> partialUpdate = new HashMap<>();
244            partialUpdate.put("set", fieldValues);
245            document.addField(fieldName, partialUpdate);
246        }
247        
248        document.addField("id", content.getId());
249    }
250    
251    /**
252     * Find the metadata to index from its path
253     * @param content the content currently being traversed.
254     * @param pathSegments The segments of path of metadata to index
255     * @param metadata The parent composite metadata
256     * @param definition The metadata definition
257     * @param field the current indexing field.
258     * @param fieldName the name of the field to index.
259     * @param document The main solr document to index into
260     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
261     * @param additionalDocuments The additional documents
262     */
263    protected void findAndIndexMetadata(Content content, String[] pathSegments, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments)
264    {
265        String currentFieldName = pathSegments[0];
266        
267        IndexingModel indexingModel = null;
268        try
269        {
270            indexingModel = _cTypesHelper.getIndexingModel(content);
271        }
272        catch (RuntimeException e)
273        {
274            if (content != null)
275            {
276                getLogger().error("findAndIndexMetadata > Error while indexing content " + content.getId() + " metadata", e);
277            }
278            else
279            {
280                getLogger().error("findAndIndexMetadata > Error while indexing null content metadata");
281            }
282            throw e;
283        }
284        
285        IndexingField refField = indexingModel.getField(currentFieldName);
286        if (refField != null && refField instanceof CustomMetadataIndexingField)
287        {
288            CustomMetadataIndexingField overridingField = (CustomMetadataIndexingField) refField;
289            findAndIndexOverridingField(content, indexingModel, overridingField, fieldName, definition, pathSegments, document, contentDoc, additionalDocuments);
290        }
291        else
292        {
293            if (metadata.hasMetadata(currentFieldName))
294            {
295                findAndIndexMetadataField(content, pathSegments, metadata, definition, field, fieldName, document, contentDoc, additionalDocuments);
296            }
297        }
298    }
299
300    /**
301     * Find and index a metadata.
302     * @param content the current content being traversed.
303     * @param pathSegments the full metadata path segments.
304     * @param metadata the current metadata holder.
305     * @param definition the current metadata definition.
306     * @param field the current indexing field.
307     * @param fieldName the name of the field to index.
308     * @param document the solr main document.
309     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 
310     * @param additionalDocuments the solr additional documents.
311     */
312    protected void findAndIndexMetadataField(Content content, String[] pathSegments, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments)
313    {
314        String currentFieldName = pathSegments[0];
315        
316        if (pathSegments.length == 1)
317        {
318            indexMetadata(content, currentFieldName, metadata, document, contentDoc, additionalDocuments, fieldName, definition);
319            return;
320        }
321        
322        String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length);
323        
324        switch (definition.getType())
325        {
326            case BINARY:
327            case BOOLEAN:
328            case STRING:
329            case USER:
330            case LONG:
331            case DOUBLE:
332            case DATE:
333            case DATETIME:
334            case REFERENCE:
335            case RICH_TEXT:
336            case FILE:
337            case GEOCODE:
338                getLogger().warn("The metadata '{}' of type {} can not be a part of a path to index : {}", currentFieldName, definition.getType().toString(), StringUtils.join(pathSegments, ContentConstants.METADATA_PATH_SEPARATOR));
339                break;
340            case CONTENT:
341                String[] contentIds = metadata.getStringArray(currentFieldName, new String[0]);
342                for (String contentId : contentIds)
343                {
344                    try
345                    {
346                        Content refContent = _resolver.resolveById(contentId);
347                        MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], refContent.getTypes(), refContent.getMixinTypes());
348                        findAndIndexMetadata(refContent, followingSegments, refContent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments);
349                    }
350                    catch (UnknownAmetysObjectException e)
351                    {
352                        // Nothing to index
353                    }
354                }
355                break;
356            case SUB_CONTENT:
357                TraversableAmetysObject objectCollection = metadata.getObjectCollection(currentFieldName);
358                AmetysObjectIterable<Content> subcontents = objectCollection.getChildren();
359                for (Content subcontent : subcontents)
360                {
361                    MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], subcontent.getTypes(), subcontent.getMixinTypes());
362                    findAndIndexMetadata(subcontent, followingSegments, subcontent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments);
363                }
364                break;
365            case COMPOSITE:
366                CompositeMetadata composite = metadata.getCompositeMetadata(currentFieldName);
367   
368                if (definition instanceof RepeaterDefinition)
369                {
370                    String[] entries = composite.getMetadataNames();
371                    for (String entry : entries)
372                    {
373                        findAndIndexMetadata(content, followingSegments, composite.getCompositeMetadata(entry), definition.getMetadataDefinition(followingSegments[0]), field, fieldName, document, contentDoc, additionalDocuments);
374                    }
375                }
376                else
377                {
378                    findAndIndexMetadata(content, followingSegments, composite, definition.getMetadataDefinition(followingSegments[0]), field, fieldName, document, contentDoc, additionalDocuments);
379                }
380                break;
381            default:
382                break;
383            
384        }
385    }
386    
387    /**
388     * Find and index a property represented by an overriding field.
389     * @param content the current content being traversed.
390     * @param indexingModel the current indexing model.
391     * @param pathSegments the full metadata path segments.
392     * @param definition the current metadata definition.
393     * @param field the current indexing field.
394     * @param fieldName the name of the field to index.
395     * @param document the solr main document.
396     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
397     * @param additionalDocuments the solr additional documents.
398     */
399    protected void findAndIndexOverridingField(Content content, IndexingModel indexingModel, CustomMetadataIndexingField field, String fieldName, MetadataDefinition definition, String[] pathSegments, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments)
400    {
401        String currentFieldName = field.getName();
402        
403        if (pathSegments.length == 1)
404        {
405            indexOverridingField(field, content, fieldName, document, contentDoc, additionalDocuments);
406            return;
407        }
408        
409        String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length);
410        MetadataType type = definition.getType();
411        
412        switch (type)
413        {
414            case BINARY:
415            case BOOLEAN:
416            case STRING:
417            case USER:
418            case LONG:
419            case DOUBLE:
420            case DATE:
421            case DATETIME:
422            case REFERENCE:
423            case RICH_TEXT:
424            case FILE:
425            case GEOCODE:
426                getLogger().warn("The field '{}' of type {} can not be a part of a path to index : {}", currentFieldName, type.toString(), StringUtils.join(pathSegments, ContentConstants.METADATA_PATH_SEPARATOR));
427                break;
428            case COMPOSITE:
429                getLogger().warn("The type {} is invalid for the overriding field '{}'.", type.toString(), currentFieldName);
430                break;
431            case CONTENT:
432            case SUB_CONTENT:
433                String[] contentIds = (String[]) field.getValues(content);
434                for (String contentId : contentIds)
435                {
436                    Content refContent = _resolver.resolveById(contentId);
437                    MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], refContent.getTypes(), refContent.getMixinTypes());
438                    findAndIndexMetadata(refContent, followingSegments, refContent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments);
439                }
440                break;
441            default:
442                break;
443        }
444    }
445    
446    /**
447     * Index a content metadata.
448     * @param content the current content being traversed.
449     * @param metadataName The name of metadata to index
450     * @param metadata The parent composite metadata
451     * @param document the solr document to index into.
452     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
453     * @param additionalDocuments The solr additional documents used for repeater instance
454     * @param fieldName the name of the indexed field.
455     * @param definition the metadata definition.
456     */
457    public void indexMetadata(Content content, String metadataName, CompositeMetadata metadata, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments, String fieldName, MetadataDefinition definition)
458    {
459        String language = content.getLanguage();
460        
461        switch (definition.getType())
462        {
463            case STRING:
464                indexStringMetadata(metadata, metadataName, content.getId(), document, contentDoc, fieldName, language, definition);
465                break;
466            case USER:
467                indexUserMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition);
468                break;
469            case GEOCODE:
470                indexGeoCodeMetadata(metadata, metadataName, document, fieldName, definition);
471                break;
472            case BINARY:
473                indexBinaryMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition);
474                break;
475            case FILE:
476                indexFileMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition);
477                break;
478            case DATE:
479                indexDateMetadata(metadata, metadataName, document, fieldName, definition);
480                break;
481            case DATETIME:
482                indexDateTimeMetadata(metadata, metadataName, document, fieldName, definition);
483                break;
484            case CONTENT:
485                indexContentMetadata(metadata, metadataName, document, fieldName, definition);
486                break;
487            case SUB_CONTENT:
488                indexSubContentMetadata(metadata, metadataName, document, fieldName, definition);
489                break;
490            case LONG:
491                indexLongMetadata(metadata, metadataName, document, fieldName, definition);
492                break;
493            case DOUBLE:
494                indexDoubleMetadata(metadata, metadataName, document, fieldName, definition);
495                break;
496            case BOOLEAN:
497                indexBooleanMetadata(metadata, metadataName, document, fieldName, definition);
498                break;
499            case RICH_TEXT:
500                indexRichtextMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition);
501                break;
502            case COMPOSITE:
503                if (definition instanceof RepeaterDefinition)
504                {
505                    indexRepeaterMetadata(content, metadata, metadataName, document, contentDoc, fieldName, definition, additionalDocuments);
506                }
507                else
508                {
509                    indexCompositeMetadata(content, metadata, metadataName, document, contentDoc, fieldName, definition, additionalDocuments);
510                }
511                break;
512            case REFERENCE:
513                // TODO reference -> to be indexed?
514                break;
515            default:
516                break;
517        }
518    }
519    
520    /**
521     * Index a property represented by an overriding field.
522     * @param field The overriding field.
523     * @param content The content of which to get the property.
524     * @param fieldName The name of the field to index.
525     * @param document the solr document to index into.
526     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
527     * @param additionalDocuments The solr additional documents used for repeater instance
528     */
529    public void indexOverridingField(CustomMetadataIndexingField field, Content content, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments)
530    {
531        Object[] values = field.getValues(content);
532        MetadataDefinition definition = field.getMetadataDefinition();
533        boolean isFacetable = definition.getEnumerator() != null;
534        String language = content.getLanguage();
535        
536        switch (definition.getType())
537        {
538            case STRING:
539                indexStringValues((String[]) values, content.getId(), document, contentDoc, fieldName, language, isFacetable);
540                break;
541            case USER:
542                UserIdentity[] users = new UserIdentity[values.length];
543                for (int i = 0; i < values.length; i++)
544                {
545                    users[i] = UserIdentity.stringToUserIdentity((String) values[i]);
546                }
547                indexUserValues(users, document, contentDoc, fieldName, language);
548                break;
549            case GEOCODE:
550                if (values.length > 1)
551                {
552                    indexGeocodeValue((double) values[0], (double) values[1], document, fieldName);
553                }
554                break;
555            case BINARY:
556                if (values.length > 0)
557                {
558                    indexFullTextBinaryValue((InputStream) values[0], document, contentDoc, fieldName, language);
559                }
560                break;
561            case FILE:
562                indexFileValue(values, document, contentDoc, fieldName, language);
563                break;
564            case DATE:
565                indexDateValues((Date[]) values, document, fieldName);
566                break;
567            case DATETIME:
568                indexDateTimeValues((Date[]) values, document, fieldName);
569                break;
570            case CONTENT:
571                indexContentValues((String[]) values, document, fieldName);
572                break;
573            case SUB_CONTENT:
574                indexContentValues((String[]) values, document, fieldName);
575                break;
576            case LONG:
577                indexLongValues((Long[]) values, document, fieldName, isFacetable);
578                break;
579            case DOUBLE:
580                indexDoubleValues((Double[]) values, document, fieldName, isFacetable);
581                break;
582            case BOOLEAN:
583                indexBooleanValues((Boolean[]) values, document, fieldName);
584                break;
585            case RICH_TEXT:
586                if (values.length > 0)
587                {
588                    indexRichtextValue((RichText) values[0], document, contentDoc, fieldName, language);
589                }
590                break;
591            case COMPOSITE:
592                break;
593            case REFERENCE:
594                // TODO reference -> to be indexed?
595                break;
596            default:
597                break;
598        }
599    }
600    
601    /**
602     * Index values 
603     * @param content The content being indexed.
604     * @param fieldName The Solr field's name
605     * @param type the type of values to index
606     * @param values the values
607     * @param document the Solr document
608     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
609     */
610    public void indexValues(Content content, String fieldName, MetadataType type, Object[] values, SolrInputDocument document, SolrInputDocument contentDoc)
611    {
612        String language = content.getLanguage();
613        
614        switch (type)
615        {
616            case STRING:
617                indexStringValues((String[]) values, content.getId(), document, contentDoc, fieldName, language, false);
618                break;
619            case LONG:
620                indexLongValues((Long[]) values, document, fieldName, false);
621                break;
622            case DOUBLE:
623                indexDoubleValues((Double[]) values, document, fieldName, false);
624                break;
625            case DATE:
626                indexDateValues((Date[]) values, document, fieldName);
627                break;
628            case DATETIME:
629                indexDateTimeValues((Date[]) values, document, fieldName);
630                break;
631            case CONTENT:
632                indexContentValues((String[]) values, document, fieldName);
633                break;
634            case BOOLEAN:
635                indexBooleanValues((Boolean[]) values, document, fieldName);
636                break;
637            case USER:
638                UserIdentity[] users = new UserIdentity[values.length];
639                for (int i = 0; i < values.length; i++)
640                {
641                    users[i] = UserIdentity.stringToUserIdentity((String) values[i]);
642                }
643                indexUserValues(users, document, contentDoc, fieldName, language);
644                break;
645            case RICH_TEXT:
646                indexRichtextValue((RichText) values[0], document, contentDoc, fieldName, language);
647                break;
648            case BINARY:
649            case FILE:
650            case COMPOSITE:
651            case REFERENCE:
652            case SUB_CONTENT:
653            case GEOCODE:
654                getLogger().warn("Only primitive type is allowed on a custom indexing field");
655                break;
656            default:
657                break;
658        }
659    }
660    
661    
662    /**
663     * Index a 'string' metadata
664     * @param metadata The parent composite metadata
665     * @param metadataName The name of metadata to index
666     * @param contentId The content id. For logging purposes
667     * @param document The solr document to index into
668     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
669     * @param fieldName The index field name
670     * @param language The content language.
671     * @param definition The metadata definition
672     */
673    public void indexStringMetadata(CompositeMetadata metadata, String metadataName, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition)
674    {
675        String[] strValues = metadata.getStringArray(metadataName, new String[0]);
676        indexStringValues(strValues, contentId, document, contentDoc, fieldName, language, definition.getEnumerator() != null);
677    }
678    
679    /**
680     * Index 'string' values
681     * @param values The values
682     * @param contentId The content id. For logging purposes
683     * @param document The solr document to index into
684     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
685     * @param fieldName The index field name
686     * @param language The content language.
687     * @param isFacetable true if the field can be used as a facet.
688     */
689    public void indexStringValues(String[] values, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, boolean isFacetable)
690    {
691        for (String value : values)
692        {
693            if (!isFacetable)
694            {
695                // No enumerator: index as full-text.
696                document.addField(fieldName + "_txt_" + language, value);
697                document.addField(fieldName + "_txt_stemmed_" + language, value);
698                document.addField(fieldName + "_txt_ws_" + language, value);
699            }
700            else
701            {
702                // Facets (enumeration only)
703                document.addField(fieldName + "_s_dv", value);
704            }
705            
706            // Index without analyzing.
707            String possiblyTruncatedValue = _truncateUtf8StringValue(value, contentId, fieldName);
708            document.addField(fieldName + "_s", possiblyTruncatedValue);
709            
710            // Index without analyzing but lower-case (for wildcard queries).
711            document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase());
712            
713            // Exact words tokenized by whitespace.
714            document.addField(fieldName + "_s_ws", value.toLowerCase());
715            
716            // Index with analyze (full-text search).
717            document.addField(fieldName + "_txt", value);
718            
719            indexFulltextValue(document, contentDoc, value, language);
720        }
721        
722        String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName);
723        if (values.length > 0 && !document.containsKey(sortField))
724        {
725            // FIXME Si la meta est enumerée, indexer le label ? dans quelle langue ?
726            document.addField(sortField, SolrFieldHelper.getSortValue(values[0]));
727        }
728    }
729    
730    private String _truncateUtf8StringValue(String value, String contentId /*logging purpose*/, String fieldName /*logging purpose*/)
731    {
732        if (value.length() * 4 <= __SOLR_STRING_NB_BYTES_LIMIT)
733        {
734            // With UTF-8, a character is encoded using 1, 2, 3 or 4 bytes, so (value.length() <= value.getBytes().length <= 4 * value.length())
735            // As a result, value.getBytes().length <= limit
736            return value;
737        }
738        
739        // There is a doubt, the string may need to be truncated (or not)
740        byte[] valueBytes = value.getBytes(StandardCharsets.UTF_8);
741        int bytesLength = valueBytes.length;
742        if (bytesLength <= __SOLR_STRING_NB_BYTES_LIMIT)
743        {
744            return value;
745        }
746        
747        getLogger().warn("The string value for content '{}' and field name '{}' is longer ({}) than the max bytes length {}. It will be truncated to prevent Solr error, but you should consider verifying why this string is so long.", contentId, fieldName, bytesLength, __SOLR_STRING_NB_BYTES_LIMIT);
748        
749        // Need a truncation (inspired by https://stackoverflow.com/questions/119328/how-do-i-truncate-a-java-string-to-fit-in-a-given-number-of-bytes-once-utf-8-en#answer-35148974)
750        CharBuffer charBuffer = CharBuffer.allocate(__SOLR_STRING_NB_BYTES_LIMIT);
751        CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
752                                                       .onMalformedInput(CodingErrorAction.IGNORE);
753        decoder.decode(ByteBuffer.wrap(valueBytes, 0, __SOLR_STRING_NB_BYTES_LIMIT), charBuffer, true);
754        decoder.flush(charBuffer);
755        return new String(charBuffer.array(), 0, charBuffer.position());
756    }
757    
758    /**
759     * Index a 'date' metadata
760     * @param metadata The parent composite metadata
761     * @param metadataName The name of metadata to index
762     * @param document The solr document to index into
763     * @param fieldName The index field name
764     * @param definition The metadata definition
765     */
766    public void indexDateMetadata (CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
767    {
768        Date[] dateValues = metadata.getDateArray(metadataName, new Date[0]);
769        indexDateValues (dateValues, document, fieldName);
770    }
771    
772    /**
773     * Index 'date' values
774     * @param values The values
775     * @param document The solr document to index into
776     * @param fieldName The index field name
777     */
778    public void indexDateValues (Date[] values, SolrInputDocument document, String fieldName)
779    {
780        for (Date value : values)
781        {
782            document.addField(fieldName + "_dt", SolrIndexer.dateFormat().format(value));
783        }
784        
785        String sortField = fieldName + "_dt_sort";
786        if (values.length > 0 && !document.containsKey(sortField))
787        {
788            document.addField(sortField, SolrIndexer.dateFormat().format(values[0]));
789        }
790    }
791    
792    /**
793     * Index a 'datetime' metadata
794     * @param metadata The parent composite metadata
795     * @param metadataName The name of metadata to index
796     * @param document The solr document to index into
797     * @param fieldName The index field name
798     * @param definition The metadata definition
799     */
800    public void indexDateTimeMetadata (CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
801    {
802        Date[] dateValues = metadata.getDateArray(metadataName, new Date[0]);
803        indexDateTimeValues(dateValues, document, fieldName);
804    }
805    
806    /**
807     * Index 'datetime' values
808     * @param values The values
809     * @param document The solr document to index into
810     * @param fieldName The index field name
811     */
812    public void indexDateTimeValues (Date[] values, SolrInputDocument document, String fieldName)
813    {
814        for (Date value : values)
815        {
816            document.addField(fieldName + "_dt", SolrIndexer.dateFormat().format(value));
817        }
818        
819        String sortField = fieldName + "_dt_sort";
820        if (values.length > 0 && !document.containsKey(sortField))
821        {
822            document.addField(sortField, SolrIndexer.dateFormat().format(values[0]));
823        }
824    }
825    
826    /**
827     * Index a 'double' metadata
828     * @param metadata The parent composite metadata
829     * @param metadataName The name of metadata to index
830     * @param document The solr document to index into
831     * @param fieldName The index field name
832     * @param definition The metadata definition
833     */
834    public void indexDoubleMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
835    {
836        boolean isFacetable = definition.getEnumerator() != null;
837        double[] values = metadata.getDoubleArray(metadataName, new double[0]);
838        indexDoubleValues (ArrayUtils.toObject(values), document, fieldName, isFacetable);
839    }
840    
841    /**
842     * Index 'double' values
843     * @param values The values
844     * @param document The solr document to index into
845     * @param fieldName The index field name
846     * @param isFacetable true if the field can be used as a facet.
847     */
848    public void indexDoubleValues(Double[] values, SolrInputDocument document, String fieldName, boolean isFacetable)
849    {
850        for (Double value : values)
851        {
852            document.addField(fieldName + "_d", value);
853            if (isFacetable)
854            {
855                document.addField(fieldName + "_d_dv", value);
856            }
857        }
858        
859        String sortField = fieldName + "_d_sort";
860        if (values.length > 0 && !document.containsKey(sortField))
861        {
862            document.addField(sortField, values[0]);
863        }
864    }
865    
866    /**
867     * Index a 'long' metadata
868     * @param metadata The parent composite metadata
869     * @param metadataName The name of metadata to index
870     * @param document The solr document to index into
871     * @param fieldName The index field name
872     * @param definition The metadata definition
873     */
874    public void indexLongMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
875    {
876        boolean isFacetable = definition.getEnumerator() != null;
877        long[] values = metadata.getLongArray(metadataName, new long[0]);
878        indexLongValues(ArrayUtils.toObject(values), document, fieldName, isFacetable);
879    }
880    
881    /**
882     * Index 'long' values
883     * @param values The values
884     * @param document The solr document to index into
885     * @param fieldName The index field name
886     * @param isFacetable true if the field can be used as a facet. 
887     */
888    public void indexLongValues(Long[] values, SolrInputDocument document, String fieldName, boolean isFacetable)
889    {
890        for (Long value : values)
891        {
892            document.addField(fieldName + "_l", value);
893            if (isFacetable)
894            {
895                document.addField(fieldName + "_l_dv", value);
896            }
897        }
898        
899        String sortField = fieldName + "_l_sort";
900        if (values.length > 0 && !document.containsKey(sortField))
901        {
902            document.addField(sortField, values[0]);
903        }
904    }
905    
906    /**
907     * Index a 'user' metadata
908     * @param metadata The parent composite metadata
909     * @param metadataName The name of metadata to index
910     * @param document The solr document to index into
911     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
912     * @param fieldName The index field name
913     * @param language The content language.
914     * @param definition The metadata definition
915     */
916    public void indexUserMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition)
917    {
918        UserIdentity[] users = metadata.getUserArray(metadataName);
919        indexUserValues(users, document, contentDoc, fieldName, language);
920    }
921    
922    /**
923     * Index 'user' values
924     * @param users The users
925     * @param document The solr document to index into
926     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
927     * @param fieldName The index field name
928     * @param language The content language.
929     */
930    public void indexUserValues(UserIdentity[] users, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language)
931    {
932        int count = 0;
933        for (UserIdentity userIdentity : users)
934        {
935            String fullName = _userHelper.getUserFullName(userIdentity);
936            String identityAsString = UserIdentity.userIdentityToString(userIdentity);
937            
938            indexFulltextValue(document, contentDoc, identityAsString, language);
939            
940            // Dynamic fields
941            document.addField(fieldName + "_s", identityAsString);
942            
943            if (StringUtils.isNotEmpty(fullName))
944            {
945                document.addField(fieldName + "_txt", fullName);
946                
947                indexFulltextValue(document, contentDoc, fullName, language);
948            }
949            
950            String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName);
951            if (count == 0 && StringUtils.isNotEmpty(fullName) && !document.containsKey(sortField))
952            {
953                // Index only first user for sorting
954                document.addField(sortField, SolrFieldHelper.getSortValue(fullName));
955            }
956            count++;
957        }
958    }
959    
960    /**
961     * Index a 'boolean' metadata
962     * @param metadata The parent composite metadata
963     * @param metadataName The name of metadata to index
964     * @param document The solr document to index into
965     * @param fieldName The index field name
966     * @param definition The metadata definition
967     */
968    public void indexBooleanMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
969    {
970        boolean[] values = metadata.getBooleanArray(metadataName, new boolean[0]);
971        indexBooleanValues(ArrayUtils.toObject(values), document, fieldName);
972    }
973    
974    /**
975     * Index 'boolean' values
976     * @param values The values
977     * @param document The solr document to index into
978     * @param fieldName The index field name
979     */
980    public void indexBooleanValues(Boolean[] values, SolrInputDocument document, String fieldName)
981    {
982        for (Boolean value : values)
983        {
984            document.addField(fieldName + "_b", value);
985        }
986        
987        String sortField = fieldName + "_b_sort";
988        if (values.length > 0 && !document.containsKey(sortField))
989        {
990            document.addField(sortField, values[0]);
991        }
992    }
993    
994    /**
995     * Index a 'richtext' metadata
996     * @param metadata The parent composite metadata
997     * @param metadataName The name of metadata to index
998     * @param document The solr document to index into
999     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1000     * @param fieldName The index field name
1001     * @param language The content language.
1002     * @param definition The metadata definition
1003     */
1004    public void indexRichtextMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition)
1005    {
1006        indexRichtextValue(metadata.getRichText(metadataName), document, contentDoc, fieldName, language);
1007    }
1008    
1009    /**
1010     * Index 'richtext' values
1011     * @param richText The rich text to index.
1012     * @param document The solr document to index into
1013     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1014     * @param fieldName The index field name.
1015     * @param language The content language.
1016     */
1017    public void indexRichtextValue(RichText richText, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language)
1018    {
1019        try (InputStream is = richText.getInputStream())
1020        {
1021            String value = _richTextToString(is);
1022            
1023            // Index as a text field.
1024            document.addField(fieldName + "_txt_" + language, value);
1025            document.addField(fieldName + "_txt_stemmed_" + language, value);
1026            document.addField(fieldName + "_txt_ws_" + language, value);
1027            
1028            // Index in the full-text value.
1029            SolrContentIndexer.indexFulltextValue(document, value, language);
1030            
1031            if (contentDoc != null)
1032            {
1033                SolrContentIndexer.indexFulltextValue(contentDoc, value, language);
1034            }
1035        }
1036        catch (Exception e)
1037        {
1038            getLogger().warn("Failed to index RICH_TEXT value'", e);
1039        }
1040    }
1041    
1042    /**
1043     * Gets a XML as a string and extract the text only
1044     * @param is The inputstream of XML
1045     * @return The text or null if the XML is not well formed
1046     */
1047    protected String _richTextToString(InputStream is)
1048    {
1049        try
1050        {
1051            RichTextHandler txtHandler = new RichTextHandler();
1052            _parser.parse(new InputSource(is), txtHandler);
1053            return txtHandler.getValue().trim();
1054        }
1055        catch (IOException | SAXException e)
1056        {
1057            getLogger().error("Cannot parse inputstream", e);
1058            return null;
1059        }
1060    }
1061    
1062    
1063   
1064    /**
1065     * Index a 'binary' metadata
1066     * @param metadata The parent composite metadata
1067     * @param metadataName The name of metadata to index
1068     * @param document The solr document to index into
1069     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1070     * @param fieldName The index field name
1071     * @param language The content language.
1072     * @param definition The metadata definition
1073     */
1074    public void indexBinaryMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition)
1075    {
1076        // Index file name.
1077        BinaryMetadata binary = metadata.getBinaryMetadata(metadataName);
1078        document.addField(fieldName + "_txt", binary.getFilename());
1079        
1080        // Index the contents.
1081        indexFullTextBinary(metadata, metadataName, document, contentDoc, fieldName, language, definition);
1082    }
1083    
1084    /**
1085     * Index a 'file' metadata
1086     * @param metadata The parent composite metadata
1087     * @param metadataName The name of metadata to index
1088     * @param document The solr document to index into
1089     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1090     * @param fieldName The index field name
1091     * @param language The content language.
1092     * @param definition The metadata definition
1093     */
1094    public void indexFileMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition)
1095    {
1096        if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.BINARY.equals(metadata.getType(metadataName)))
1097        {
1098            indexBinaryMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition);
1099        }
1100        else
1101        {
1102            // Resource from the explorer.
1103            String value = metadata.getString(metadataName);
1104            
1105            try
1106            {
1107                Resource resource = (Resource) _resolver.resolveById(value);
1108                
1109                // Index file name.
1110                document.addField(fieldName + "_txt", resource.getName());
1111                
1112                // Index the contents.
1113                indexResourceContent(resource, document, contentDoc, language);
1114                
1115//                document.addField(prefix + fieldName + "$path", resource.getId());
1116//                document.addField(prefix + fieldName + "$type", "explorer");
1117//                document.addField(prefix + fieldName + "$mime-type", resource.getMimeType());
1118//                document.addField(prefix + fieldName + "$filename", filename);
1119//                document.addField(prefix + fieldName + "$lastModified", resource.getLastModified());
1120//                document.addField(prefix + fieldName + "$size", resource.getLength());
1121//                
1122//                String viewUrl = "/plugins/explorer/resource?id=" + resource.getId();
1123//                document.addField(prefix + fieldName + "$viewUrl", viewUrl);
1124//                document.addField(prefix + fieldName + "$downloadUrl", viewUrl + "&download=true");
1125            }
1126            catch (AmetysRepositoryException e)
1127            {
1128                getLogger().warn(String.format("Unable to index the resource of id '%s' : resource does not exist.", value), e);
1129            }
1130        }
1131    }
1132    
1133    /**
1134     * Index a 'file' metadata
1135     * @param values The values.
1136     * @param document The solr document to index into
1137     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1138     * @param fieldName The index field name
1139     * @param language The content language.
1140     */
1141    public void indexFileValue(Object[] values, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language)
1142    {
1143        String type = (String) values[0];
1144        if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.BINARY.equals(type))
1145        {
1146            indexFullTextBinaryValue((InputStream) values[1], document, contentDoc, fieldName, language);
1147        }
1148        else
1149        {
1150            indexResourceContent((Resource) values[1], document, contentDoc, language);
1151        }
1152    }
1153    
1154    /**
1155     * Index a 'binary' metadata
1156     * @param metadata The parent composite metadata
1157     * @param metadataName The name of metadata to index
1158     * @param document The solr document to index into
1159     * @param contentDoc The content document.
1160     * @param fieldName The index field name
1161     * @param language The content language.
1162     * @param definition The metadata definition
1163     */
1164    protected void indexFullTextBinary(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition)
1165    {
1166        try (InputStream is = metadata.getBinaryMetadata(metadataName).getInputStream())
1167        {
1168            indexFullTextBinaryValue(is, document, contentDoc, fieldName, language);
1169        }
1170        catch (IOException e)
1171        {
1172            throw new RuntimeException(e);
1173        }
1174    }
1175    
1176    /**
1177     * Index a 'binary' value
1178     * @param is An InputStream on the binary data.
1179     * @param document The solr document to index into
1180     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1181     * @param fieldName The index field name
1182     * @param language The content language.
1183     */
1184    protected void indexFullTextBinaryValue(InputStream is, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language)
1185    {
1186        try
1187        {
1188            String text = _tika.parseToString(is);
1189            
1190            indexFulltextValue(document, contentDoc, text, language);
1191        }
1192        catch (Throwable e)
1193        {
1194            getLogger().warn(String.format("Failed to index binary field '%s'", fieldName), e);
1195        }
1196    }
1197    
1198    /**
1199     * Index a 'content' metadata
1200     * @param metadata The parent composite metadata
1201     * @param metadataName The name of metadata to index
1202     * @param document The solr document to index into
1203     * @param fieldName The index field name
1204     * @param definition The metadata definition
1205     */
1206    public void indexContentMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
1207    {
1208        String[] contentIds = metadata.getStringArray(metadataName, new String[0]);
1209        indexContentValues(contentIds, document, fieldName);
1210    }
1211    
1212    /**
1213     * Index content values.
1214     * @param contentIds The ID of the contents to index.
1215     * @param document The solr document to index into.
1216     * @param fieldName the field name.
1217     */
1218    public void indexContentValues (String[] contentIds, SolrInputDocument document, String fieldName)
1219    {
1220        for (String contentId : contentIds)
1221        {
1222            document.addField(fieldName + "_s", contentId);
1223            // Facets
1224            document.addField(fieldName + "_s_dv", contentId);
1225        }
1226        
1227        String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName);
1228        if (contentIds.length > 0 && !document.containsKey(sortField))
1229        {
1230            try
1231            {
1232                // TODO Est-ce qu'on peut faire autrement qu'un resolve ?
1233                Content content = _resolver.resolveById(contentIds[0]);
1234                document.addField(sortField, SolrFieldHelper.getSortValue(content.getTitle()));
1235            }
1236            catch (AmetysRepositoryException e)
1237            {
1238                // Do not index
1239            }
1240        }
1241    }
1242    
1243    /**
1244     * Index a 'sub_content' metadata
1245     * @param metadata The parent composite metadata
1246     * @param metadataName The name of metadata to index
1247     * @param document The solr document to index into
1248     * @param fieldName The index field name
1249     * @param definition The metadata definition
1250     */
1251    public void indexSubContentMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
1252    {
1253        TraversableAmetysObject objectCollection = metadata.getObjectCollection(metadataName);
1254        AmetysObjectIterable<Content> subcontents = objectCollection.getChildren();
1255        for (Content subcontent : subcontents)
1256        {
1257            document.addField(fieldName + "_s", subcontent.getId());
1258            // Facets
1259            document.addField(fieldName + "_s_dv", subcontent.getId());
1260        }
1261        
1262//        String sortField = fieldName + "_s_sort";
1263        String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName);
1264        subcontents = objectCollection.getChildren();
1265        Iterator<Content> it = subcontents.iterator();
1266        
1267        if (it.hasNext() && !document.containsKey(sortField))
1268        {
1269            Content subcontent = it.next();
1270            document.addField(sortField, SolrFieldHelper.getSortValue(subcontent.getTitle()));
1271        }
1272    }
1273    
1274    /**
1275     * Index a 'geocode' metadata
1276     * @param metadata The parent composite metadata
1277     * @param metadataName The name of metadata to index
1278     * @param document The solr document to index into
1279     * @param fieldName The index field name
1280     * @param definition The metadata definition
1281     */
1282    public void indexGeoCodeMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition)
1283    {
1284        CompositeMetadata geoCodeMetadata = metadata.getCompositeMetadata(metadataName);
1285        if (geoCodeMetadata.hasMetadata("longitude") && geoCodeMetadata.hasMetadata("latitude"))
1286        {
1287            double longitude = geoCodeMetadata.getDouble("longitude");
1288            double latitude = geoCodeMetadata.getDouble("latitude");
1289            
1290            indexGeocodeValue(latitude, longitude, document, fieldName);
1291        }
1292    }
1293    
1294    /**
1295     * Index a 'geocode' metadata
1296     * @param latitude the coord latitude.
1297     * @param longitude the coord longitude.
1298     * @param document The solr document to index into
1299     * @param fieldName The index field name
1300     */
1301    public void indexGeocodeValue(double latitude, double longitude, SolrInputDocument document, String fieldName)
1302    {
1303        document.addField(fieldName + "$longitude_d", longitude);
1304        document.addField(fieldName + "$latitude_d", latitude);
1305        
1306        String geoFieldName = SolrFieldHelper.getIndexingFieldName(MetadataType.GEOCODE, fieldName);
1307        document.addField(geoFieldName, longitude + " " + latitude);
1308    }
1309    
1310    /**
1311     * Index a composite metadata, i.e. browse and index the sub-metadatas.
1312     * @param content The content being indexed.
1313     * @param metadata The parent metadata.
1314     * @param metadataName The composite metadata name.
1315     * @param document The solr document to index into.
1316     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1317     * @param fieldName The field name.
1318     * @param definition The composite metadata definition.
1319     * @param additionalDocuments The solr additional documents used for repeater instance
1320     */
1321    public void indexCompositeMetadata(Content content, CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition, List<SolrInputDocument> additionalDocuments)
1322    {
1323        CompositeMetadata compositeMetadata = metadata.getCompositeMetadata(metadataName);
1324        
1325        // Index recursively
1326        Set<String> subMetadataNames = definition.getMetadataNames();
1327        for (String subMetadataName : subMetadataNames)
1328        {
1329            if (compositeMetadata.hasMetadata(subMetadataName))
1330            {
1331                indexMetadata(content, subMetadataName, compositeMetadata, document, contentDoc, additionalDocuments, fieldName + ContentConstants.METADATA_PATH_SEPARATOR + subMetadataName, definition.getMetadataDefinition(subMetadataName));
1332            }
1333        }
1334    }
1335    
1336    /**
1337     * Index a repeater metadata, i.e. browse and index the entries.
1338     * @param content The content being indexed.
1339     * @param metadata The parent metadata.
1340     * @param metadataName The repeater metadata name.
1341     * @param document The solr document to index into.
1342     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1343     * @param fieldName The field name.
1344     * @param definition The repeater metadata definition.
1345     * @param additionalDocuments The solr additional documents used for repeater instance
1346     */
1347    public void indexRepeaterMetadata(Content content, CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition, List<SolrInputDocument> additionalDocuments)
1348    {
1349        CompositeMetadata compositeMetadata = metadata.getCompositeMetadata(metadataName);
1350        
1351        // Get and sort the entry names.
1352        String[] entries = compositeMetadata.getMetadataNames();
1353        Arrays.sort(entries, MetadataManager.REPEATER_ENTRY_COMPARATOR);
1354        
1355        for (int i = 0; i < entries.length; i++)
1356        {
1357            String entryName = entries[i];
1358            int position = i + 1;
1359            
1360            CompositeMetadata entry = compositeMetadata.getCompositeMetadata(entryName);
1361            
1362            String repeaterID = document.getField("id").getFirstValue().toString() + "/" + fieldName + "/" + entryName;
1363            
1364            // Creates a new Solr document for each entry
1365            SolrInputDocument repDocument = new SolrInputDocument();
1366            repDocument.addField("id", repeaterID);
1367            document.addField(fieldName + "_s_dv", repeaterID);
1368            
1369            repDocument.addField(DOCUMENT_TYPE, TYPE_REPEATER);
1370            repDocument.addField(REPEATER_ENTRY_POSITION, position);
1371            // Add the created document to additional documents
1372            additionalDocuments.add(repDocument);
1373            
1374            SolrInputDocument parentContentDoc = contentDoc != null ? contentDoc : document; 
1375            
1376            Set<String> subMetadataNames = definition.getMetadataNames();
1377            for (String subMetadataName : subMetadataNames)
1378            {
1379                if (entry.hasMetadata(subMetadataName))
1380                {
1381                    // Created document is now the main document
1382                    indexMetadata(content, subMetadataName, entry, repDocument, parentContentDoc, additionalDocuments, subMetadataName, definition.getMetadataDefinition(subMetadataName));
1383                }
1384            }
1385        }
1386    }
1387    
1388    /**
1389     * Index the content of a resource.
1390     * @param resource The resource
1391     * @param document The solr document to index into
1392     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1393     * @param language The content language.
1394     */
1395    protected void indexResourceContent(Resource resource, SolrInputDocument document, SolrInputDocument contentDoc, String language)
1396    {
1397        try (InputStream is = resource.getInputStream())
1398        {
1399            indexResourceContentValue(is, resource.getDCSubject(), resource.getDCDescription(), language, document, contentDoc);
1400            
1401            // TODO Declare and index DC metadata?
1402            // DC meta
1403//            _resourceIndexer.indexDublinCoreMetadata(resource, document);
1404        }
1405        catch (Exception e)
1406        {
1407            getLogger().error("Unable to index resource at " + resource.getPath(), e);
1408        }
1409    }
1410    
1411    /**
1412     * Index the content of a resource.
1413     * @param is An input stream on the resource content.
1414     * @param keywords The resource keywords.
1415     * @param description The resource description.
1416     * @param language The content language.
1417     * @param document The solr document to index into
1418     * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself).
1419     * @throws TikaException If an error occurs extracting the document's text content.
1420     * @throws IOException If an error occurs reading the document's text content.
1421     */
1422    protected void indexResourceContentValue(InputStream is, String[] keywords, String description, String language, SolrInputDocument document, SolrInputDocument contentDoc) throws IOException, TikaException
1423    {
1424        String value = _tika.parseToString(is);
1425        
1426        indexFulltextValue(document, contentDoc, value, language);
1427        
1428        for (String keyword : keywords)
1429        {
1430            indexFulltextValue(document, contentDoc, keyword, language);
1431        }
1432        
1433        if (description != null)
1434        {
1435            indexFulltextValue(document, contentDoc, description, language);
1436        }
1437    }
1438    
1439    /**
1440     * Index a full-text value.
1441     * @param mainDocument The document being used, can be either the content document itself or a repeater document.
1442     * @param contentDoc The parent content document. If the mainDocument is the content document, this will be null.
1443     * @param text The text to index.
1444     * @param language The content language.
1445     */
1446    protected void indexFulltextValue(SolrInputDocument mainDocument, SolrInputDocument contentDoc, String text, String language)
1447    {
1448        indexFulltextValue(mainDocument, text, language);
1449        
1450        // The content doc is null if the main document is the content doc (to prevent indexing the data twice).
1451        if (contentDoc != null)
1452        {
1453            indexFulltextValue(contentDoc, text, language);
1454        }
1455    }
1456    
1457    /**
1458     * Index a full-text value.
1459     * @param document The document to index into.
1460     * @param text The text to index.
1461     * @param language The content language.
1462     */
1463    public static void indexFulltextValue(SolrInputDocument document, String text, String language)
1464    {
1465        if (StringUtils.isNotBlank(text))
1466        {
1467            document.addField(FULL_GENERAL, text);
1468            document.addField(FULL_EXACT_WS, text);
1469            
1470            indexLanguageFulltextValue(document, text, language);
1471        }
1472    }
1473    
1474    /**
1475     * Index a full-text value.
1476     * @param document The document to index into.
1477     * @param text The text to index.
1478     * @param languages The languages.
1479     */
1480    public static void indexFulltextValue(SolrInputDocument document, String text, Collection<String> languages)
1481    {
1482        if (StringUtils.isNotBlank(text))
1483        {
1484            document.addField(FULL_GENERAL, text);
1485            document.addField(FULL_EXACT_WS, text);
1486            
1487            for (String language : languages)
1488            {
1489                indexLanguageFulltextValue(document, text, language);
1490            }
1491        }
1492    }
1493    
1494    /**
1495     * Index a full-text value in the language-specific fields.
1496     * @param document The document to index into.
1497     * @param text The text to index.
1498     * @param language The content language.
1499     */
1500    protected static void indexLanguageFulltextValue(SolrInputDocument document, String text, String language)
1501    {
1502        document.addField(FULL_PREFIX + language, text);
1503        document.addField(FULL_STEMMED_PREFIX + language, text);
1504    }
1505}