001/* 002 * Copyright 2015 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.cms.content.indexing.solr; 017 018import java.io.IOException; 019import java.io.InputStream; 020import java.util.Arrays; 021import java.util.Collection; 022import java.util.Date; 023import java.util.HashMap; 024import java.util.Iterator; 025import java.util.List; 026import java.util.Locale; 027import java.util.Map; 028import java.util.Map.Entry; 029import java.util.Optional; 030import java.util.Set; 031import java.util.stream.Collectors; 032import java.util.stream.Stream; 033 034import org.apache.avalon.framework.component.Component; 035import org.apache.avalon.framework.service.ServiceException; 036import org.apache.avalon.framework.service.ServiceManager; 037import org.apache.avalon.framework.service.Serviceable; 038import org.apache.commons.lang3.ArrayUtils; 039import org.apache.commons.lang3.StringUtils; 040import org.apache.excalibur.xml.sax.SAXParser; 041import org.apache.solr.common.SolrInputDocument; 042import org.apache.tika.Tika; 043import org.apache.tika.exception.TikaException; 044import org.xml.sax.InputSource; 045import org.xml.sax.SAXException; 046 047import org.ametys.cms.content.ContentHelper; 048import org.ametys.cms.content.RichTextHandler; 049import org.ametys.cms.content.indexing.solr.content.attachment.ContentVisibleAttachmentIndexerExtensionPoint; 050import org.ametys.cms.content.references.OutgoingReferences; 051import org.ametys.cms.content.references.OutgoingReferencesExtractor; 052import org.ametys.cms.contenttype.ContentConstants; 053import org.ametys.cms.contenttype.ContentType; 054import org.ametys.cms.contenttype.ContentTypeExtensionPoint; 055import org.ametys.cms.contenttype.ContentTypesHelper; 056import org.ametys.cms.contenttype.MetadataDefinition; 057import org.ametys.cms.contenttype.MetadataManager; 058import org.ametys.cms.contenttype.MetadataType; 059import org.ametys.cms.contenttype.RepeaterDefinition; 060import org.ametys.cms.contenttype.indexing.CustomIndexingField; 061import org.ametys.cms.contenttype.indexing.CustomMetadataIndexingField; 062import org.ametys.cms.contenttype.indexing.IndexingField; 063import org.ametys.cms.contenttype.indexing.IndexingModel; 064import org.ametys.cms.contenttype.indexing.MetadataIndexingField; 065import org.ametys.cms.languages.Language; 066import org.ametys.cms.languages.LanguagesManager; 067import org.ametys.cms.repository.Content; 068import org.ametys.cms.search.model.SystemProperty; 069import org.ametys.cms.search.model.SystemPropertyExtensionPoint; 070import org.ametys.core.user.UserIdentity; 071import org.ametys.plugins.core.user.UserHelper; 072import org.ametys.plugins.explorer.resources.Resource; 073import org.ametys.plugins.explorer.resources.metadata.TikaProvider; 074import org.ametys.plugins.repository.AmetysObject; 075import org.ametys.plugins.repository.AmetysObjectIterable; 076import org.ametys.plugins.repository.AmetysObjectResolver; 077import org.ametys.plugins.repository.AmetysRepositoryException; 078import org.ametys.plugins.repository.TraversableAmetysObject; 079import org.ametys.plugins.repository.UnknownAmetysObjectException; 080import org.ametys.plugins.repository.metadata.BinaryMetadata; 081import org.ametys.plugins.repository.metadata.CompositeMetadata; 082import org.ametys.plugins.repository.metadata.MultilingualString; 083import org.ametys.plugins.repository.metadata.MultilingualStringHelper; 084import org.ametys.plugins.repository.metadata.RichText; 085import org.ametys.runtime.plugin.component.AbstractLogEnabled; 086 087/** 088 * Component for {@link Content} indexing into a Solr server. 089 */ 090public class SolrContentIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrFieldNames 091{ 092 /** The component role. */ 093 public static final String ROLE = SolrContentIndexer.class.getName(); 094 095 /** The Ametys objet resolver */ 096 protected AmetysObjectResolver _resolver; 097 /** The content type extension point */ 098 protected ContentTypeExtensionPoint _cTypeEP; 099 /** The content type helper */ 100 protected ContentTypesHelper _cTypesHelper; 101 /** The users manager */ 102 protected UserHelper _userHelper; 103 /** The Tika instance */ 104 protected Tika _tika; 105 /** The resource indexer */ 106 protected SolrResourceIndexer _resourceIndexer; 107 /** The system property extension point. */ 108 protected SystemPropertyExtensionPoint _systemPropEP; 109 /** The content helper */ 110 protected ContentHelper _contentHelper; 111 /** The outgoing references extractor */ 112 protected OutgoingReferencesExtractor _outgoingReferencesExtractor; 113 /** The extension point for ContentVisibleAttachmentIndexers */ 114 protected ContentVisibleAttachmentIndexerExtensionPoint _contentVisibleAttachmentIndexerEP; 115 /** The manager for languages */ 116 protected LanguagesManager _languagesManager; 117 /** Avalon service manager */ 118 protected ServiceManager _manager; 119 120 @Override 121 public void service(ServiceManager manager) throws ServiceException 122 { 123 _manager = manager; 124 _resolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE); 125 _resourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE); 126 _cTypeEP = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE); 127 _contentHelper = (ContentHelper) manager.lookup(ContentHelper.ROLE); 128 _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE); 129 _userHelper = (UserHelper) manager.lookup(UserHelper.ROLE); 130 TikaProvider tikaProvider = (TikaProvider) manager.lookup(TikaProvider.ROLE); 131 _tika = tikaProvider.getTika(); 132 _systemPropEP = (SystemPropertyExtensionPoint) manager.lookup(SystemPropertyExtensionPoint.ROLE); 133 _outgoingReferencesExtractor = (OutgoingReferencesExtractor) manager.lookup(OutgoingReferencesExtractor.ROLE); 134 _contentVisibleAttachmentIndexerEP = (ContentVisibleAttachmentIndexerExtensionPoint) manager.lookup(ContentVisibleAttachmentIndexerExtensionPoint.ROLE); 135 _languagesManager = (LanguagesManager) manager.lookup(LanguagesManager.ROLE); 136 } 137 138 /** 139 * Populate a solr input document by adding fields to index into it. 140 * @param content The content to index 141 * @param document The main solr document to index into 142 * @param additionalDocuments The additional documents for repeater instances 143 * @throws Exception if an error occurred while indexing 144 */ 145 public void indexContent(Content content, SolrInputDocument document, List<SolrInputDocument> additionalDocuments) throws Exception 146 { 147 // Properties specific to a stand-alone indexation. 148 String contentId = content.getId(); 149 document.addField(ID, contentId); 150 document.addField(DOCUMENT_TYPE, TYPE_CONTENT); 151 152 indexContentTitle(content, document); 153 154 document.addField(CONTENT_NAME, SolrIndexer.truncateUtf8StringValue(content.getName(), getLogger(), contentId, CONTENT_NAME)); 155 _indexOutgoingReferences(content, document); 156 _indexVisibleAttachments(content, document); 157 158 document.addField(WORKFLOW_REF_DV, contentId + "#workflow"); 159 160 // Index content system properties. 161 indexSystemProperties(content, document); 162 163 // Index the fields specified in the indexation model. 164 indexModelFields(content, document, additionalDocuments); 165 } 166 167 private void _indexOutgoingReferences(Content content, SolrInputDocument document) 168 { 169 // Found by the extractor (resource references found in all data of the content) 170 _outgoingReferencesExtractor.getOutgoingReferences(content).values() // key is the data path, we do not care what data it comes from 171 .parallelStream() 172 .map(OutgoingReferences::entrySet) 173 .flatMap(Set::parallelStream) 174 .filter(outgoingRefs -> outgoingRefs.getKey().equals("explorer")) // only references of the resource explorer 175 .map(Entry::getValue) 176 .flatMap(List::parallelStream) // flat the resource ids 177 .forEach(resourceId -> document.addField(CONTENT_OUTGOING_REFEERENCES_RESOURCE_IDS, resourceId)); 178 179 // Attachments of the content (just the root folder) 180 Optional.ofNullable(content.getRootAttachments()) 181 .map(AmetysObject::getId) 182 .ifPresent(id -> document.addField(CONTENT_OUTGOING_REFEERENCES_RESOURCE_IDS, id)); 183 } 184 185 private void _indexVisibleAttachments(Content content, SolrInputDocument document) 186 { 187 Collection<String> values = _contentVisibleAttachmentIndexerEP.getExtensionsIds() 188 .stream() 189 .map(_contentVisibleAttachmentIndexerEP::getExtension) 190 .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(content)) 191 .flatMap(Collection::stream) 192 .collect(Collectors.toList()); 193 document.addField(CONTENT_VISIBLE_ATTACHMENT_RESOURCE_IDS, values); 194 } 195 196 /** 197 * Index the content title 198 * @param content The title 199 * @param document The main solr document to index into 200 */ 201 protected void indexContentTitle(Content content, SolrInputDocument document) 202 { 203 if (content.getMetadataHolder().getType(Content.ATTRIBUTE_TITLE) == org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.MULTILINGUAL_STRING) 204 { 205 MultilingualString value = content.getMetadataHolder().getMultilingualString(Content.ATTRIBUTE_TITLE); 206 indexMultilingualStringValues(value, content.getId(), document, null, TITLE); 207 } 208 else 209 { 210 String title = _contentHelper.getTitle(content); 211 document.addField(TITLE, SolrIndexer.truncateUtf8StringValue(title, getLogger(), content.getId(), TITLE)); 212 document.addField(TITLE_SORT, title); 213 } 214 } 215 216 /** 217 * Index the system properties of a content. 218 * @param content The content to index. 219 * @param document The solr document to index into. 220 */ 221 protected void indexSystemProperties(Content content, SolrInputDocument document) 222 { 223 for (String sysPropId : _systemPropEP.getExtensionsIds()) 224 { 225 SystemProperty sysProp = _systemPropEP.getExtension(sysPropId); 226 227 sysProp.index(content, document); 228 } 229 } 230 231 /** 232 * Index the content type and all its supertypes in the given document (recursively). 233 * @param cTypeId The ID of the content type to index. 234 * @param document The solr document to index into. 235 * @param fieldName The field name. 236 */ 237 protected void indexAllContentTypes(String cTypeId, SolrInputDocument document, String fieldName) 238 { 239 document.addField(fieldName, cTypeId); 240 241 if (_cTypeEP.hasExtension(cTypeId)) 242 { 243 ContentType contentType = _cTypeEP.getExtension(cTypeId); 244 for (String supertypeId : contentType.getSupertypeIds()) 245 { 246 indexAllContentTypes(supertypeId, document, fieldName); 247 } 248 } 249 } 250 251 /** 252 * Index the fields specified in the indexation model. 253 * @param content The content to index. 254 * @param document The main content solr document. 255 * @param additionalDocuments The additional documents for repeater instances. 256 */ 257 protected void indexModelFields(Content content, SolrInputDocument document, List<SolrInputDocument> additionalDocuments) 258 { 259 IndexingModel indexingModel = null; 260 try 261 { 262 indexingModel = _cTypesHelper.getIndexingModel(content); 263 } 264 catch (RuntimeException e) 265 { 266 getLogger().error("indexContent > Error getting the indexing model of content " + content.getId(), e); 267 throw e; 268 } 269 270 for (IndexingField field : indexingModel.getFields()) 271 { 272 if (field instanceof CustomIndexingField) 273 { 274 Object[] values = ((CustomIndexingField) field).getValues(content); 275 indexValues(content, field.getName(), field.getType(), values, document, null); 276 } 277 else if (field instanceof MetadataIndexingField) 278 { 279 String metadataPath = ((MetadataIndexingField) field).getMetadataPath(); 280 String[] pathSegments = metadataPath.split(ContentConstants.METADATA_PATH_SEPARATOR); 281 282 MetadataDefinition definition = _cTypesHelper.getMetadataDefinition(pathSegments[0], content.getTypes(), content.getMixinTypes()); 283 if (definition != null) 284 { 285 findAndIndexMetadata(content, pathSegments, content.getMetadataHolder(), definition, field, field.getName(), document, null, additionalDocuments); 286 } 287 } 288 } 289 } 290 291 /** 292 * Populate a Solr input document by adding fields for a single system property. 293 * @param content The content to index 294 * @param propertyId The system property ID. 295 * @param document The solr document 296 * @return true if there are partial update to apply 297 * @throws Exception if an error occurred 298 */ 299 public boolean indexPartialSystemProperty(Content content, String propertyId, SolrInputDocument document) throws Exception 300 { 301 if (!_systemPropEP.hasExtension(propertyId)) 302 { 303 throw new IllegalStateException("The property '" + propertyId + "' can't be indexed as it does not exist."); 304 } 305 306 SolrInputDocument tempDocument = new SolrInputDocument(); 307 308 SystemProperty property = _systemPropEP.getExtension(propertyId); 309 property.index(content, tempDocument); 310 311 if (tempDocument.isEmpty()) 312 { 313 // Does not have any partial update to apply, avoid to erase all the existing fields on the Solr document corresponding to this content (it would be lost) 314 return false; 315 } 316 317 // Copy the indexed values as partial updates. 318 for (String fieldName : tempDocument.getFieldNames()) 319 { 320 Collection<Object> fieldValues = tempDocument.getFieldValues(fieldName); 321 322 Map<String, Object> partialUpdate = new HashMap<>(); 323 partialUpdate.put("set", fieldValues); 324 document.addField(fieldName, partialUpdate); 325 } 326 327 document.addField("id", content.getId()); 328 329 return true; 330 } 331 332 /** 333 * Find the metadata to index from its path 334 * @param content the content currently being traversed. 335 * @param pathSegments The segments of path of metadata to index 336 * @param metadata The parent composite metadata 337 * @param definition The metadata definition 338 * @param field the current indexing field. 339 * @param fieldName the name of the field to index. 340 * @param document The main solr document to index into 341 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 342 * @param additionalDocuments The additional documents 343 */ 344 protected void findAndIndexMetadata(Content content, String[] pathSegments, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 345 { 346 String currentFieldName = pathSegments[0]; 347 348 IndexingModel indexingModel = null; 349 try 350 { 351 indexingModel = _cTypesHelper.getIndexingModel(content); 352 } 353 catch (RuntimeException e) 354 { 355 if (content != null) 356 { 357 getLogger().error("findAndIndexMetadata > Error while indexing content " + content.getId() + " metadata", e); 358 } 359 else 360 { 361 getLogger().error("findAndIndexMetadata > Error while indexing null content metadata"); 362 } 363 throw e; 364 } 365 366 IndexingField refField = indexingModel.getField(currentFieldName); 367 if (refField != null && refField instanceof CustomMetadataIndexingField) 368 { 369 CustomMetadataIndexingField overridingField = (CustomMetadataIndexingField) refField; 370 findAndIndexOverridingField(content, indexingModel, overridingField, fieldName, definition, pathSegments, document, contentDoc, additionalDocuments); 371 } 372 else 373 { 374 if (metadata.hasMetadata(currentFieldName)) 375 { 376 findAndIndexMetadataField(content, pathSegments, metadata, definition, field, fieldName, document, contentDoc, additionalDocuments); 377 } 378 } 379 } 380 381 /** 382 * Find and index a metadata. 383 * @param content the current content being traversed. 384 * @param pathSegments the full metadata path segments. 385 * @param metadata the current metadata holder. 386 * @param definition the current metadata definition. 387 * @param field the current indexing field. 388 * @param fieldName the name of the field to index. 389 * @param document the solr main document. 390 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 391 * @param additionalDocuments the solr additional documents. 392 */ 393 protected void findAndIndexMetadataField(Content content, String[] pathSegments, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 394 { 395 String currentFieldName = pathSegments[0]; 396 397 if (pathSegments.length == 1) 398 { 399 indexMetadata(content, currentFieldName, metadata, document, contentDoc, additionalDocuments, fieldName, definition); 400 return; 401 } 402 403 String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length); 404 405 switch (definition.getType()) 406 { 407 case BINARY: 408 case BOOLEAN: 409 case STRING: 410 case MULTILINGUAL_STRING: 411 case USER: 412 case LONG: 413 case DOUBLE: 414 case DATE: 415 case DATETIME: 416 case REFERENCE: 417 case RICH_TEXT: 418 case FILE: 419 case GEOCODE: 420 getLogger().warn("The metadata '{}' of type {} can not be a part of a path to index : {}", currentFieldName, definition.getType().toString(), StringUtils.join(pathSegments, ContentConstants.METADATA_PATH_SEPARATOR)); 421 break; 422 case CONTENT: 423 String[] contentIds = metadata.getStringArray(currentFieldName, new String[0]); 424 for (String contentId : contentIds) 425 { 426 try 427 { 428 Content refContent = _resolver.resolveById(contentId); 429 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], refContent.getTypes(), refContent.getMixinTypes()); 430 findAndIndexMetadata(refContent, followingSegments, refContent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 431 } 432 catch (UnknownAmetysObjectException e) 433 { 434 // Nothing to index 435 } 436 } 437 break; 438 case SUB_CONTENT: 439 TraversableAmetysObject objectCollection = metadata.getObjectCollection(currentFieldName); 440 AmetysObjectIterable<Content> subcontents = objectCollection.getChildren(); 441 for (Content subcontent : subcontents) 442 { 443 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], subcontent.getTypes(), subcontent.getMixinTypes()); 444 findAndIndexMetadata(subcontent, followingSegments, subcontent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 445 } 446 break; 447 case COMPOSITE: 448 CompositeMetadata composite = metadata.getCompositeMetadata(currentFieldName); 449 450 if (definition instanceof RepeaterDefinition) 451 { 452 String[] entries = composite.getMetadataNames(); 453 for (String entry : entries) 454 { 455 findAndIndexMetadata(content, followingSegments, composite.getCompositeMetadata(entry), definition.getMetadataDefinition(followingSegments[0]), field, fieldName, document, contentDoc, additionalDocuments); 456 } 457 } 458 else 459 { 460 findAndIndexMetadata(content, followingSegments, composite, definition.getMetadataDefinition(followingSegments[0]), field, fieldName, document, contentDoc, additionalDocuments); 461 } 462 break; 463 default: 464 break; 465 466 } 467 } 468 469 /** 470 * Find and index a property represented by an overriding field. 471 * @param content the current content being traversed. 472 * @param indexingModel the current indexing model. 473 * @param pathSegments the full metadata path segments. 474 * @param definition the current metadata definition. 475 * @param field the current indexing field. 476 * @param fieldName the name of the field to index. 477 * @param document the solr main document. 478 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 479 * @param additionalDocuments the solr additional documents. 480 */ 481 protected void findAndIndexOverridingField(Content content, IndexingModel indexingModel, CustomMetadataIndexingField field, String fieldName, MetadataDefinition definition, String[] pathSegments, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 482 { 483 String currentFieldName = field.getName(); 484 485 if (pathSegments.length == 1) 486 { 487 indexOverridingField(field, content, fieldName, document, contentDoc, additionalDocuments); 488 return; 489 } 490 491 String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length); 492 MetadataType type = definition.getType(); 493 494 switch (type) 495 { 496 case BINARY: 497 case BOOLEAN: 498 case STRING: 499 case MULTILINGUAL_STRING: 500 case USER: 501 case LONG: 502 case DOUBLE: 503 case DATE: 504 case DATETIME: 505 case REFERENCE: 506 case RICH_TEXT: 507 case FILE: 508 case GEOCODE: 509 getLogger().warn("The field '{}' of type {} can not be a part of a path to index : {}", currentFieldName, type.toString(), StringUtils.join(pathSegments, ContentConstants.METADATA_PATH_SEPARATOR)); 510 break; 511 case COMPOSITE: 512 getLogger().warn("The type {} is invalid for the overriding field '{}'.", type.toString(), currentFieldName); 513 break; 514 case CONTENT: 515 case SUB_CONTENT: 516 String[] contentIds = (String[]) field.getValues(content); 517 for (String contentId : contentIds) 518 { 519 Content refContent = _resolver.resolveById(contentId); 520 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], refContent.getTypes(), refContent.getMixinTypes()); 521 findAndIndexMetadata(refContent, followingSegments, refContent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 522 } 523 break; 524 default: 525 break; 526 } 527 } 528 529 /** 530 * Index a content metadata. 531 * @param content the current content being traversed. 532 * @param metadataName The name of metadata to index 533 * @param metadata The parent composite metadata 534 * @param document the solr document to index into. 535 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 536 * @param additionalDocuments The solr additional documents used for repeater instance 537 * @param fieldName the name of the indexed field. 538 * @param definition the metadata definition. 539 */ 540 public void indexMetadata(Content content, String metadataName, CompositeMetadata metadata, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments, String fieldName, MetadataDefinition definition) 541 { 542 String language = content.getLanguage(); 543 544 switch (definition.getType()) 545 { 546 case STRING: 547 indexStringMetadata(metadata, metadataName, content.getId(), document, contentDoc, fieldName, language, definition); 548 break; 549 case MULTILINGUAL_STRING: 550 indexMultilingualStringMetadata(metadata, metadataName, content.getId(), document, contentDoc, fieldName, definition); 551 break; 552 case USER: 553 indexUserMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 554 break; 555 case GEOCODE: 556 indexGeoCodeMetadata(metadata, metadataName, document, fieldName, definition); 557 break; 558 case BINARY: 559 indexBinaryMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 560 break; 561 case FILE: 562 indexFileMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 563 break; 564 case DATE: 565 indexDateMetadata(metadata, metadataName, document, fieldName, definition); 566 break; 567 case DATETIME: 568 indexDateTimeMetadata(metadata, metadataName, document, fieldName, definition); 569 break; 570 case CONTENT: 571 indexContentMetadata(metadata, metadataName, document, fieldName, definition); 572 break; 573 case SUB_CONTENT: 574 indexSubContentMetadata(metadata, metadataName, document, fieldName, definition); 575 break; 576 case LONG: 577 indexLongMetadata(metadata, metadataName, document, fieldName, definition); 578 break; 579 case DOUBLE: 580 indexDoubleMetadata(metadata, metadataName, document, fieldName, definition); 581 break; 582 case BOOLEAN: 583 indexBooleanMetadata(metadata, metadataName, document, fieldName, definition); 584 break; 585 case RICH_TEXT: 586 indexRichtextMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 587 break; 588 case COMPOSITE: 589 if (definition instanceof RepeaterDefinition) 590 { 591 indexRepeaterMetadata(content, metadata, metadataName, document, contentDoc, fieldName, definition, additionalDocuments); 592 } 593 else 594 { 595 indexCompositeMetadata(content, metadata, metadataName, document, contentDoc, fieldName, definition, additionalDocuments); 596 } 597 break; 598 case REFERENCE: 599 indexReferenceMetadata(metadata, metadataName, content.getId(), document, contentDoc, fieldName, language, definition); 600 break; 601 default: 602 break; 603 } 604 } 605 606 /** 607 * Index a property represented by an overriding field. 608 * @param field The overriding field. 609 * @param content The content of which to get the property. 610 * @param fieldName The name of the field to index. 611 * @param document the solr document to index into. 612 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 613 * @param additionalDocuments The solr additional documents used for repeater instance 614 */ 615 public void indexOverridingField(CustomMetadataIndexingField field, Content content, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 616 { 617 Object[] values = field.getValues(content); 618 MetadataDefinition definition = field.getMetadataDefinition(); 619 boolean isFacetable = definition.getEnumerator() != null; 620 String language = content.getLanguage(); 621 622 switch (definition.getType()) 623 { 624 case STRING: 625 indexStringValues((String[]) values, content.getId(), document, contentDoc, fieldName, language, isFacetable); 626 break; 627 case MULTILINGUAL_STRING: 628 indexMultilingualStringValues((MultilingualString) values[0], content.getId(), document, contentDoc, fieldName); 629 break; 630 case USER: 631 UserIdentity[] users = new UserIdentity[values.length]; 632 for (int i = 0; i < values.length; i++) 633 { 634 users[i] = UserIdentity.stringToUserIdentity((String) values[i]); 635 } 636 indexUserValues(users, document, contentDoc, fieldName, language); 637 break; 638 case GEOCODE: 639 if (values.length > 1) 640 { 641 indexGeocodeValue((double) values[0], (double) values[1], document, fieldName); 642 } 643 break; 644 case BINARY: 645 if (values.length > 0) 646 { 647 indexFullTextBinaryValue((InputStream) values[0], document, contentDoc, fieldName, language); 648 } 649 break; 650 case FILE: 651 indexFileValue(values, document, contentDoc, fieldName, language); 652 break; 653 case DATE: 654 indexDateValues((Date[]) values, document, fieldName); 655 break; 656 case DATETIME: 657 indexDateTimeValues((Date[]) values, document, fieldName); 658 break; 659 case CONTENT: 660 indexContentValues((String[]) values, document, fieldName); 661 break; 662 case SUB_CONTENT: 663 indexContentValues((String[]) values, document, fieldName); 664 break; 665 case LONG: 666 indexLongValues((Long[]) values, document, fieldName, isFacetable); 667 break; 668 case DOUBLE: 669 indexDoubleValues((Double[]) values, document, fieldName, isFacetable); 670 break; 671 case BOOLEAN: 672 indexBooleanValues((Boolean[]) values, document, fieldName); 673 break; 674 case RICH_TEXT: 675 if (values.length > 0) 676 { 677 indexRichtextValue((RichText) values[0], document, contentDoc, fieldName, language); 678 } 679 break; 680 case COMPOSITE: 681 break; 682 case REFERENCE: 683 // References are get as Map<String, Object> with keys "type" and "value" 684 String[] referenceValues = Stream.of((Map<String, Object>[]) values) 685 // Only keep the value 686 .map(m -> m.get("value")) 687 // Transform it as String 688 .map(Object::toString) 689 // Exclude empty values 690 .filter(StringUtils::isNotBlank) 691 // Collect result in String array 692 .toArray(String[]::new); 693 694 // Index like String values 695 indexStringValues(referenceValues, content.getId(), document, contentDoc, fieldName, language, isFacetable); 696 break; 697 default: 698 break; 699 } 700 } 701 702 /** 703 * Index values 704 * @param content The content being indexed. 705 * @param fieldName The Solr field's name 706 * @param type the type of values to index 707 * @param values the values 708 * @param document the Solr document 709 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 710 */ 711 public void indexValues(Content content, String fieldName, MetadataType type, Object[] values, SolrInputDocument document, SolrInputDocument contentDoc) 712 { 713 String language = content.getLanguage(); 714 715 switch (type) 716 { 717 case STRING: 718 indexStringValues((String[]) values, content.getId(), document, contentDoc, fieldName, language, false); 719 break; 720 case MULTILINGUAL_STRING: 721 if (values.length > 0) 722 { 723 indexMultilingualStringValues((MultilingualString) values[0], content.getId(), document, contentDoc, fieldName); 724 } 725 break; 726 case LONG: 727 indexLongValues((Long[]) values, document, fieldName, false); 728 break; 729 case DOUBLE: 730 indexDoubleValues((Double[]) values, document, fieldName, false); 731 break; 732 case DATE: 733 indexDateValues((Date[]) values, document, fieldName); 734 break; 735 case DATETIME: 736 indexDateTimeValues((Date[]) values, document, fieldName); 737 break; 738 case CONTENT: 739 indexContentValues((String[]) values, document, fieldName); 740 break; 741 case BOOLEAN: 742 indexBooleanValues((Boolean[]) values, document, fieldName); 743 break; 744 case USER: 745 UserIdentity[] users = new UserIdentity[values.length]; 746 for (int i = 0; i < values.length; i++) 747 { 748 users[i] = UserIdentity.stringToUserIdentity((String) values[i]); 749 } 750 indexUserValues(users, document, contentDoc, fieldName, language); 751 break; 752 case RICH_TEXT: 753 indexRichtextValue((RichText) values[0], document, contentDoc, fieldName, language); 754 break; 755 case BINARY: 756 case FILE: 757 case COMPOSITE: 758 case REFERENCE: 759 case SUB_CONTENT: 760 case GEOCODE: 761 getLogger().warn("Only primitive type is allowed on a custom indexing field"); 762 break; 763 default: 764 break; 765 } 766 } 767 768 /** 769 * Index a 'reference' metadata 770 * @param metadata The parent composite metadata 771 * @param metadataName The name of metadata to index 772 * @param contentId The content id. For logging purposes 773 * @param document The solr document to index into 774 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 775 * @param fieldName The index field name 776 * @param language The content language. 777 * @param definition The metadata definition 778 */ 779 public void indexReferenceMetadata(CompositeMetadata metadata, String metadataName, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 780 { 781 if (metadata.hasMetadata(metadataName)) 782 { 783 String strValue = metadata.getCompositeMetadata(metadataName).getString("value", null); 784 if (StringUtils.isNotBlank(strValue)) 785 { 786 indexStringValues(new String[]{strValue}, contentId, document, contentDoc, fieldName, language, definition.getEnumerator() != null); 787 } 788 } 789 } 790 791 /** 792 * Index a 'string' metadata 793 * @param metadata The parent composite metadata 794 * @param metadataName The name of metadata to index 795 * @param contentId The content id. For logging purposes 796 * @param document The solr document to index into 797 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 798 * @param fieldName The index field name 799 * @param language The content language. 800 * @param definition The metadata definition 801 */ 802 public void indexStringMetadata(CompositeMetadata metadata, String metadataName, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 803 { 804 String[] strValues = metadata.getStringArray(metadataName, new String[0]); 805 indexStringValues(strValues, contentId, document, contentDoc, fieldName, language, definition.getEnumerator() != null); 806 } 807 808 /** 809 * Index a multilingual string metadata 810 * @param metadata The parent composite metadata 811 * @param metadataName The name of metadata to index 812 * @param contentId The content id. For logging purposes 813 * @param document The solr document to index into 814 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 815 * @param fieldName The index field name 816 * @param definition The metadata definition 817 */ 818 public void indexMultilingualStringMetadata(CompositeMetadata metadata, String metadataName, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition) 819 { 820 MultilingualString multilingualString = metadata.getMultilingualString(metadataName); 821 indexMultilingualStringValues(multilingualString, contentId, document, contentDoc, fieldName); 822 } 823 824 /** 825 * Index a multilingual string values 826 * @param value The multilingual string 827 * @param contentId The content id. For logging purposes 828 * @param document The solr document to index into 829 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 830 * @param fieldName The index field name 831 */ 832 public void indexMultilingualStringValues(MultilingualString value, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName) 833 { 834 Set<Locale> metaLocales = value.getLocales(); 835 List<String> appLanguages = _languagesManager.getAvailableLanguages() 836 .values() 837 .stream() 838 .map(Language::getCode) 839 .collect(Collectors.toList()); 840 for (String appLanguageCode : appLanguages) 841 { 842 Locale appLocale = new Locale(appLanguageCode); 843 if (metaLocales.contains(appLocale)) 844 { 845 String str = value.getValue(appLocale); 846 indexMultilingualStringValues(new String[] {str}, contentId, document, contentDoc, fieldName, appLocale.getLanguage()); 847 } 848 849 // Need to index sort field for every language of application, even if metadata does not have value for the given language 850 String sortValue = MultilingualStringHelper.getValue(value, appLocale); 851 indexMultilingualStringValuesForSorting(sortValue, document, fieldName, appLocale.getLanguage()); 852 } 853 } 854 855 /** 856 * Index multilingual 'string' values 857 * @param values The values 858 * @param contentId The content id. For logging purposes 859 * @param document The solr document to index into 860 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 861 * @param fieldName The index field name 862 * @param language The language for values. 863 */ 864 public void indexMultilingualStringValues(String[] values, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 865 { 866 for (String value : values) 867 { 868 document.addField(fieldName + "_txt_" + language, value); 869 document.addField(fieldName + "_txt_stemmed_" + language, value); 870 document.addField(fieldName + "_txt_ws_" + language, value); 871 872 // Index without analyzing. 873 String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(value, getLogger(), contentId, fieldName); 874 document.addField(fieldName + "_" + language + "_s", possiblyTruncatedValue); 875 876 // Index without analyzing but lower-case (for wildcard queries). 877 document.addField(fieldName + "_" + language + "_s_lower", possiblyTruncatedValue.toLowerCase()); 878 879 // Exact words tokenized by whitespace. 880 document.addField(fieldName + "_" + language + "_s_ws", value.toLowerCase()); 881 882 // Index with analyze (full-text search). 883 document.addField(fieldName + "_" + language + "_txt", value); 884 885 indexFulltextValue(document, contentDoc, value, language); 886 } 887 } 888 889 /** 890 * Index multilingual 'string' value in sort field 891 * @param value The value 892 * @param document The solr document to index into 893 * @param fieldName The index field name 894 * @param language The language 895 */ 896 public void indexMultilingualStringValuesForSorting(String value, SolrInputDocument document, String fieldName, String language) 897 { 898 String sortField = fieldName + "_" + language + SolrFieldHelper.getSortFieldSuffix(MetadataType.MULTILINGUAL_STRING); 899 if (StringUtils.isNotEmpty(value) && !document.containsKey(sortField)) 900 { 901 document.addField(sortField, SolrFieldHelper.getSortValue(value)); 902 } 903 } 904 905 /** 906 * Index 'string' values 907 * @param values The values 908 * @param contentId The content id. For logging purposes 909 * @param document The solr document to index into 910 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 911 * @param fieldName The index field name 912 * @param language The content language. 913 * @param isFacetable true if the field can be used as a facet. 914 */ 915 public void indexStringValues(String[] values, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, boolean isFacetable) 916 { 917 for (String value : values) 918 { 919 if (!isFacetable) 920 { 921 if (language != null) // Language can be null for multilingual content 922 { 923 // No enumerator: index as full-text. 924 document.addField(fieldName + "_txt_" + language, value); 925 document.addField(fieldName + "_txt_stemmed_" + language, value); 926 document.addField(fieldName + "_txt_ws_" + language, value); 927 } 928 } 929 else 930 { 931 // Facets (enumeration only) 932 document.addField(fieldName + "_s_dv", value); 933 } 934 935 // Index without analyzing. 936 String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(value, getLogger(), contentId, fieldName); 937 document.addField(fieldName + "_s", possiblyTruncatedValue); 938 939 // Index without analyzing but lower-case (for wildcard queries). 940 document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase()); 941 942 // Exact words tokenized by whitespace. 943 document.addField(fieldName + "_s_ws", value.toLowerCase()); 944 945 // Index with analyze (full-text search). 946 document.addField(fieldName + "_txt", value); 947 948 indexFulltextValue(document, contentDoc, value, language); 949 } 950 951 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 952 if (values.length > 0 && !document.containsKey(sortField)) 953 { 954 // FIXME Si la meta est enumerée, indexer le label ? dans quelle langue ? 955 document.addField(sortField, SolrFieldHelper.getSortValue(values[0])); 956 } 957 } 958 959 /** 960 * Index a 'date' metadata 961 * @param metadata The parent composite metadata 962 * @param metadataName The name of metadata to index 963 * @param document The solr document to index into 964 * @param fieldName The index field name 965 * @param definition The metadata definition 966 */ 967 public void indexDateMetadata (CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 968 { 969 Date[] dateValues = metadata.getDateArray(metadataName, new Date[0]); 970 indexDateValues (dateValues, document, fieldName); 971 } 972 973 /** 974 * Index 'date' values 975 * @param values The values 976 * @param document The solr document to index into 977 * @param fieldName The index field name 978 */ 979 public void indexDateValues (Date[] values, SolrInputDocument document, String fieldName) 980 { 981 for (Date value : values) 982 { 983 document.addField(fieldName + "_dt", SolrIndexer.dateFormat().format(value)); 984 } 985 986 String sortField = fieldName + "_dt_sort"; 987 if (values.length > 0 && !document.containsKey(sortField)) 988 { 989 document.addField(sortField, SolrIndexer.dateFormat().format(values[0])); 990 } 991 } 992 993 /** 994 * Index a 'datetime' metadata 995 * @param metadata The parent composite metadata 996 * @param metadataName The name of metadata to index 997 * @param document The solr document to index into 998 * @param fieldName The index field name 999 * @param definition The metadata definition 1000 */ 1001 public void indexDateTimeMetadata (CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1002 { 1003 Date[] dateValues = metadata.getDateArray(metadataName, new Date[0]); 1004 indexDateTimeValues(dateValues, document, fieldName); 1005 } 1006 1007 /** 1008 * Index 'datetime' values 1009 * @param values The values 1010 * @param document The solr document to index into 1011 * @param fieldName The index field name 1012 */ 1013 public void indexDateTimeValues (Date[] values, SolrInputDocument document, String fieldName) 1014 { 1015 for (Date value : values) 1016 { 1017 document.addField(fieldName + "_dt", SolrIndexer.dateFormat().format(value)); 1018 } 1019 1020 String sortField = fieldName + "_dt_sort"; 1021 if (values.length > 0 && !document.containsKey(sortField)) 1022 { 1023 document.addField(sortField, SolrIndexer.dateFormat().format(values[0])); 1024 } 1025 } 1026 1027 /** 1028 * Index a 'double' metadata 1029 * @param metadata The parent composite metadata 1030 * @param metadataName The name of metadata to index 1031 * @param document The solr document to index into 1032 * @param fieldName The index field name 1033 * @param definition The metadata definition 1034 */ 1035 public void indexDoubleMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1036 { 1037 boolean isFacetable = definition.getEnumerator() != null; 1038 double[] values = metadata.getDoubleArray(metadataName, new double[0]); 1039 indexDoubleValues (ArrayUtils.toObject(values), document, fieldName, isFacetable); 1040 } 1041 1042 /** 1043 * Index 'double' values 1044 * @param values The values 1045 * @param document The solr document to index into 1046 * @param fieldName The index field name 1047 * @param isFacetable true if the field can be used as a facet. 1048 */ 1049 public void indexDoubleValues(Double[] values, SolrInputDocument document, String fieldName, boolean isFacetable) 1050 { 1051 for (Double value : values) 1052 { 1053 document.addField(fieldName + "_d", value); 1054 if (isFacetable) 1055 { 1056 document.addField(fieldName + "_d_dv", value); 1057 } 1058 } 1059 1060 String sortField = fieldName + "_d_sort"; 1061 if (values.length > 0 && !document.containsKey(sortField)) 1062 { 1063 document.addField(sortField, values[0]); 1064 } 1065 } 1066 1067 /** 1068 * Index a 'long' metadata 1069 * @param metadata The parent composite metadata 1070 * @param metadataName The name of metadata to index 1071 * @param document The solr document to index into 1072 * @param fieldName The index field name 1073 * @param definition The metadata definition 1074 */ 1075 public void indexLongMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1076 { 1077 boolean isFacetable = definition.getEnumerator() != null; 1078 long[] values = metadata.getLongArray(metadataName, new long[0]); 1079 indexLongValues(ArrayUtils.toObject(values), document, fieldName, isFacetable); 1080 } 1081 1082 /** 1083 * Index 'long' values 1084 * @param values The values 1085 * @param document The solr document to index into 1086 * @param fieldName The index field name 1087 * @param isFacetable true if the field can be used as a facet. 1088 */ 1089 public void indexLongValues(Long[] values, SolrInputDocument document, String fieldName, boolean isFacetable) 1090 { 1091 for (Long value : values) 1092 { 1093 document.addField(fieldName + "_l", value); 1094 if (isFacetable) 1095 { 1096 document.addField(fieldName + "_l_dv", value); 1097 } 1098 } 1099 1100 String sortField = fieldName + "_l_sort"; 1101 if (values.length > 0 && !document.containsKey(sortField)) 1102 { 1103 document.addField(sortField, values[0]); 1104 } 1105 } 1106 1107 /** 1108 * Index a 'user' metadata 1109 * @param metadata The parent composite metadata 1110 * @param metadataName The name of metadata to index 1111 * @param document The solr document to index into 1112 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1113 * @param fieldName The index field name 1114 * @param language The content language. 1115 * @param definition The metadata definition 1116 */ 1117 public void indexUserMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1118 { 1119 UserIdentity[] users = metadata.getUserArray(metadataName); 1120 indexUserValues(users, document, contentDoc, fieldName, language); 1121 } 1122 1123 /** 1124 * Index 'user' values 1125 * @param users The users 1126 * @param document The solr document to index into 1127 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1128 * @param fieldName The index field name 1129 * @param language The content language. 1130 */ 1131 public void indexUserValues(UserIdentity[] users, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1132 { 1133 int count = 0; 1134 for (UserIdentity userIdentity : users) 1135 { 1136 String fullName = _userHelper.getUserFullName(userIdentity); 1137 String sortableName = _userHelper.getUserSortableName(userIdentity); 1138 String identityAsString = UserIdentity.userIdentityToString(userIdentity); 1139 1140 indexFulltextValue(document, contentDoc, identityAsString, language); 1141 1142 // Facets 1143 document.addField(fieldName + "_s_dv", identityAsString); 1144 1145 // Dynamic fields 1146 document.addField(fieldName + "_s", identityAsString); 1147 1148 if (StringUtils.isNotEmpty(fullName)) 1149 { 1150 document.addField(fieldName + "_txt", fullName); 1151 1152 indexFulltextValue(document, contentDoc, fullName, language); 1153 } 1154 1155 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 1156 if (count == 0 && StringUtils.isNotEmpty(sortableName) && !document.containsKey(sortField)) 1157 { 1158 // Index only first user for sorting 1159 document.addField(sortField, SolrFieldHelper.getSortValue(sortableName)); 1160 } 1161 count++; 1162 } 1163 } 1164 1165 /** 1166 * Index a 'boolean' metadata 1167 * @param metadata The parent composite metadata 1168 * @param metadataName The name of metadata to index 1169 * @param document The solr document to index into 1170 * @param fieldName The index field name 1171 * @param definition The metadata definition 1172 */ 1173 public void indexBooleanMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1174 { 1175 boolean[] values = metadata.getBooleanArray(metadataName, new boolean[0]); 1176 indexBooleanValues(ArrayUtils.toObject(values), document, fieldName); 1177 } 1178 1179 /** 1180 * Index 'boolean' values 1181 * @param values The values 1182 * @param document The solr document to index into 1183 * @param fieldName The index field name 1184 */ 1185 public void indexBooleanValues(Boolean[] values, SolrInputDocument document, String fieldName) 1186 { 1187 for (Boolean value : values) 1188 { 1189 document.addField(fieldName + "_b", value); 1190 document.addField(fieldName + "_b_dv", value); 1191 } 1192 1193 String sortField = fieldName + "_b_sort"; 1194 if (values.length > 0 && !document.containsKey(sortField)) 1195 { 1196 document.addField(sortField, values[0]); 1197 } 1198 } 1199 1200 /** 1201 * Index a 'richtext' metadata 1202 * @param metadata The parent composite metadata 1203 * @param metadataName The name of metadata to index 1204 * @param document The solr document to index into 1205 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1206 * @param fieldName The index field name 1207 * @param language The content language. 1208 * @param definition The metadata definition 1209 */ 1210 public void indexRichtextMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1211 { 1212 indexRichtextValue(metadata.getRichText(metadataName), document, contentDoc, fieldName, language); 1213 } 1214 1215 /** 1216 * Index 'richtext' values 1217 * @param richText The rich text to index. 1218 * @param document The solr document to index into 1219 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1220 * @param fieldName The index field name. 1221 * @param language The content language. 1222 */ 1223 public void indexRichtextValue(RichText richText, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1224 { 1225 try (InputStream is = richText.getInputStream()) 1226 { 1227 String value = _richTextToString(is); 1228 1229 if (language != null) // language can be null for multilingual content 1230 { 1231 // Index as a text field. 1232 document.addField(fieldName + "_txt_" + language, value); 1233 document.addField(fieldName + "_txt_stemmed_" + language, value); 1234 document.addField(fieldName + "_txt_ws_" + language, value); 1235 } 1236 1237 // Index in the full-text value. 1238 SolrContentIndexer.indexFulltextValue(document, value, language); 1239 1240 if (contentDoc != null) 1241 { 1242 SolrContentIndexer.indexFulltextValue(contentDoc, value, language); 1243 } 1244 } 1245 catch (Exception e) 1246 { 1247 getLogger().warn("Failed to index RICH_TEXT '" + fieldName + "'", e); 1248 } 1249 } 1250 1251 /** 1252 * Gets a XML as a string and extract the text only 1253 * @param is The inputstream of XML 1254 * @return The text or null if the XML is not well formed 1255 */ 1256 protected String _richTextToString(InputStream is) 1257 { 1258 SAXParser saxParser = null; 1259 try 1260 { 1261 RichTextHandler txtHandler = new RichTextHandler(); 1262 saxParser = (SAXParser) _manager.lookup(SAXParser.ROLE); 1263 saxParser.parse(new InputSource(is), txtHandler); 1264 return txtHandler.getValue().trim(); 1265 } 1266 catch (ServiceException e) 1267 { 1268 getLogger().error("Unable to get a SAX parser", e); 1269 return null; 1270 } 1271 catch (IOException | SAXException e) 1272 { 1273 getLogger().error("Cannot parse inputstream", e); 1274 return null; 1275 } 1276 finally 1277 { 1278 _manager.release(saxParser); 1279 } 1280 } 1281 1282 1283 1284 /** 1285 * Index a 'binary' metadata 1286 * @param metadata The parent composite metadata 1287 * @param metadataName The name of metadata to index 1288 * @param document The solr document to index into 1289 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1290 * @param fieldName The index field name 1291 * @param language The content language. 1292 * @param definition The metadata definition 1293 */ 1294 public void indexBinaryMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1295 { 1296 // Index file name. 1297 BinaryMetadata binary = metadata.getBinaryMetadata(metadataName); 1298 document.addField(fieldName + "_txt", binary.getFilename()); 1299 1300 // Index the contents. 1301 indexFullTextBinary(metadata, metadataName, document, contentDoc, fieldName, language, definition); 1302 } 1303 1304 /** 1305 * Index a 'file' metadata 1306 * @param metadata The parent composite metadata 1307 * @param metadataName The name of metadata to index 1308 * @param document The solr document to index into 1309 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1310 * @param fieldName The index field name 1311 * @param language The content language. 1312 * @param definition The metadata definition 1313 */ 1314 public void indexFileMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1315 { 1316 if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.BINARY.equals(metadata.getType(metadataName))) 1317 { 1318 indexBinaryMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 1319 } 1320 else 1321 { 1322 // Resource from the explorer. 1323 String value = metadata.getString(metadataName); 1324 1325 try 1326 { 1327 Resource resource = (Resource) _resolver.resolveById(value); 1328 1329 // Index file name. 1330 document.addField(fieldName + "_txt", resource.getName()); 1331 1332 // Index the contents. 1333 indexResourceContent(resource, document, contentDoc, language); 1334 1335// document.addField(prefix + fieldName + "$path", resource.getId()); 1336// document.addField(prefix + fieldName + "$type", "explorer"); 1337// document.addField(prefix + fieldName + "$mime-type", resource.getMimeType()); 1338// document.addField(prefix + fieldName + "$filename", filename); 1339// document.addField(prefix + fieldName + "$lastModified", resource.getLastModified()); 1340// document.addField(prefix + fieldName + "$size", resource.getLength()); 1341// 1342// String viewUrl = "/plugins/explorer/resource?id=" + resource.getId(); 1343// document.addField(prefix + fieldName + "$viewUrl", viewUrl); 1344// document.addField(prefix + fieldName + "$downloadUrl", viewUrl + "&download=true"); 1345 } 1346 catch (AmetysRepositoryException e) 1347 { 1348 getLogger().warn(String.format("Unable to index the resource of id '%s' : resource does not exist.", value), e); 1349 } 1350 } 1351 } 1352 1353 /** 1354 * Index a 'file' metadata 1355 * @param values The values. 1356 * @param document The solr document to index into 1357 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1358 * @param fieldName The index field name 1359 * @param language The content language. 1360 */ 1361 public void indexFileValue(Object[] values, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1362 { 1363 String type = (String) values[0]; 1364 if (StringUtils.equalsIgnoreCase(org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.BINARY.name(), type)) 1365 { 1366 indexFullTextBinaryValue((InputStream) values[1], document, contentDoc, fieldName, language); 1367 } 1368 else 1369 { 1370 indexResourceContent((Resource) values[1], document, contentDoc, language); 1371 } 1372 } 1373 1374 /** 1375 * Index a 'binary' metadata 1376 * @param metadata The parent composite metadata 1377 * @param metadataName The name of metadata to index 1378 * @param document The solr document to index into 1379 * @param contentDoc The content document. 1380 * @param fieldName The index field name 1381 * @param language The content language. 1382 * @param definition The metadata definition 1383 */ 1384 protected void indexFullTextBinary(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1385 { 1386 try (InputStream is = metadata.getBinaryMetadata(metadataName).getInputStream()) 1387 { 1388 indexFullTextBinaryValue(is, document, contentDoc, fieldName, language); 1389 } 1390 catch (IOException e) 1391 { 1392 throw new RuntimeException(e); 1393 } 1394 } 1395 1396 /** 1397 * Index a 'binary' value 1398 * @param is An InputStream on the binary data. 1399 * @param document The solr document to index into 1400 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1401 * @param fieldName The index field name 1402 * @param language The content language. 1403 */ 1404 protected void indexFullTextBinaryValue(InputStream is, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1405 { 1406 try 1407 { 1408 String text = _tika.parseToString(is); 1409 1410 indexFulltextValue(document, contentDoc, text, language); 1411 } 1412 catch (Throwable e) 1413 { 1414 getLogger().warn(String.format("Failed to index binary field '%s'", fieldName), e); 1415 } 1416 } 1417 1418 /** 1419 * Index a 'content' metadata 1420 * @param metadata The parent composite metadata 1421 * @param metadataName The name of metadata to index 1422 * @param document The solr document to index into 1423 * @param fieldName The index field name 1424 * @param definition The metadata definition 1425 */ 1426 public void indexContentMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1427 { 1428 String[] contentIds = metadata.getStringArray(metadataName, new String[0]); 1429 indexContentValues(contentIds, document, fieldName); 1430 } 1431 1432 /** 1433 * Index content values. 1434 * @param contentIds The ID of the contents to index. 1435 * @param document The solr document to index into. 1436 * @param fieldName the field name. 1437 */ 1438 public void indexContentValues (String[] contentIds, SolrInputDocument document, String fieldName) 1439 { 1440 for (String contentId : contentIds) 1441 { 1442 document.addField(fieldName + "_s", contentId); 1443 // Facets 1444 document.addField(fieldName + "_s_dv", contentId); 1445 } 1446 1447 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 1448 if (contentIds.length > 0) 1449 { 1450 try 1451 { 1452 // TODO Est-ce qu'on peut faire autrement qu'un resolve ? 1453 Content content = _resolver.resolveById(contentIds[0]); 1454 CompositeMetadata metadataHolder = content.getMetadataHolder(); 1455 if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.MULTILINGUAL_STRING.equals(metadataHolder.getType(Content.ATTRIBUTE_TITLE))) 1456 { 1457 MultilingualString value = metadataHolder.getMultilingualString(Content.ATTRIBUTE_TITLE); 1458 for (Locale locale : value.getLocales()) 1459 { 1460 String langSortField = sortField + "_" + locale.getLanguage(); 1461 if (!document.containsKey(langSortField)) 1462 { 1463 String str = value.getValue(locale); 1464 document.addField(langSortField, SolrFieldHelper.getSortValue(str)); 1465 } 1466 } 1467 } 1468 else if (!document.containsKey(sortField)) 1469 { 1470 document.addField(sortField, SolrFieldHelper.getSortValue(_contentHelper.getTitle(content))); 1471 } 1472 } 1473 catch (AmetysRepositoryException e) 1474 { 1475 // Do not index 1476 } 1477 } 1478 } 1479 1480 /** 1481 * Index a 'sub_content' metadata 1482 * @param metadata The parent composite metadata 1483 * @param metadataName The name of metadata to index 1484 * @param document The solr document to index into 1485 * @param fieldName The index field name 1486 * @param definition The metadata definition 1487 */ 1488 public void indexSubContentMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1489 { 1490 TraversableAmetysObject objectCollection = metadata.getObjectCollection(metadataName); 1491 AmetysObjectIterable<Content> subcontents = objectCollection.getChildren(); 1492 for (Content subcontent : subcontents) 1493 { 1494 document.addField(fieldName + "_s", subcontent.getId()); 1495 // Facets 1496 document.addField(fieldName + "_s_dv", subcontent.getId()); 1497 } 1498 1499// String sortField = fieldName + "_s_sort"; 1500 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 1501 subcontents = objectCollection.getChildren(); 1502 Iterator<Content> it = subcontents.iterator(); 1503 1504 if (it.hasNext() && !document.containsKey(sortField)) 1505 { 1506 Content subcontent = it.next(); 1507 CompositeMetadata metadataHolder = subcontent.getMetadataHolder(); 1508 if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.MULTILINGUAL_STRING.equals(metadataHolder.getType(Content.ATTRIBUTE_TITLE))) 1509 { 1510 MultilingualString value = metadataHolder.getMultilingualString(Content.ATTRIBUTE_TITLE); 1511 for (Locale locale : value.getLocales()) 1512 { 1513 String str = value.getValue(locale); 1514 document.addField(sortField + "_" + locale.getLanguage(), SolrFieldHelper.getSortValue(str)); 1515 } 1516 } 1517 else 1518 { 1519 document.addField(sortField, SolrFieldHelper.getSortValue(_contentHelper.getTitle(subcontent))); 1520 } 1521 } 1522 } 1523 1524 /** 1525 * Index a 'geocode' metadata 1526 * @param metadata The parent composite metadata 1527 * @param metadataName The name of metadata to index 1528 * @param document The solr document to index into 1529 * @param fieldName The index field name 1530 * @param definition The metadata definition 1531 */ 1532 public void indexGeoCodeMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1533 { 1534 CompositeMetadata geoCodeMetadata = metadata.getCompositeMetadata(metadataName); 1535 if (geoCodeMetadata.hasMetadata("longitude") && geoCodeMetadata.hasMetadata("latitude")) 1536 { 1537 double longitude = geoCodeMetadata.getDouble("longitude"); 1538 double latitude = geoCodeMetadata.getDouble("latitude"); 1539 1540 indexGeocodeValue(latitude, longitude, document, fieldName); 1541 } 1542 } 1543 1544 /** 1545 * Index a 'geocode' metadata 1546 * @param latitude the coord latitude. 1547 * @param longitude the coord longitude. 1548 * @param document The solr document to index into 1549 * @param fieldName The index field name 1550 */ 1551 public void indexGeocodeValue(double latitude, double longitude, SolrInputDocument document, String fieldName) 1552 { 1553 document.addField(fieldName + "$latitude_d", latitude); 1554 document.addField(fieldName + "$longitude_d", longitude); 1555 1556 String geoFieldName = SolrFieldHelper.getIndexingFieldName(MetadataType.GEOCODE, fieldName); 1557 document.addField(geoFieldName, latitude + "," + longitude); 1558 } 1559 1560 /** 1561 * Index a composite metadata, i.e. browse and index the sub-metadatas. 1562 * @param content The content being indexed. 1563 * @param metadata The parent metadata. 1564 * @param metadataName The composite metadata name. 1565 * @param document The solr document to index into. 1566 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1567 * @param fieldName The field name. 1568 * @param definition The composite metadata definition. 1569 * @param additionalDocuments The solr additional documents used for repeater instance 1570 */ 1571 public void indexCompositeMetadata(Content content, CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition, List<SolrInputDocument> additionalDocuments) 1572 { 1573 CompositeMetadata compositeMetadata = metadata.getCompositeMetadata(metadataName); 1574 1575 // Index recursively 1576 Set<String> subMetadataNames = definition.getMetadataNames(); 1577 for (String subMetadataName : subMetadataNames) 1578 { 1579 if (compositeMetadata.hasMetadata(subMetadataName)) 1580 { 1581 indexMetadata(content, subMetadataName, compositeMetadata, document, contentDoc, additionalDocuments, fieldName + ContentConstants.METADATA_PATH_SEPARATOR + subMetadataName, definition.getMetadataDefinition(subMetadataName)); 1582 } 1583 } 1584 } 1585 1586 /** 1587 * Index a repeater metadata, i.e. browse and index the entries. 1588 * @param content The content being indexed. 1589 * @param metadata The parent metadata. 1590 * @param metadataName The repeater metadata name. 1591 * @param document The solr document to index into. 1592 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1593 * @param fieldName The field name. 1594 * @param definition The repeater metadata definition. 1595 * @param additionalDocuments The solr additional documents used for repeater instance 1596 */ 1597 public void indexRepeaterMetadata(Content content, CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition, List<SolrInputDocument> additionalDocuments) 1598 { 1599 CompositeMetadata compositeMetadata = metadata.getCompositeMetadata(metadataName); 1600 1601 // Get and sort the entry names. 1602 String[] entries = compositeMetadata.getMetadataNames(); 1603 Arrays.sort(entries, MetadataManager.REPEATER_ENTRY_COMPARATOR); 1604 1605 for (int i = 0; i < entries.length; i++) 1606 { 1607 String entryName = entries[i]; 1608 int position = i + 1; 1609 1610 CompositeMetadata entry = compositeMetadata.getCompositeMetadata(entryName); 1611 1612 String repeaterID = document.getField("id").getFirstValue().toString() + "/" + fieldName + "/" + entryName; 1613 1614 // Creates a new Solr document for each entry 1615 SolrInputDocument repDocument = new SolrInputDocument(); 1616 repDocument.addField("id", repeaterID); 1617 document.addField(fieldName + "_s_dv", repeaterID); 1618 1619 repDocument.addField(DOCUMENT_TYPE, TYPE_REPEATER); 1620 repDocument.addField(REPEATER_ENTRY_POSITION, position); 1621 // Add the created document to additional documents 1622 additionalDocuments.add(repDocument); 1623 1624 SolrInputDocument parentContentDoc = contentDoc != null ? contentDoc : document; 1625 1626 Set<String> subMetadataNames = definition.getMetadataNames(); 1627 for (String subMetadataName : subMetadataNames) 1628 { 1629 if (entry.hasMetadata(subMetadataName)) 1630 { 1631 // Created document is now the main document 1632 indexMetadata(content, subMetadataName, entry, repDocument, parentContentDoc, additionalDocuments, subMetadataName, definition.getMetadataDefinition(subMetadataName)); 1633 } 1634 } 1635 } 1636 } 1637 1638 /** 1639 * Index the content of a resource. 1640 * @param resource The resource 1641 * @param document The solr document to index into 1642 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1643 * @param language The content language. 1644 */ 1645 protected void indexResourceContent(Resource resource, SolrInputDocument document, SolrInputDocument contentDoc, String language) 1646 { 1647 try (InputStream is = resource.getInputStream()) 1648 { 1649 indexResourceContentValue(is, resource.getDCSubject(), resource.getDCDescription(), language, document, contentDoc); 1650 1651 // TODO Declare and index DC metadata? 1652 // DC meta 1653// _resourceIndexer.indexDublinCoreMetadata(resource, document); 1654 } 1655 catch (Exception e) 1656 { 1657 getLogger().error("Unable to index resource at " + resource.getPath(), e); 1658 } 1659 } 1660 1661 /** 1662 * Index the content of a resource. 1663 * @param is An input stream on the resource content. 1664 * @param keywords The resource keywords. 1665 * @param description The resource description. 1666 * @param language The content language. 1667 * @param document The solr document to index into 1668 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1669 * @throws TikaException If an error occurs extracting the document's text content. 1670 * @throws IOException If an error occurs reading the document's text content. 1671 */ 1672 protected void indexResourceContentValue(InputStream is, String[] keywords, String description, String language, SolrInputDocument document, SolrInputDocument contentDoc) throws IOException, TikaException 1673 { 1674 String value = _tika.parseToString(is); 1675 1676 indexFulltextValue(document, contentDoc, value, language); 1677 1678 for (String keyword : keywords) 1679 { 1680 indexFulltextValue(document, contentDoc, keyword, language); 1681 } 1682 1683 if (description != null) 1684 { 1685 indexFulltextValue(document, contentDoc, description, language); 1686 } 1687 } 1688 1689 /** 1690 * Index a full-text value. 1691 * @param mainDocument The document being used, can be either the content document itself or a repeater document. 1692 * @param contentDoc The parent content document. If the mainDocument is the content document, this will be null. 1693 * @param text The text to index. 1694 * @param language The content language. 1695 */ 1696 protected void indexFulltextValue(SolrInputDocument mainDocument, SolrInputDocument contentDoc, String text, String language) 1697 { 1698 indexFulltextValue(mainDocument, text, language); 1699 1700 // The content doc is null if the main document is the content doc (to prevent indexing the data twice). 1701 if (contentDoc != null) 1702 { 1703 indexFulltextValue(contentDoc, text, language); 1704 } 1705 } 1706 1707 /** 1708 * Index a full-text value. 1709 * @param document The document to index into. 1710 * @param text The text to index. 1711 * @param language The content language. 1712 */ 1713 public static void indexFulltextValue(SolrInputDocument document, String text, String language) 1714 { 1715 if (StringUtils.isNotBlank(text)) 1716 { 1717 document.addField(FULL_GENERAL, text); 1718 document.addField(FULL_EXACT_WS, text); 1719 1720 if (StringUtils.isNotEmpty(language)) 1721 { 1722 indexLanguageFulltextValue(document, text, language); 1723 } 1724 } 1725 } 1726 1727 /** 1728 * Index a full-text value. 1729 * @param document The document to index into. 1730 * @param text The text to index. 1731 * @param languages The languages. 1732 */ 1733 public static void indexFulltextValue(SolrInputDocument document, String text, Collection<String> languages) 1734 { 1735 if (StringUtils.isNotBlank(text)) 1736 { 1737 document.addField(FULL_GENERAL, text); 1738 document.addField(FULL_EXACT_WS, text); 1739 1740 for (String language : languages) 1741 { 1742 indexLanguageFulltextValue(document, text, language); 1743 } 1744 } 1745 } 1746 1747 /** 1748 * Index a full-text value in the language-specific fields. 1749 * @param document The document to index into. 1750 * @param text The text to index. 1751 * @param language The content language. 1752 */ 1753 protected static void indexLanguageFulltextValue(SolrInputDocument document, String text, String language) 1754 { 1755 document.addField(FULL_PREFIX + language, text); 1756 document.addField(FULL_STEMMED_PREFIX + language, text); 1757 } 1758}