001/* 002 * Copyright 2015 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.cms.content.indexing.solr; 017 018import java.io.IOException; 019import java.io.InputStream; 020import java.util.Arrays; 021import java.util.Collection; 022import java.util.Date; 023import java.util.HashMap; 024import java.util.Iterator; 025import java.util.List; 026import java.util.Locale; 027import java.util.Map; 028import java.util.Map.Entry; 029import java.util.Optional; 030import java.util.Set; 031import java.util.stream.Collectors; 032import java.util.stream.Stream; 033 034import org.apache.avalon.framework.component.Component; 035import org.apache.avalon.framework.service.ServiceException; 036import org.apache.avalon.framework.service.ServiceManager; 037import org.apache.avalon.framework.service.Serviceable; 038import org.apache.commons.lang3.ArrayUtils; 039import org.apache.commons.lang3.StringUtils; 040import org.apache.excalibur.xml.sax.SAXParser; 041import org.apache.solr.common.SolrInputDocument; 042import org.apache.tika.Tika; 043import org.apache.tika.exception.TikaException; 044import org.xml.sax.InputSource; 045import org.xml.sax.SAXException; 046 047import org.ametys.cms.content.ContentHelper; 048import org.ametys.cms.content.RichTextHandler; 049import org.ametys.cms.content.indexing.solr.content.attachment.ContentVisibleAttachmentIndexerExtensionPoint; 050import org.ametys.cms.content.references.OutgoingReferences; 051import org.ametys.cms.content.references.OutgoingReferencesExtractor; 052import org.ametys.cms.contenttype.ContentConstants; 053import org.ametys.cms.contenttype.ContentType; 054import org.ametys.cms.contenttype.ContentTypeExtensionPoint; 055import org.ametys.cms.contenttype.ContentTypesHelper; 056import org.ametys.cms.contenttype.MetadataDefinition; 057import org.ametys.cms.contenttype.MetadataManager; 058import org.ametys.cms.contenttype.MetadataType; 059import org.ametys.cms.contenttype.RepeaterDefinition; 060import org.ametys.cms.contenttype.indexing.CustomIndexingField; 061import org.ametys.cms.contenttype.indexing.CustomMetadataIndexingField; 062import org.ametys.cms.contenttype.indexing.IndexingField; 063import org.ametys.cms.contenttype.indexing.IndexingModel; 064import org.ametys.cms.contenttype.indexing.MetadataIndexingField; 065import org.ametys.cms.languages.Language; 066import org.ametys.cms.languages.LanguagesManager; 067import org.ametys.cms.repository.Content; 068import org.ametys.cms.repository.DefaultContent; 069import org.ametys.cms.search.model.SystemProperty; 070import org.ametys.cms.search.model.SystemPropertyExtensionPoint; 071import org.ametys.core.user.UserIdentity; 072import org.ametys.plugins.core.user.UserHelper; 073import org.ametys.plugins.explorer.resources.Resource; 074import org.ametys.plugins.explorer.resources.metadata.TikaProvider; 075import org.ametys.plugins.repository.AmetysObject; 076import org.ametys.plugins.repository.AmetysObjectIterable; 077import org.ametys.plugins.repository.AmetysObjectResolver; 078import org.ametys.plugins.repository.AmetysRepositoryException; 079import org.ametys.plugins.repository.TraversableAmetysObject; 080import org.ametys.plugins.repository.UnknownAmetysObjectException; 081import org.ametys.plugins.repository.metadata.BinaryMetadata; 082import org.ametys.plugins.repository.metadata.CompositeMetadata; 083import org.ametys.plugins.repository.metadata.MultilingualString; 084import org.ametys.plugins.repository.metadata.MultilingualStringHelper; 085import org.ametys.plugins.repository.metadata.RichText; 086import org.ametys.runtime.plugin.component.AbstractLogEnabled; 087 088/** 089 * Component for {@link Content} indexing into a Solr server. 090 */ 091public class SolrContentIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrFieldNames 092{ 093 /** The component role. */ 094 public static final String ROLE = SolrContentIndexer.class.getName(); 095 096 /** The Ametys objet resolver */ 097 protected AmetysObjectResolver _resolver; 098 /** The content type extension point */ 099 protected ContentTypeExtensionPoint _cTypeEP; 100 /** The content type helper */ 101 protected ContentTypesHelper _cTypesHelper; 102 /** The users manager */ 103 protected UserHelper _userHelper; 104 /** The Tika instance */ 105 protected Tika _tika; 106 /** The resource indexer */ 107 protected SolrResourceIndexer _resourceIndexer; 108 /** The system property extension point. */ 109 protected SystemPropertyExtensionPoint _systemPropEP; 110 /** The content helper */ 111 protected ContentHelper _contentHelper; 112 /** The outgoing references extractor */ 113 protected OutgoingReferencesExtractor _outgoingReferencesExtractor; 114 /** The extension point for ContentVisibleAttachmentIndexers */ 115 protected ContentVisibleAttachmentIndexerExtensionPoint _contentVisibleAttachmentIndexerEP; 116 /** The manager for languages */ 117 protected LanguagesManager _languagesManager; 118 /** Avalon service manager */ 119 protected ServiceManager _manager; 120 121 @Override 122 public void service(ServiceManager manager) throws ServiceException 123 { 124 _manager = manager; 125 _resolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE); 126 _resourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE); 127 _cTypeEP = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE); 128 _contentHelper = (ContentHelper) manager.lookup(ContentHelper.ROLE); 129 _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE); 130 _userHelper = (UserHelper) manager.lookup(UserHelper.ROLE); 131 TikaProvider tikaProvider = (TikaProvider) manager.lookup(TikaProvider.ROLE); 132 _tika = tikaProvider.getTika(); 133 _systemPropEP = (SystemPropertyExtensionPoint) manager.lookup(SystemPropertyExtensionPoint.ROLE); 134 _outgoingReferencesExtractor = (OutgoingReferencesExtractor) manager.lookup(OutgoingReferencesExtractor.ROLE); 135 _contentVisibleAttachmentIndexerEP = (ContentVisibleAttachmentIndexerExtensionPoint) manager.lookup(ContentVisibleAttachmentIndexerExtensionPoint.ROLE); 136 _languagesManager = (LanguagesManager) manager.lookup(LanguagesManager.ROLE); 137 } 138 139 /** 140 * Populate a solr input document by adding fields to index into it. 141 * @param content The content to index 142 * @param document The main solr document to index into 143 * @param additionalDocuments The additional documents for repeater instances 144 * @throws Exception if an error occurred while indexing 145 */ 146 public void indexContent(Content content, SolrInputDocument document, List<SolrInputDocument> additionalDocuments) throws Exception 147 { 148 // Properties specific to a stand-alone indexation. 149 String contentId = content.getId(); 150 document.addField(ID, contentId); 151 document.addField(DOCUMENT_TYPE, TYPE_CONTENT); 152 153 indexContentTitle(content, document); 154 155 document.addField(CONTENT_NAME, SolrIndexer.truncateUtf8StringValue(content.getName(), getLogger(), contentId, CONTENT_NAME)); 156 _indexOutgoingReferences(content, document); 157 _indexVisibleAttachments(content, document); 158 159 document.addField(WORKFLOW_REF_DV, contentId + "#workflow"); 160 161 // Index content system properties. 162 indexSystemProperties(content, document); 163 164 // Index the fields specified in the indexation model. 165 indexModelFields(content, document, additionalDocuments); 166 } 167 168 private void _indexOutgoingReferences(Content content, SolrInputDocument document) 169 { 170 // Found by the extractor (resource references found in all data of the content) 171 _outgoingReferencesExtractor.getOutgoingReferences(content).values() // key is the data path, we do not care what data it comes from 172 .parallelStream() 173 .map(OutgoingReferences::entrySet) 174 .flatMap(Set::parallelStream) 175 .filter(outgoingRefs -> outgoingRefs.getKey().equals("explorer")) // only references of the resource explorer 176 .map(Entry::getValue) 177 .flatMap(List::parallelStream) // flat the resource ids 178 .forEach(resourceId -> document.addField(CONTENT_OUTGOING_REFEERENCES_RESOURCE_IDS, resourceId)); 179 180 // Attachments of the content (just the root folder) 181 Optional.ofNullable(content.getRootAttachments()) 182 .map(AmetysObject::getId) 183 .ifPresent(id -> document.addField(CONTENT_OUTGOING_REFEERENCES_RESOURCE_IDS, id)); 184 } 185 186 private void _indexVisibleAttachments(Content content, SolrInputDocument document) 187 { 188 Collection<String> values = _contentVisibleAttachmentIndexerEP.getExtensionsIds() 189 .stream() 190 .map(_contentVisibleAttachmentIndexerEP::getExtension) 191 .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(content)) 192 .flatMap(Collection::stream) 193 .collect(Collectors.toList()); 194 document.addField(CONTENT_VISIBLE_ATTACHMENT_RESOURCE_IDS, values); 195 } 196 197 /** 198 * Index the content title 199 * @param content The title 200 * @param document The main solr document to index into 201 */ 202 protected void indexContentTitle(Content content, SolrInputDocument document) 203 { 204 if (content.getMetadataHolder().getType("title") == org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.MULTILINGUAL_STRING) 205 { 206 MultilingualString value = content.getMetadataHolder().getMultilingualString(DefaultContent.METADATA_TITLE); 207 indexMultilingualStringValues(value, content.getId(), document, null, TITLE); 208 } 209 else 210 { 211 String title = _contentHelper.getTitle(content); 212 document.addField(TITLE, SolrIndexer.truncateUtf8StringValue(title, getLogger(), content.getId(), TITLE)); 213 document.addField(TITLE_SORT, title); 214 } 215 } 216 217 /** 218 * Index the system properties of a content. 219 * @param content The content to index. 220 * @param document The solr document to index into. 221 */ 222 protected void indexSystemProperties(Content content, SolrInputDocument document) 223 { 224 for (String sysPropId : _systemPropEP.getExtensionsIds()) 225 { 226 SystemProperty sysProp = _systemPropEP.getExtension(sysPropId); 227 228 sysProp.index(content, document); 229 } 230 } 231 232 /** 233 * Index the content type and all its supertypes in the given document (recursively). 234 * @param cTypeId The ID of the content type to index. 235 * @param document The solr document to index into. 236 * @param fieldName The field name. 237 */ 238 protected void indexAllContentTypes(String cTypeId, SolrInputDocument document, String fieldName) 239 { 240 document.addField(fieldName, cTypeId); 241 242 if (_cTypeEP.hasExtension(cTypeId)) 243 { 244 ContentType contentType = _cTypeEP.getExtension(cTypeId); 245 for (String supertypeId : contentType.getSupertypeIds()) 246 { 247 indexAllContentTypes(supertypeId, document, fieldName); 248 } 249 } 250 } 251 252 /** 253 * Index the fields specified in the indexation model. 254 * @param content The content to index. 255 * @param document The main content solr document. 256 * @param additionalDocuments The additional documents for repeater instances. 257 */ 258 protected void indexModelFields(Content content, SolrInputDocument document, List<SolrInputDocument> additionalDocuments) 259 { 260 IndexingModel indexingModel = null; 261 try 262 { 263 indexingModel = _cTypesHelper.getIndexingModel(content); 264 } 265 catch (RuntimeException e) 266 { 267 getLogger().error("indexContent > Error getting the indexing model of content " + content.getId(), e); 268 throw e; 269 } 270 271 for (IndexingField field : indexingModel.getFields()) 272 { 273 if (field instanceof CustomIndexingField) 274 { 275 Object[] values = ((CustomIndexingField) field).getValues(content); 276 indexValues(content, field.getName(), field.getType(), values, document, null); 277 } 278 else if (field instanceof MetadataIndexingField) 279 { 280 String metadataPath = ((MetadataIndexingField) field).getMetadataPath(); 281 String[] pathSegments = metadataPath.split(ContentConstants.METADATA_PATH_SEPARATOR); 282 283 MetadataDefinition definition = _cTypesHelper.getMetadataDefinition(pathSegments[0], content.getTypes(), content.getMixinTypes()); 284 if (definition != null) 285 { 286 findAndIndexMetadata(content, pathSegments, content.getMetadataHolder(), definition, field, field.getName(), document, null, additionalDocuments); 287 } 288 } 289 } 290 } 291 292 /** 293 * Populate a Solr input document by adding fields for a single system property. 294 * @param content The content to index 295 * @param propertyId The system property ID. 296 * @param document The solr document 297 * @return true if there are partial update to apply 298 * @throws Exception if an error occurred 299 */ 300 public boolean indexPartialSystemProperty(Content content, String propertyId, SolrInputDocument document) throws Exception 301 { 302 if (!_systemPropEP.hasExtension(propertyId)) 303 { 304 throw new IllegalStateException("The property '" + propertyId + "' can't be indexed as it does not exist."); 305 } 306 307 SolrInputDocument tempDocument = new SolrInputDocument(); 308 309 SystemProperty property = _systemPropEP.getExtension(propertyId); 310 property.index(content, tempDocument); 311 312 if (tempDocument.isEmpty()) 313 { 314 // Does not have any partial update to apply, avoid to erase all the existing fields on the Solr document corresponding to this content (it would be lost) 315 return false; 316 } 317 318 // Copy the indexed values as partial updates. 319 for (String fieldName : tempDocument.getFieldNames()) 320 { 321 Collection<Object> fieldValues = tempDocument.getFieldValues(fieldName); 322 323 Map<String, Object> partialUpdate = new HashMap<>(); 324 partialUpdate.put("set", fieldValues); 325 document.addField(fieldName, partialUpdate); 326 } 327 328 document.addField("id", content.getId()); 329 330 return true; 331 } 332 333 /** 334 * Find the metadata to index from its path 335 * @param content the content currently being traversed. 336 * @param pathSegments The segments of path of metadata to index 337 * @param metadata The parent composite metadata 338 * @param definition The metadata definition 339 * @param field the current indexing field. 340 * @param fieldName the name of the field to index. 341 * @param document The main solr document to index into 342 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 343 * @param additionalDocuments The additional documents 344 */ 345 protected void findAndIndexMetadata(Content content, String[] pathSegments, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 346 { 347 String currentFieldName = pathSegments[0]; 348 349 IndexingModel indexingModel = null; 350 try 351 { 352 indexingModel = _cTypesHelper.getIndexingModel(content); 353 } 354 catch (RuntimeException e) 355 { 356 if (content != null) 357 { 358 getLogger().error("findAndIndexMetadata > Error while indexing content " + content.getId() + " metadata", e); 359 } 360 else 361 { 362 getLogger().error("findAndIndexMetadata > Error while indexing null content metadata"); 363 } 364 throw e; 365 } 366 367 IndexingField refField = indexingModel.getField(currentFieldName); 368 if (refField != null && refField instanceof CustomMetadataIndexingField) 369 { 370 CustomMetadataIndexingField overridingField = (CustomMetadataIndexingField) refField; 371 findAndIndexOverridingField(content, indexingModel, overridingField, fieldName, definition, pathSegments, document, contentDoc, additionalDocuments); 372 } 373 else 374 { 375 if (metadata.hasMetadata(currentFieldName)) 376 { 377 findAndIndexMetadataField(content, pathSegments, metadata, definition, field, fieldName, document, contentDoc, additionalDocuments); 378 } 379 } 380 } 381 382 /** 383 * Find and index a metadata. 384 * @param content the current content being traversed. 385 * @param pathSegments the full metadata path segments. 386 * @param metadata the current metadata holder. 387 * @param definition the current metadata definition. 388 * @param field the current indexing field. 389 * @param fieldName the name of the field to index. 390 * @param document the solr main document. 391 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 392 * @param additionalDocuments the solr additional documents. 393 */ 394 protected void findAndIndexMetadataField(Content content, String[] pathSegments, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 395 { 396 String currentFieldName = pathSegments[0]; 397 398 if (pathSegments.length == 1) 399 { 400 indexMetadata(content, currentFieldName, metadata, document, contentDoc, additionalDocuments, fieldName, definition); 401 return; 402 } 403 404 String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length); 405 406 switch (definition.getType()) 407 { 408 case BINARY: 409 case BOOLEAN: 410 case STRING: 411 case MULTILINGUAL_STRING: 412 case USER: 413 case LONG: 414 case DOUBLE: 415 case DATE: 416 case DATETIME: 417 case REFERENCE: 418 case RICH_TEXT: 419 case FILE: 420 case GEOCODE: 421 getLogger().warn("The metadata '{}' of type {} can not be a part of a path to index : {}", currentFieldName, definition.getType().toString(), StringUtils.join(pathSegments, ContentConstants.METADATA_PATH_SEPARATOR)); 422 break; 423 case CONTENT: 424 String[] contentIds = metadata.getStringArray(currentFieldName, new String[0]); 425 for (String contentId : contentIds) 426 { 427 try 428 { 429 Content refContent = _resolver.resolveById(contentId); 430 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], refContent.getTypes(), refContent.getMixinTypes()); 431 findAndIndexMetadata(refContent, followingSegments, refContent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 432 } 433 catch (UnknownAmetysObjectException e) 434 { 435 // Nothing to index 436 } 437 } 438 break; 439 case SUB_CONTENT: 440 TraversableAmetysObject objectCollection = metadata.getObjectCollection(currentFieldName); 441 AmetysObjectIterable<Content> subcontents = objectCollection.getChildren(); 442 for (Content subcontent : subcontents) 443 { 444 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], subcontent.getTypes(), subcontent.getMixinTypes()); 445 findAndIndexMetadata(subcontent, followingSegments, subcontent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 446 } 447 break; 448 case COMPOSITE: 449 CompositeMetadata composite = metadata.getCompositeMetadata(currentFieldName); 450 451 if (definition instanceof RepeaterDefinition) 452 { 453 String[] entries = composite.getMetadataNames(); 454 for (String entry : entries) 455 { 456 findAndIndexMetadata(content, followingSegments, composite.getCompositeMetadata(entry), definition.getMetadataDefinition(followingSegments[0]), field, fieldName, document, contentDoc, additionalDocuments); 457 } 458 } 459 else 460 { 461 findAndIndexMetadata(content, followingSegments, composite, definition.getMetadataDefinition(followingSegments[0]), field, fieldName, document, contentDoc, additionalDocuments); 462 } 463 break; 464 default: 465 break; 466 467 } 468 } 469 470 /** 471 * Find and index a property represented by an overriding field. 472 * @param content the current content being traversed. 473 * @param indexingModel the current indexing model. 474 * @param pathSegments the full metadata path segments. 475 * @param definition the current metadata definition. 476 * @param field the current indexing field. 477 * @param fieldName the name of the field to index. 478 * @param document the solr main document. 479 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 480 * @param additionalDocuments the solr additional documents. 481 */ 482 protected void findAndIndexOverridingField(Content content, IndexingModel indexingModel, CustomMetadataIndexingField field, String fieldName, MetadataDefinition definition, String[] pathSegments, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 483 { 484 String currentFieldName = field.getName(); 485 486 if (pathSegments.length == 1) 487 { 488 indexOverridingField(field, content, fieldName, document, contentDoc, additionalDocuments); 489 return; 490 } 491 492 String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length); 493 MetadataType type = definition.getType(); 494 495 switch (type) 496 { 497 case BINARY: 498 case BOOLEAN: 499 case STRING: 500 case MULTILINGUAL_STRING: 501 case USER: 502 case LONG: 503 case DOUBLE: 504 case DATE: 505 case DATETIME: 506 case REFERENCE: 507 case RICH_TEXT: 508 case FILE: 509 case GEOCODE: 510 getLogger().warn("The field '{}' of type {} can not be a part of a path to index : {}", currentFieldName, type.toString(), StringUtils.join(pathSegments, ContentConstants.METADATA_PATH_SEPARATOR)); 511 break; 512 case COMPOSITE: 513 getLogger().warn("The type {} is invalid for the overriding field '{}'.", type.toString(), currentFieldName); 514 break; 515 case CONTENT: 516 case SUB_CONTENT: 517 String[] contentIds = (String[]) field.getValues(content); 518 for (String contentId : contentIds) 519 { 520 Content refContent = _resolver.resolveById(contentId); 521 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], refContent.getTypes(), refContent.getMixinTypes()); 522 findAndIndexMetadata(refContent, followingSegments, refContent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 523 } 524 break; 525 default: 526 break; 527 } 528 } 529 530 /** 531 * Index a content metadata. 532 * @param content the current content being traversed. 533 * @param metadataName The name of metadata to index 534 * @param metadata The parent composite metadata 535 * @param document the solr document to index into. 536 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 537 * @param additionalDocuments The solr additional documents used for repeater instance 538 * @param fieldName the name of the indexed field. 539 * @param definition the metadata definition. 540 */ 541 public void indexMetadata(Content content, String metadataName, CompositeMetadata metadata, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments, String fieldName, MetadataDefinition definition) 542 { 543 String language = content.getLanguage(); 544 545 switch (definition.getType()) 546 { 547 case STRING: 548 indexStringMetadata(metadata, metadataName, content.getId(), document, contentDoc, fieldName, language, definition); 549 break; 550 case MULTILINGUAL_STRING: 551 indexMultilingualStringMetadata(metadata, metadataName, content.getId(), document, contentDoc, fieldName, definition); 552 break; 553 case USER: 554 indexUserMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 555 break; 556 case GEOCODE: 557 indexGeoCodeMetadata(metadata, metadataName, document, fieldName, definition); 558 break; 559 case BINARY: 560 indexBinaryMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 561 break; 562 case FILE: 563 indexFileMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 564 break; 565 case DATE: 566 indexDateMetadata(metadata, metadataName, document, fieldName, definition); 567 break; 568 case DATETIME: 569 indexDateTimeMetadata(metadata, metadataName, document, fieldName, definition); 570 break; 571 case CONTENT: 572 indexContentMetadata(metadata, metadataName, document, fieldName, definition); 573 break; 574 case SUB_CONTENT: 575 indexSubContentMetadata(metadata, metadataName, document, fieldName, definition); 576 break; 577 case LONG: 578 indexLongMetadata(metadata, metadataName, document, fieldName, definition); 579 break; 580 case DOUBLE: 581 indexDoubleMetadata(metadata, metadataName, document, fieldName, definition); 582 break; 583 case BOOLEAN: 584 indexBooleanMetadata(metadata, metadataName, document, fieldName, definition); 585 break; 586 case RICH_TEXT: 587 indexRichtextMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 588 break; 589 case COMPOSITE: 590 if (definition instanceof RepeaterDefinition) 591 { 592 indexRepeaterMetadata(content, metadata, metadataName, document, contentDoc, fieldName, definition, additionalDocuments); 593 } 594 else 595 { 596 indexCompositeMetadata(content, metadata, metadataName, document, contentDoc, fieldName, definition, additionalDocuments); 597 } 598 break; 599 case REFERENCE: 600 indexReferenceMetadata(metadata, metadataName, content.getId(), document, contentDoc, fieldName, language, definition); 601 break; 602 default: 603 break; 604 } 605 } 606 607 /** 608 * Index a property represented by an overriding field. 609 * @param field The overriding field. 610 * @param content The content of which to get the property. 611 * @param fieldName The name of the field to index. 612 * @param document the solr document to index into. 613 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 614 * @param additionalDocuments The solr additional documents used for repeater instance 615 */ 616 public void indexOverridingField(CustomMetadataIndexingField field, Content content, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 617 { 618 Object[] values = field.getValues(content); 619 MetadataDefinition definition = field.getMetadataDefinition(); 620 boolean isFacetable = definition.getEnumerator() != null; 621 String language = content.getLanguage(); 622 623 switch (definition.getType()) 624 { 625 case STRING: 626 indexStringValues((String[]) values, content.getId(), document, contentDoc, fieldName, language, isFacetable); 627 break; 628 case MULTILINGUAL_STRING: 629 indexMultilingualStringValues((MultilingualString) values[0], content.getId(), document, contentDoc, fieldName); 630 break; 631 case USER: 632 UserIdentity[] users = new UserIdentity[values.length]; 633 for (int i = 0; i < values.length; i++) 634 { 635 users[i] = UserIdentity.stringToUserIdentity((String) values[i]); 636 } 637 indexUserValues(users, document, contentDoc, fieldName, language); 638 break; 639 case GEOCODE: 640 if (values.length > 1) 641 { 642 indexGeocodeValue((double) values[0], (double) values[1], document, fieldName); 643 } 644 break; 645 case BINARY: 646 if (values.length > 0) 647 { 648 indexFullTextBinaryValue((InputStream) values[0], document, contentDoc, fieldName, language); 649 } 650 break; 651 case FILE: 652 indexFileValue(values, document, contentDoc, fieldName, language); 653 break; 654 case DATE: 655 indexDateValues((Date[]) values, document, fieldName); 656 break; 657 case DATETIME: 658 indexDateTimeValues((Date[]) values, document, fieldName); 659 break; 660 case CONTENT: 661 indexContentValues((String[]) values, document, fieldName); 662 break; 663 case SUB_CONTENT: 664 indexContentValues((String[]) values, document, fieldName); 665 break; 666 case LONG: 667 indexLongValues((Long[]) values, document, fieldName, isFacetable); 668 break; 669 case DOUBLE: 670 indexDoubleValues((Double[]) values, document, fieldName, isFacetable); 671 break; 672 case BOOLEAN: 673 indexBooleanValues((Boolean[]) values, document, fieldName); 674 break; 675 case RICH_TEXT: 676 if (values.length > 0) 677 { 678 indexRichtextValue((RichText) values[0], document, contentDoc, fieldName, language); 679 } 680 break; 681 case COMPOSITE: 682 break; 683 case REFERENCE: 684 // References are get as Map<String, Object> with keys "type" and "value" 685 String[] referenceValues = Stream.of((Map<String, Object>[]) values) 686 // Only keep the value 687 .map(m -> m.get("value")) 688 // Transform it as String 689 .map(Object::toString) 690 // Collect result in String array 691 .toArray(String[]::new); 692 693 // Index like String values 694 indexStringValues(referenceValues, content.getId(), document, contentDoc, fieldName, language, isFacetable); 695 break; 696 default: 697 break; 698 } 699 } 700 701 /** 702 * Index values 703 * @param content The content being indexed. 704 * @param fieldName The Solr field's name 705 * @param type the type of values to index 706 * @param values the values 707 * @param document the Solr document 708 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 709 */ 710 public void indexValues(Content content, String fieldName, MetadataType type, Object[] values, SolrInputDocument document, SolrInputDocument contentDoc) 711 { 712 String language = content.getLanguage(); 713 714 switch (type) 715 { 716 case STRING: 717 indexStringValues((String[]) values, content.getId(), document, contentDoc, fieldName, language, false); 718 break; 719 case MULTILINGUAL_STRING: 720 if (values.length > 0) 721 { 722 indexMultilingualStringValues((MultilingualString) values[0], content.getId(), document, contentDoc, fieldName); 723 } 724 break; 725 case LONG: 726 indexLongValues((Long[]) values, document, fieldName, false); 727 break; 728 case DOUBLE: 729 indexDoubleValues((Double[]) values, document, fieldName, false); 730 break; 731 case DATE: 732 indexDateValues((Date[]) values, document, fieldName); 733 break; 734 case DATETIME: 735 indexDateTimeValues((Date[]) values, document, fieldName); 736 break; 737 case CONTENT: 738 indexContentValues((String[]) values, document, fieldName); 739 break; 740 case BOOLEAN: 741 indexBooleanValues((Boolean[]) values, document, fieldName); 742 break; 743 case USER: 744 UserIdentity[] users = new UserIdentity[values.length]; 745 for (int i = 0; i < values.length; i++) 746 { 747 users[i] = UserIdentity.stringToUserIdentity((String) values[i]); 748 } 749 indexUserValues(users, document, contentDoc, fieldName, language); 750 break; 751 case RICH_TEXT: 752 indexRichtextValue((RichText) values[0], document, contentDoc, fieldName, language); 753 break; 754 case BINARY: 755 case FILE: 756 case COMPOSITE: 757 case REFERENCE: 758 case SUB_CONTENT: 759 case GEOCODE: 760 getLogger().warn("Only primitive type is allowed on a custom indexing field"); 761 break; 762 default: 763 break; 764 } 765 } 766 767 /** 768 * Index a 'reference' metadata 769 * @param metadata The parent composite metadata 770 * @param metadataName The name of metadata to index 771 * @param contentId The content id. For logging purposes 772 * @param document The solr document to index into 773 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 774 * @param fieldName The index field name 775 * @param language The content language. 776 * @param definition The metadata definition 777 */ 778 public void indexReferenceMetadata(CompositeMetadata metadata, String metadataName, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 779 { 780 if (metadata.hasMetadata(metadataName)) 781 { 782 String[] strValues = metadata.getCompositeMetadata(metadataName).getStringArray("value", new String[0]); 783 indexStringValues(strValues, contentId, document, contentDoc, fieldName, language, definition.getEnumerator() != null); 784 } 785 } 786 787 /** 788 * Index a 'string' metadata 789 * @param metadata The parent composite metadata 790 * @param metadataName The name of metadata to index 791 * @param contentId The content id. For logging purposes 792 * @param document The solr document to index into 793 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 794 * @param fieldName The index field name 795 * @param language The content language. 796 * @param definition The metadata definition 797 */ 798 public void indexStringMetadata(CompositeMetadata metadata, String metadataName, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 799 { 800 String[] strValues = metadata.getStringArray(metadataName, new String[0]); 801 indexStringValues(strValues, contentId, document, contentDoc, fieldName, language, definition.getEnumerator() != null); 802 } 803 804 /** 805 * Index a multilingual string metadata 806 * @param metadata The parent composite metadata 807 * @param metadataName The name of metadata to index 808 * @param contentId The content id. For logging purposes 809 * @param document The solr document to index into 810 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 811 * @param fieldName The index field name 812 * @param definition The metadata definition 813 */ 814 public void indexMultilingualStringMetadata(CompositeMetadata metadata, String metadataName, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition) 815 { 816 MultilingualString multilingualString = metadata.getMultilingualString(metadataName); 817 indexMultilingualStringValues(multilingualString, contentId, document, contentDoc, fieldName); 818 } 819 820 /** 821 * Index a multilingual string values 822 * @param value The multilingual string 823 * @param contentId The content id. For logging purposes 824 * @param document The solr document to index into 825 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 826 * @param fieldName The index field name 827 */ 828 public void indexMultilingualStringValues(MultilingualString value, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName) 829 { 830 Set<Locale> metaLocales = value.getLocales(); 831 List<String> appLanguages = _languagesManager.getAvailableLanguages() 832 .values() 833 .stream() 834 .map(Language::getCode) 835 .collect(Collectors.toList()); 836 for (String appLanguageCode : appLanguages) 837 { 838 Locale appLocale = new Locale(appLanguageCode); 839 if (metaLocales.contains(appLocale)) 840 { 841 String str = value.getValue(appLocale); 842 indexMultilingualStringValues(new String[] {str}, contentId, document, contentDoc, fieldName, appLocale.getLanguage()); 843 } 844 845 // Need to index sort field for every language of application, even if metadata does not have value for the given language 846 String sortValue = MultilingualStringHelper.getValue(value, appLocale); 847 indexMultilingualStringValuesForSorting(sortValue, document, fieldName, appLocale.getLanguage()); 848 } 849 } 850 851 /** 852 * Index multilingual 'string' values 853 * @param values The values 854 * @param contentId The content id. For logging purposes 855 * @param document The solr document to index into 856 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 857 * @param fieldName The index field name 858 * @param language The language for values. 859 */ 860 public void indexMultilingualStringValues(String[] values, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 861 { 862 for (String value : values) 863 { 864 document.addField(fieldName + "_txt_" + language, value); 865 document.addField(fieldName + "_txt_stemmed_" + language, value); 866 document.addField(fieldName + "_txt_ws_" + language, value); 867 868 // Index without analyzing. 869 String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(value, getLogger(), contentId, fieldName); 870 document.addField(fieldName + "_" + language + "_s", possiblyTruncatedValue); 871 872 // Index without analyzing but lower-case (for wildcard queries). 873 document.addField(fieldName + "_" + language + "_s_lower", possiblyTruncatedValue.toLowerCase()); 874 875 // Exact words tokenized by whitespace. 876 document.addField(fieldName + "_" + language + "_s_ws", value.toLowerCase()); 877 878 // Index with analyze (full-text search). 879 document.addField(fieldName + "_" + language + "_txt", value); 880 881 indexFulltextValue(document, contentDoc, value, language); 882 } 883 } 884 885 /** 886 * Index multilingual 'string' value in sort field 887 * @param value The value 888 * @param document The solr document to index into 889 * @param fieldName The index field name 890 * @param language The language 891 */ 892 public void indexMultilingualStringValuesForSorting(String value, SolrInputDocument document, String fieldName, String language) 893 { 894 String sortField = fieldName + "_" + language + SolrFieldHelper.getSortFieldSuffix(MetadataType.MULTILINGUAL_STRING); 895 if (StringUtils.isNotEmpty(value) && !document.containsKey(sortField)) 896 { 897 document.addField(sortField, SolrFieldHelper.getSortValue(value)); 898 } 899 } 900 901 /** 902 * Index 'string' values 903 * @param values The values 904 * @param contentId The content id. For logging purposes 905 * @param document The solr document to index into 906 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 907 * @param fieldName The index field name 908 * @param language The content language. 909 * @param isFacetable true if the field can be used as a facet. 910 */ 911 public void indexStringValues(String[] values, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, boolean isFacetable) 912 { 913 for (String value : values) 914 { 915 if (!isFacetable) 916 { 917 if (language != null) // Language can be null for multilingual content 918 { 919 // No enumerator: index as full-text. 920 document.addField(fieldName + "_txt_" + language, value); 921 document.addField(fieldName + "_txt_stemmed_" + language, value); 922 document.addField(fieldName + "_txt_ws_" + language, value); 923 } 924 } 925 else 926 { 927 // Facets (enumeration only) 928 document.addField(fieldName + "_s_dv", value); 929 } 930 931 // Index without analyzing. 932 String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(value, getLogger(), contentId, fieldName); 933 document.addField(fieldName + "_s", possiblyTruncatedValue); 934 935 // Index without analyzing but lower-case (for wildcard queries). 936 document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase()); 937 938 // Exact words tokenized by whitespace. 939 document.addField(fieldName + "_s_ws", value.toLowerCase()); 940 941 // Index with analyze (full-text search). 942 document.addField(fieldName + "_txt", value); 943 944 indexFulltextValue(document, contentDoc, value, language); 945 } 946 947 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 948 if (values.length > 0 && !document.containsKey(sortField)) 949 { 950 // FIXME Si la meta est enumerée, indexer le label ? dans quelle langue ? 951 document.addField(sortField, SolrFieldHelper.getSortValue(values[0])); 952 } 953 } 954 955 /** 956 * Index a 'date' metadata 957 * @param metadata The parent composite metadata 958 * @param metadataName The name of metadata to index 959 * @param document The solr document to index into 960 * @param fieldName The index field name 961 * @param definition The metadata definition 962 */ 963 public void indexDateMetadata (CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 964 { 965 Date[] dateValues = metadata.getDateArray(metadataName, new Date[0]); 966 indexDateValues (dateValues, document, fieldName); 967 } 968 969 /** 970 * Index 'date' values 971 * @param values The values 972 * @param document The solr document to index into 973 * @param fieldName The index field name 974 */ 975 public void indexDateValues (Date[] values, SolrInputDocument document, String fieldName) 976 { 977 for (Date value : values) 978 { 979 document.addField(fieldName + "_dt", SolrIndexer.dateFormat().format(value)); 980 } 981 982 String sortField = fieldName + "_dt_sort"; 983 if (values.length > 0 && !document.containsKey(sortField)) 984 { 985 document.addField(sortField, SolrIndexer.dateFormat().format(values[0])); 986 } 987 } 988 989 /** 990 * Index a 'datetime' metadata 991 * @param metadata The parent composite metadata 992 * @param metadataName The name of metadata to index 993 * @param document The solr document to index into 994 * @param fieldName The index field name 995 * @param definition The metadata definition 996 */ 997 public void indexDateTimeMetadata (CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 998 { 999 Date[] dateValues = metadata.getDateArray(metadataName, new Date[0]); 1000 indexDateTimeValues(dateValues, document, fieldName); 1001 } 1002 1003 /** 1004 * Index 'datetime' values 1005 * @param values The values 1006 * @param document The solr document to index into 1007 * @param fieldName The index field name 1008 */ 1009 public void indexDateTimeValues (Date[] values, SolrInputDocument document, String fieldName) 1010 { 1011 for (Date value : values) 1012 { 1013 document.addField(fieldName + "_dt", SolrIndexer.dateFormat().format(value)); 1014 } 1015 1016 String sortField = fieldName + "_dt_sort"; 1017 if (values.length > 0 && !document.containsKey(sortField)) 1018 { 1019 document.addField(sortField, SolrIndexer.dateFormat().format(values[0])); 1020 } 1021 } 1022 1023 /** 1024 * Index a 'double' metadata 1025 * @param metadata The parent composite metadata 1026 * @param metadataName The name of metadata to index 1027 * @param document The solr document to index into 1028 * @param fieldName The index field name 1029 * @param definition The metadata definition 1030 */ 1031 public void indexDoubleMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1032 { 1033 boolean isFacetable = definition.getEnumerator() != null; 1034 double[] values = metadata.getDoubleArray(metadataName, new double[0]); 1035 indexDoubleValues (ArrayUtils.toObject(values), document, fieldName, isFacetable); 1036 } 1037 1038 /** 1039 * Index 'double' values 1040 * @param values The values 1041 * @param document The solr document to index into 1042 * @param fieldName The index field name 1043 * @param isFacetable true if the field can be used as a facet. 1044 */ 1045 public void indexDoubleValues(Double[] values, SolrInputDocument document, String fieldName, boolean isFacetable) 1046 { 1047 for (Double value : values) 1048 { 1049 document.addField(fieldName + "_d", value); 1050 if (isFacetable) 1051 { 1052 document.addField(fieldName + "_d_dv", value); 1053 } 1054 } 1055 1056 String sortField = fieldName + "_d_sort"; 1057 if (values.length > 0 && !document.containsKey(sortField)) 1058 { 1059 document.addField(sortField, values[0]); 1060 } 1061 } 1062 1063 /** 1064 * Index a 'long' metadata 1065 * @param metadata The parent composite metadata 1066 * @param metadataName The name of metadata to index 1067 * @param document The solr document to index into 1068 * @param fieldName The index field name 1069 * @param definition The metadata definition 1070 */ 1071 public void indexLongMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1072 { 1073 boolean isFacetable = definition.getEnumerator() != null; 1074 long[] values = metadata.getLongArray(metadataName, new long[0]); 1075 indexLongValues(ArrayUtils.toObject(values), document, fieldName, isFacetable); 1076 } 1077 1078 /** 1079 * Index 'long' values 1080 * @param values The values 1081 * @param document The solr document to index into 1082 * @param fieldName The index field name 1083 * @param isFacetable true if the field can be used as a facet. 1084 */ 1085 public void indexLongValues(Long[] values, SolrInputDocument document, String fieldName, boolean isFacetable) 1086 { 1087 for (Long value : values) 1088 { 1089 document.addField(fieldName + "_l", value); 1090 if (isFacetable) 1091 { 1092 document.addField(fieldName + "_l_dv", value); 1093 } 1094 } 1095 1096 String sortField = fieldName + "_l_sort"; 1097 if (values.length > 0 && !document.containsKey(sortField)) 1098 { 1099 document.addField(sortField, values[0]); 1100 } 1101 } 1102 1103 /** 1104 * Index a 'user' metadata 1105 * @param metadata The parent composite metadata 1106 * @param metadataName The name of metadata to index 1107 * @param document The solr document to index into 1108 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1109 * @param fieldName The index field name 1110 * @param language The content language. 1111 * @param definition The metadata definition 1112 */ 1113 public void indexUserMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1114 { 1115 UserIdentity[] users = metadata.getUserArray(metadataName); 1116 indexUserValues(users, document, contentDoc, fieldName, language); 1117 } 1118 1119 /** 1120 * Index 'user' values 1121 * @param users The users 1122 * @param document The solr document to index into 1123 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1124 * @param fieldName The index field name 1125 * @param language The content language. 1126 */ 1127 public void indexUserValues(UserIdentity[] users, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1128 { 1129 int count = 0; 1130 for (UserIdentity userIdentity : users) 1131 { 1132 String fullName = _userHelper.getUserFullName(userIdentity); 1133 String sortableName = _userHelper.getUserSortableName(userIdentity); 1134 String identityAsString = UserIdentity.userIdentityToString(userIdentity); 1135 1136 indexFulltextValue(document, contentDoc, identityAsString, language); 1137 1138 // Facets 1139 document.addField(fieldName + "_s_dv", identityAsString); 1140 1141 // Dynamic fields 1142 document.addField(fieldName + "_s", identityAsString); 1143 1144 if (StringUtils.isNotEmpty(fullName)) 1145 { 1146 document.addField(fieldName + "_txt", fullName); 1147 1148 indexFulltextValue(document, contentDoc, fullName, language); 1149 } 1150 1151 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 1152 if (count == 0 && StringUtils.isNotEmpty(sortableName) && !document.containsKey(sortField)) 1153 { 1154 // Index only first user for sorting 1155 document.addField(sortField, SolrFieldHelper.getSortValue(sortableName)); 1156 } 1157 count++; 1158 } 1159 } 1160 1161 /** 1162 * Index a 'boolean' metadata 1163 * @param metadata The parent composite metadata 1164 * @param metadataName The name of metadata to index 1165 * @param document The solr document to index into 1166 * @param fieldName The index field name 1167 * @param definition The metadata definition 1168 */ 1169 public void indexBooleanMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1170 { 1171 boolean[] values = metadata.getBooleanArray(metadataName, new boolean[0]); 1172 indexBooleanValues(ArrayUtils.toObject(values), document, fieldName); 1173 } 1174 1175 /** 1176 * Index 'boolean' values 1177 * @param values The values 1178 * @param document The solr document to index into 1179 * @param fieldName The index field name 1180 */ 1181 public void indexBooleanValues(Boolean[] values, SolrInputDocument document, String fieldName) 1182 { 1183 for (Boolean value : values) 1184 { 1185 document.addField(fieldName + "_b", value); 1186 document.addField(fieldName + "_b_dv", value); 1187 } 1188 1189 String sortField = fieldName + "_b_sort"; 1190 if (values.length > 0 && !document.containsKey(sortField)) 1191 { 1192 document.addField(sortField, values[0]); 1193 } 1194 } 1195 1196 /** 1197 * Index a 'richtext' metadata 1198 * @param metadata The parent composite metadata 1199 * @param metadataName The name of metadata to index 1200 * @param document The solr document to index into 1201 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1202 * @param fieldName The index field name 1203 * @param language The content language. 1204 * @param definition The metadata definition 1205 */ 1206 public void indexRichtextMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1207 { 1208 indexRichtextValue(metadata.getRichText(metadataName), document, contentDoc, fieldName, language); 1209 } 1210 1211 /** 1212 * Index 'richtext' values 1213 * @param richText The rich text to index. 1214 * @param document The solr document to index into 1215 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1216 * @param fieldName The index field name. 1217 * @param language The content language. 1218 */ 1219 public void indexRichtextValue(RichText richText, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1220 { 1221 try (InputStream is = richText.getInputStream()) 1222 { 1223 String value = _richTextToString(is); 1224 1225 if (language != null) // language can be null for multilingual content 1226 { 1227 // Index as a text field. 1228 document.addField(fieldName + "_txt_" + language, value); 1229 document.addField(fieldName + "_txt_stemmed_" + language, value); 1230 document.addField(fieldName + "_txt_ws_" + language, value); 1231 } 1232 1233 // Index in the full-text value. 1234 SolrContentIndexer.indexFulltextValue(document, value, language); 1235 1236 if (contentDoc != null) 1237 { 1238 SolrContentIndexer.indexFulltextValue(contentDoc, value, language); 1239 } 1240 } 1241 catch (Exception e) 1242 { 1243 getLogger().warn("Failed to index RICH_TEXT '" + fieldName + "'", e); 1244 } 1245 } 1246 1247 /** 1248 * Gets a XML as a string and extract the text only 1249 * @param is The inputstream of XML 1250 * @return The text or null if the XML is not well formed 1251 */ 1252 protected String _richTextToString(InputStream is) 1253 { 1254 SAXParser saxParser = null; 1255 try 1256 { 1257 RichTextHandler txtHandler = new RichTextHandler(); 1258 saxParser = (SAXParser) _manager.lookup(SAXParser.ROLE); 1259 saxParser.parse(new InputSource(is), txtHandler); 1260 return txtHandler.getValue().trim(); 1261 } 1262 catch (ServiceException e) 1263 { 1264 getLogger().error("Unable to get a SAX parser", e); 1265 return null; 1266 } 1267 catch (IOException | SAXException e) 1268 { 1269 getLogger().error("Cannot parse inputstream", e); 1270 return null; 1271 } 1272 finally 1273 { 1274 _manager.release(saxParser); 1275 } 1276 } 1277 1278 1279 1280 /** 1281 * Index a 'binary' metadata 1282 * @param metadata The parent composite metadata 1283 * @param metadataName The name of metadata to index 1284 * @param document The solr document to index into 1285 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1286 * @param fieldName The index field name 1287 * @param language The content language. 1288 * @param definition The metadata definition 1289 */ 1290 public void indexBinaryMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1291 { 1292 // Index file name. 1293 BinaryMetadata binary = metadata.getBinaryMetadata(metadataName); 1294 document.addField(fieldName + "_txt", binary.getFilename()); 1295 1296 // Index the contents. 1297 indexFullTextBinary(metadata, metadataName, document, contentDoc, fieldName, language, definition); 1298 } 1299 1300 /** 1301 * Index a 'file' metadata 1302 * @param metadata The parent composite metadata 1303 * @param metadataName The name of metadata to index 1304 * @param document The solr document to index into 1305 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1306 * @param fieldName The index field name 1307 * @param language The content language. 1308 * @param definition The metadata definition 1309 */ 1310 public void indexFileMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1311 { 1312 if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.BINARY.equals(metadata.getType(metadataName))) 1313 { 1314 indexBinaryMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 1315 } 1316 else 1317 { 1318 // Resource from the explorer. 1319 String value = metadata.getString(metadataName); 1320 1321 try 1322 { 1323 Resource resource = (Resource) _resolver.resolveById(value); 1324 1325 // Index file name. 1326 document.addField(fieldName + "_txt", resource.getName()); 1327 1328 // Index the contents. 1329 indexResourceContent(resource, document, contentDoc, language); 1330 1331// document.addField(prefix + fieldName + "$path", resource.getId()); 1332// document.addField(prefix + fieldName + "$type", "explorer"); 1333// document.addField(prefix + fieldName + "$mime-type", resource.getMimeType()); 1334// document.addField(prefix + fieldName + "$filename", filename); 1335// document.addField(prefix + fieldName + "$lastModified", resource.getLastModified()); 1336// document.addField(prefix + fieldName + "$size", resource.getLength()); 1337// 1338// String viewUrl = "/plugins/explorer/resource?id=" + resource.getId(); 1339// document.addField(prefix + fieldName + "$viewUrl", viewUrl); 1340// document.addField(prefix + fieldName + "$downloadUrl", viewUrl + "&download=true"); 1341 } 1342 catch (AmetysRepositoryException e) 1343 { 1344 getLogger().warn(String.format("Unable to index the resource of id '%s' : resource does not exist.", value), e); 1345 } 1346 } 1347 } 1348 1349 /** 1350 * Index a 'file' metadata 1351 * @param values The values. 1352 * @param document The solr document to index into 1353 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1354 * @param fieldName The index field name 1355 * @param language The content language. 1356 */ 1357 public void indexFileValue(Object[] values, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1358 { 1359 String type = (String) values[0]; 1360 if (StringUtils.equalsIgnoreCase(org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.BINARY.name(), type)) 1361 { 1362 indexFullTextBinaryValue((InputStream) values[1], document, contentDoc, fieldName, language); 1363 } 1364 else 1365 { 1366 indexResourceContent((Resource) values[1], document, contentDoc, language); 1367 } 1368 } 1369 1370 /** 1371 * Index a 'binary' metadata 1372 * @param metadata The parent composite metadata 1373 * @param metadataName The name of metadata to index 1374 * @param document The solr document to index into 1375 * @param contentDoc The content document. 1376 * @param fieldName The index field name 1377 * @param language The content language. 1378 * @param definition The metadata definition 1379 */ 1380 protected void indexFullTextBinary(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1381 { 1382 try (InputStream is = metadata.getBinaryMetadata(metadataName).getInputStream()) 1383 { 1384 indexFullTextBinaryValue(is, document, contentDoc, fieldName, language); 1385 } 1386 catch (IOException e) 1387 { 1388 throw new RuntimeException(e); 1389 } 1390 } 1391 1392 /** 1393 * Index a 'binary' value 1394 * @param is An InputStream on the binary data. 1395 * @param document The solr document to index into 1396 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1397 * @param fieldName The index field name 1398 * @param language The content language. 1399 */ 1400 protected void indexFullTextBinaryValue(InputStream is, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1401 { 1402 try 1403 { 1404 String text = _tika.parseToString(is); 1405 1406 indexFulltextValue(document, contentDoc, text, language); 1407 } 1408 catch (Throwable e) 1409 { 1410 getLogger().warn(String.format("Failed to index binary field '%s'", fieldName), e); 1411 } 1412 } 1413 1414 /** 1415 * Index a 'content' metadata 1416 * @param metadata The parent composite metadata 1417 * @param metadataName The name of metadata to index 1418 * @param document The solr document to index into 1419 * @param fieldName The index field name 1420 * @param definition The metadata definition 1421 */ 1422 public void indexContentMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1423 { 1424 String[] contentIds = metadata.getStringArray(metadataName, new String[0]); 1425 indexContentValues(contentIds, document, fieldName); 1426 } 1427 1428 /** 1429 * Index content values. 1430 * @param contentIds The ID of the contents to index. 1431 * @param document The solr document to index into. 1432 * @param fieldName the field name. 1433 */ 1434 public void indexContentValues (String[] contentIds, SolrInputDocument document, String fieldName) 1435 { 1436 for (String contentId : contentIds) 1437 { 1438 document.addField(fieldName + "_s", contentId); 1439 // Facets 1440 document.addField(fieldName + "_s_dv", contentId); 1441 } 1442 1443 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 1444 if (contentIds.length > 0 && !document.containsKey(sortField)) 1445 { 1446 try 1447 { 1448 // TODO Est-ce qu'on peut faire autrement qu'un resolve ? 1449 Content content = _resolver.resolveById(contentIds[0]); 1450 CompositeMetadata metadataHolder = content.getMetadataHolder(); 1451 if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.MULTILINGUAL_STRING.equals(metadataHolder.getType(DefaultContent.METADATA_TITLE))) 1452 { 1453 MultilingualString value = metadataHolder.getMultilingualString(DefaultContent.METADATA_TITLE); 1454 for (Locale locale : value.getLocales()) 1455 { 1456 String str = value.getValue(locale); 1457 document.addField(sortField + "_" + locale.getLanguage(), SolrFieldHelper.getSortValue(str)); 1458 } 1459 } 1460 else 1461 { 1462 document.addField(sortField, SolrFieldHelper.getSortValue(_contentHelper.getTitle(content))); 1463 } 1464 } 1465 catch (AmetysRepositoryException e) 1466 { 1467 // Do not index 1468 } 1469 } 1470 } 1471 1472 /** 1473 * Index a 'sub_content' metadata 1474 * @param metadata The parent composite metadata 1475 * @param metadataName The name of metadata to index 1476 * @param document The solr document to index into 1477 * @param fieldName The index field name 1478 * @param definition The metadata definition 1479 */ 1480 public void indexSubContentMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1481 { 1482 TraversableAmetysObject objectCollection = metadata.getObjectCollection(metadataName); 1483 AmetysObjectIterable<Content> subcontents = objectCollection.getChildren(); 1484 for (Content subcontent : subcontents) 1485 { 1486 document.addField(fieldName + "_s", subcontent.getId()); 1487 // Facets 1488 document.addField(fieldName + "_s_dv", subcontent.getId()); 1489 } 1490 1491// String sortField = fieldName + "_s_sort"; 1492 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 1493 subcontents = objectCollection.getChildren(); 1494 Iterator<Content> it = subcontents.iterator(); 1495 1496 if (it.hasNext() && !document.containsKey(sortField)) 1497 { 1498 Content subcontent = it.next(); 1499 CompositeMetadata metadataHolder = subcontent.getMetadataHolder(); 1500 if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.MULTILINGUAL_STRING.equals(metadataHolder.getType(DefaultContent.METADATA_TITLE))) 1501 { 1502 MultilingualString value = metadataHolder.getMultilingualString(DefaultContent.METADATA_TITLE); 1503 for (Locale locale : value.getLocales()) 1504 { 1505 String str = value.getValue(locale); 1506 document.addField(sortField + "_" + locale.getLanguage(), SolrFieldHelper.getSortValue(str)); 1507 } 1508 } 1509 else 1510 { 1511 document.addField(sortField, SolrFieldHelper.getSortValue(_contentHelper.getTitle(subcontent))); 1512 } 1513 } 1514 } 1515 1516 /** 1517 * Index a 'geocode' metadata 1518 * @param metadata The parent composite metadata 1519 * @param metadataName The name of metadata to index 1520 * @param document The solr document to index into 1521 * @param fieldName The index field name 1522 * @param definition The metadata definition 1523 */ 1524 public void indexGeoCodeMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1525 { 1526 CompositeMetadata geoCodeMetadata = metadata.getCompositeMetadata(metadataName); 1527 if (geoCodeMetadata.hasMetadata("longitude") && geoCodeMetadata.hasMetadata("latitude")) 1528 { 1529 double longitude = geoCodeMetadata.getDouble("longitude"); 1530 double latitude = geoCodeMetadata.getDouble("latitude"); 1531 1532 indexGeocodeValue(latitude, longitude, document, fieldName); 1533 } 1534 } 1535 1536 /** 1537 * Index a 'geocode' metadata 1538 * @param latitude the coord latitude. 1539 * @param longitude the coord longitude. 1540 * @param document The solr document to index into 1541 * @param fieldName The index field name 1542 */ 1543 public void indexGeocodeValue(double latitude, double longitude, SolrInputDocument document, String fieldName) 1544 { 1545 document.addField(fieldName + "$longitude_d", longitude); 1546 document.addField(fieldName + "$latitude_d", latitude); 1547 1548 String geoFieldName = SolrFieldHelper.getIndexingFieldName(MetadataType.GEOCODE, fieldName); 1549 document.addField(geoFieldName, longitude + " " + latitude); 1550 } 1551 1552 /** 1553 * Index a composite metadata, i.e. browse and index the sub-metadatas. 1554 * @param content The content being indexed. 1555 * @param metadata The parent metadata. 1556 * @param metadataName The composite metadata name. 1557 * @param document The solr document to index into. 1558 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1559 * @param fieldName The field name. 1560 * @param definition The composite metadata definition. 1561 * @param additionalDocuments The solr additional documents used for repeater instance 1562 */ 1563 public void indexCompositeMetadata(Content content, CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition, List<SolrInputDocument> additionalDocuments) 1564 { 1565 CompositeMetadata compositeMetadata = metadata.getCompositeMetadata(metadataName); 1566 1567 // Index recursively 1568 Set<String> subMetadataNames = definition.getMetadataNames(); 1569 for (String subMetadataName : subMetadataNames) 1570 { 1571 if (compositeMetadata.hasMetadata(subMetadataName)) 1572 { 1573 indexMetadata(content, subMetadataName, compositeMetadata, document, contentDoc, additionalDocuments, fieldName + ContentConstants.METADATA_PATH_SEPARATOR + subMetadataName, definition.getMetadataDefinition(subMetadataName)); 1574 } 1575 } 1576 } 1577 1578 /** 1579 * Index a repeater metadata, i.e. browse and index the entries. 1580 * @param content The content being indexed. 1581 * @param metadata The parent metadata. 1582 * @param metadataName The repeater metadata name. 1583 * @param document The solr document to index into. 1584 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1585 * @param fieldName The field name. 1586 * @param definition The repeater metadata definition. 1587 * @param additionalDocuments The solr additional documents used for repeater instance 1588 */ 1589 public void indexRepeaterMetadata(Content content, CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition, List<SolrInputDocument> additionalDocuments) 1590 { 1591 CompositeMetadata compositeMetadata = metadata.getCompositeMetadata(metadataName); 1592 1593 // Get and sort the entry names. 1594 String[] entries = compositeMetadata.getMetadataNames(); 1595 Arrays.sort(entries, MetadataManager.REPEATER_ENTRY_COMPARATOR); 1596 1597 for (int i = 0; i < entries.length; i++) 1598 { 1599 String entryName = entries[i]; 1600 int position = i + 1; 1601 1602 CompositeMetadata entry = compositeMetadata.getCompositeMetadata(entryName); 1603 1604 String repeaterID = document.getField("id").getFirstValue().toString() + "/" + fieldName + "/" + entryName; 1605 1606 // Creates a new Solr document for each entry 1607 SolrInputDocument repDocument = new SolrInputDocument(); 1608 repDocument.addField("id", repeaterID); 1609 document.addField(fieldName + "_s_dv", repeaterID); 1610 1611 repDocument.addField(DOCUMENT_TYPE, TYPE_REPEATER); 1612 repDocument.addField(REPEATER_ENTRY_POSITION, position); 1613 // Add the created document to additional documents 1614 additionalDocuments.add(repDocument); 1615 1616 SolrInputDocument parentContentDoc = contentDoc != null ? contentDoc : document; 1617 1618 Set<String> subMetadataNames = definition.getMetadataNames(); 1619 for (String subMetadataName : subMetadataNames) 1620 { 1621 if (entry.hasMetadata(subMetadataName)) 1622 { 1623 // Created document is now the main document 1624 indexMetadata(content, subMetadataName, entry, repDocument, parentContentDoc, additionalDocuments, subMetadataName, definition.getMetadataDefinition(subMetadataName)); 1625 } 1626 } 1627 } 1628 } 1629 1630 /** 1631 * Index the content of a resource. 1632 * @param resource The resource 1633 * @param document The solr document to index into 1634 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1635 * @param language The content language. 1636 */ 1637 protected void indexResourceContent(Resource resource, SolrInputDocument document, SolrInputDocument contentDoc, String language) 1638 { 1639 try (InputStream is = resource.getInputStream()) 1640 { 1641 indexResourceContentValue(is, resource.getDCSubject(), resource.getDCDescription(), language, document, contentDoc); 1642 1643 // TODO Declare and index DC metadata? 1644 // DC meta 1645// _resourceIndexer.indexDublinCoreMetadata(resource, document); 1646 } 1647 catch (Exception e) 1648 { 1649 getLogger().error("Unable to index resource at " + resource.getPath(), e); 1650 } 1651 } 1652 1653 /** 1654 * Index the content of a resource. 1655 * @param is An input stream on the resource content. 1656 * @param keywords The resource keywords. 1657 * @param description The resource description. 1658 * @param language The content language. 1659 * @param document The solr document to index into 1660 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1661 * @throws TikaException If an error occurs extracting the document's text content. 1662 * @throws IOException If an error occurs reading the document's text content. 1663 */ 1664 protected void indexResourceContentValue(InputStream is, String[] keywords, String description, String language, SolrInputDocument document, SolrInputDocument contentDoc) throws IOException, TikaException 1665 { 1666 String value = _tika.parseToString(is); 1667 1668 indexFulltextValue(document, contentDoc, value, language); 1669 1670 for (String keyword : keywords) 1671 { 1672 indexFulltextValue(document, contentDoc, keyword, language); 1673 } 1674 1675 if (description != null) 1676 { 1677 indexFulltextValue(document, contentDoc, description, language); 1678 } 1679 } 1680 1681 /** 1682 * Index a full-text value. 1683 * @param mainDocument The document being used, can be either the content document itself or a repeater document. 1684 * @param contentDoc The parent content document. If the mainDocument is the content document, this will be null. 1685 * @param text The text to index. 1686 * @param language The content language. 1687 */ 1688 protected void indexFulltextValue(SolrInputDocument mainDocument, SolrInputDocument contentDoc, String text, String language) 1689 { 1690 indexFulltextValue(mainDocument, text, language); 1691 1692 // The content doc is null if the main document is the content doc (to prevent indexing the data twice). 1693 if (contentDoc != null) 1694 { 1695 indexFulltextValue(contentDoc, text, language); 1696 } 1697 } 1698 1699 /** 1700 * Index a full-text value. 1701 * @param document The document to index into. 1702 * @param text The text to index. 1703 * @param language The content language. 1704 */ 1705 public static void indexFulltextValue(SolrInputDocument document, String text, String language) 1706 { 1707 if (StringUtils.isNotBlank(text)) 1708 { 1709 document.addField(FULL_GENERAL, text); 1710 document.addField(FULL_EXACT_WS, text); 1711 1712 if (StringUtils.isNotEmpty(language)) 1713 { 1714 indexLanguageFulltextValue(document, text, language); 1715 } 1716 } 1717 } 1718 1719 /** 1720 * Index a full-text value. 1721 * @param document The document to index into. 1722 * @param text The text to index. 1723 * @param languages The languages. 1724 */ 1725 public static void indexFulltextValue(SolrInputDocument document, String text, Collection<String> languages) 1726 { 1727 if (StringUtils.isNotBlank(text)) 1728 { 1729 document.addField(FULL_GENERAL, text); 1730 document.addField(FULL_EXACT_WS, text); 1731 1732 for (String language : languages) 1733 { 1734 indexLanguageFulltextValue(document, text, language); 1735 } 1736 } 1737 } 1738 1739 /** 1740 * Index a full-text value in the language-specific fields. 1741 * @param document The document to index into. 1742 * @param text The text to index. 1743 * @param language The content language. 1744 */ 1745 protected static void indexLanguageFulltextValue(SolrInputDocument document, String text, String language) 1746 { 1747 document.addField(FULL_PREFIX + language, text); 1748 document.addField(FULL_STEMMED_PREFIX + language, text); 1749 } 1750}