001/* 002 * Copyright 2015 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.cms.content.indexing.solr; 017 018import java.io.IOException; 019import java.io.InputStream; 020import java.util.Arrays; 021import java.util.Collection; 022import java.util.Date; 023import java.util.HashMap; 024import java.util.Iterator; 025import java.util.List; 026import java.util.Locale; 027import java.util.Map; 028import java.util.Map.Entry; 029import java.util.stream.Collectors; 030import java.util.Optional; 031import java.util.Set; 032 033import org.apache.avalon.framework.component.Component; 034import org.apache.avalon.framework.service.ServiceException; 035import org.apache.avalon.framework.service.ServiceManager; 036import org.apache.avalon.framework.service.Serviceable; 037import org.apache.commons.lang3.ArrayUtils; 038import org.apache.commons.lang3.StringUtils; 039import org.apache.excalibur.xml.sax.SAXParser; 040import org.apache.solr.common.SolrInputDocument; 041import org.apache.tika.Tika; 042import org.apache.tika.exception.TikaException; 043import org.xml.sax.InputSource; 044import org.xml.sax.SAXException; 045 046import org.ametys.cms.content.ContentHelper; 047import org.ametys.cms.content.RichTextHandler; 048import org.ametys.cms.content.references.OutgoingReferences; 049import org.ametys.cms.content.references.OutgoingReferencesExtractor; 050import org.ametys.cms.contenttype.ContentConstants; 051import org.ametys.cms.contenttype.ContentType; 052import org.ametys.cms.contenttype.ContentTypeExtensionPoint; 053import org.ametys.cms.contenttype.ContentTypesHelper; 054import org.ametys.cms.contenttype.MetadataDefinition; 055import org.ametys.cms.contenttype.MetadataManager; 056import org.ametys.cms.contenttype.MetadataType; 057import org.ametys.cms.contenttype.RepeaterDefinition; 058import org.ametys.cms.contenttype.indexing.CustomIndexingField; 059import org.ametys.cms.contenttype.indexing.CustomMetadataIndexingField; 060import org.ametys.cms.contenttype.indexing.IndexingField; 061import org.ametys.cms.contenttype.indexing.IndexingModel; 062import org.ametys.cms.contenttype.indexing.MetadataIndexingField; 063import org.ametys.cms.languages.Language; 064import org.ametys.cms.languages.LanguagesManager; 065import org.ametys.cms.repository.Content; 066import org.ametys.cms.repository.DefaultContent; 067import org.ametys.cms.search.model.SystemProperty; 068import org.ametys.cms.search.model.SystemPropertyExtensionPoint; 069import org.ametys.core.user.UserIdentity; 070import org.ametys.plugins.core.user.UserHelper; 071import org.ametys.plugins.explorer.resources.Resource; 072import org.ametys.plugins.explorer.resources.metadata.TikaProvider; 073import org.ametys.plugins.repository.AmetysObject; 074import org.ametys.plugins.repository.AmetysObjectIterable; 075import org.ametys.plugins.repository.AmetysObjectResolver; 076import org.ametys.plugins.repository.AmetysRepositoryException; 077import org.ametys.plugins.repository.TraversableAmetysObject; 078import org.ametys.plugins.repository.UnknownAmetysObjectException; 079import org.ametys.plugins.repository.metadata.BinaryMetadata; 080import org.ametys.plugins.repository.metadata.CompositeMetadata; 081import org.ametys.plugins.repository.metadata.MultilingualString; 082import org.ametys.plugins.repository.metadata.MultilingualStringHelper; 083import org.ametys.plugins.repository.metadata.RichText; 084import org.ametys.runtime.plugin.component.AbstractLogEnabled; 085 086/** 087 * Component for {@link Content} indexing into a Solr server. 088 */ 089public class SolrContentIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrFieldNames 090{ 091 /** The component role. */ 092 public static final String ROLE = SolrContentIndexer.class.getName(); 093 094 /** The Ametys objet resolver */ 095 protected AmetysObjectResolver _resolver; 096 /** The content type extension point */ 097 protected ContentTypeExtensionPoint _cTypeEP; 098 /** The content type helper */ 099 protected ContentTypesHelper _cTypesHelper; 100 /** The users manager */ 101 protected UserHelper _userHelper; 102 /** The Tika instance */ 103 protected Tika _tika; 104 /** The resource indexer */ 105 protected SolrResourceIndexer _resourceIndexer; 106 /** The sax parser */ 107 protected SAXParser _parser; 108 /** The system property extension point. */ 109 protected SystemPropertyExtensionPoint _systemPropEP; 110 /** The content helper */ 111 protected ContentHelper _contentHelper; 112 /** The outgoing references extractor */ 113 protected OutgoingReferencesExtractor _outgoingReferencesExtractor; 114 /** The manager for languages */ 115 protected LanguagesManager _languagesManager; 116 117 @Override 118 public void service(ServiceManager manager) throws ServiceException 119 { 120 _resolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE); 121 _resourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE); 122 _cTypeEP = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE); 123 _contentHelper = (ContentHelper) manager.lookup(ContentHelper.ROLE); 124 _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE); 125 _userHelper = (UserHelper) manager.lookup(UserHelper.ROLE); 126 TikaProvider tikaProvider = (TikaProvider) manager.lookup(TikaProvider.ROLE); 127 _tika = tikaProvider.getTika(); 128 _parser = (SAXParser) manager.lookup(SAXParser.ROLE); 129 _systemPropEP = (SystemPropertyExtensionPoint) manager.lookup(SystemPropertyExtensionPoint.ROLE); 130 _outgoingReferencesExtractor = (OutgoingReferencesExtractor) manager.lookup(OutgoingReferencesExtractor.ROLE); 131 _languagesManager = (LanguagesManager) manager.lookup(LanguagesManager.ROLE); 132 } 133 134 /** 135 * Populate a solr input document by adding fields to index into it. 136 * @param content The content to index 137 * @param document The main solr document to index into 138 * @param additionalDocuments The additional documents for repeater instances 139 * @throws Exception if an error occurred while indexing 140 */ 141 public void indexContent(Content content, SolrInputDocument document, List<SolrInputDocument> additionalDocuments) throws Exception 142 { 143 // Properties specific to a stand-alone indexation. 144 String contentId = content.getId(); 145 document.addField(ID, contentId); 146 document.addField(DOCUMENT_TYPE, TYPE_CONTENT); 147 148 indexContentTitle(content, document); 149 150 document.addField(CONTENT_NAME, SolrIndexer.truncateUtf8StringValue(content.getName(), getLogger(), contentId, CONTENT_NAME)); 151 _indexOutgoingReferences(content, document); 152 153 document.addField(WORKFLOW_REF_DV, contentId + "#workflow"); 154 155 // Index content system properties. 156 indexSystemProperties(content, document); 157 158 // Index the fields specified in the indexation model. 159 indexModelFields(content, document, additionalDocuments); 160 } 161 162 private void _indexOutgoingReferences(Content content, SolrInputDocument document) 163 { 164 // Found by the extractor (resource references found in all metadata of the content) 165 _outgoingReferencesExtractor.getOutgoingReferences(content).values() // key is the metadata,we do not care what metadata it comes from 166 .parallelStream() 167 .map(OutgoingReferences::entrySet) 168 .flatMap(Set::parallelStream) 169 .filter(outgoingRefs -> outgoingRefs.getKey().equals("explorer")) // only references of the resource explorer 170 .map(Entry::getValue) 171 .flatMap(List::parallelStream) // flat the resource ids 172 .forEach(resourceId -> document.addField(CONTENT_OUTGOING_REFEERENCES_RESOURCE_IDS, resourceId)); 173 174 // Attachments of the content (just the root folder) 175 Optional.ofNullable(content.getRootAttachments()).map(AmetysObject::getId).ifPresent(id -> document.addField(CONTENT_OUTGOING_REFEERENCES_RESOURCE_IDS, id)); 176 } 177 178 /** 179 * Index the content title 180 * @param content The title 181 * @param document The main solr document to index into 182 */ 183 protected void indexContentTitle(Content content, SolrInputDocument document) 184 { 185 if (content.getMetadataHolder().getType("title") == org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.MULTILINGUAL_STRING) 186 { 187 MultilingualString value = content.getMetadataHolder().getMultilingualString(DefaultContent.METADATA_TITLE); 188 indexMultilingualStringValues(value, content.getId(), document, null, TITLE); 189 } 190 else 191 { 192 document.addField(TITLE, SolrIndexer.truncateUtf8StringValue(_contentHelper.getTitle(content), getLogger(), content.getId(), TITLE)); 193 } 194 } 195 196 /** 197 * Index the system properties of a content. 198 * @param content The content to index. 199 * @param document The solr document to index into. 200 */ 201 protected void indexSystemProperties(Content content, SolrInputDocument document) 202 { 203 for (String sysPropId : _systemPropEP.getExtensionsIds()) 204 { 205 SystemProperty sysProp = _systemPropEP.getExtension(sysPropId); 206 207 sysProp.index(content, document); 208 } 209 } 210 211 /** 212 * Index the content type and all its supertypes in the given document (recursively). 213 * @param cTypeId The ID of the content type to index. 214 * @param document The solr document to index into. 215 * @param fieldName The field name. 216 */ 217 protected void indexAllContentTypes(String cTypeId, SolrInputDocument document, String fieldName) 218 { 219 document.addField(fieldName, cTypeId); 220 221 if (_cTypeEP.hasExtension(cTypeId)) 222 { 223 ContentType contentType = _cTypeEP.getExtension(cTypeId); 224 for (String supertypeId : contentType.getSupertypeIds()) 225 { 226 indexAllContentTypes(supertypeId, document, fieldName); 227 } 228 } 229 } 230 231 /** 232 * Index the fields specified in the indexation model. 233 * @param content The content to index. 234 * @param document The main content solr document. 235 * @param additionalDocuments The additional documents for repeater instances. 236 */ 237 protected void indexModelFields(Content content, SolrInputDocument document, List<SolrInputDocument> additionalDocuments) 238 { 239 IndexingModel indexingModel = null; 240 try 241 { 242 indexingModel = _cTypesHelper.getIndexingModel(content); 243 } 244 catch (RuntimeException e) 245 { 246 getLogger().error("indexContent > Error getting the indexing model of content " + content.getId(), e); 247 throw e; 248 } 249 250 for (IndexingField field : indexingModel.getFields()) 251 { 252 if (field instanceof CustomIndexingField) 253 { 254 Object[] values = ((CustomIndexingField) field).getValues(content); 255 indexValues(content, field.getName(), field.getType(), values, document, null); 256 } 257 else if (field instanceof MetadataIndexingField) 258 { 259 String metadataPath = ((MetadataIndexingField) field).getMetadataPath(); 260 String[] pathSegments = metadataPath.split(ContentConstants.METADATA_PATH_SEPARATOR); 261 262 MetadataDefinition definition = _cTypesHelper.getMetadataDefinition(pathSegments[0], content.getTypes(), content.getMixinTypes()); 263 if (definition != null) 264 { 265 findAndIndexMetadata(content, pathSegments, content.getMetadataHolder(), definition, field, field.getName(), document, null, additionalDocuments); 266 } 267 } 268 } 269 } 270 271 /** 272 * Populate a Solr input document by adding fields for a single system property. 273 * @param content The content to index 274 * @param propertyId The system property ID. 275 * @param document The solr document 276 * @return true if there are partial update to apply 277 * @throws Exception if an error occurred 278 */ 279 public boolean indexPartialSystemProperty(Content content, String propertyId, SolrInputDocument document) throws Exception 280 { 281 if (!_systemPropEP.hasExtension(propertyId)) 282 { 283 throw new IllegalStateException("The property '" + propertyId + "' can't be indexed as it does not exist."); 284 } 285 286 SolrInputDocument tempDocument = new SolrInputDocument(); 287 288 SystemProperty property = _systemPropEP.getExtension(propertyId); 289 property.index(content, tempDocument); 290 291 if (tempDocument.isEmpty()) 292 { 293 // Does not have any partial update to apply, avoid to erase all the existing fields on the Solr document corresponding to this content (it would be lost) 294 return false; 295 } 296 297 // Copy the indexed values as partial updates. 298 for (String fieldName : tempDocument.getFieldNames()) 299 { 300 Collection<Object> fieldValues = tempDocument.getFieldValues(fieldName); 301 302 Map<String, Object> partialUpdate = new HashMap<>(); 303 partialUpdate.put("set", fieldValues); 304 document.addField(fieldName, partialUpdate); 305 } 306 307 document.addField("id", content.getId()); 308 309 return true; 310 } 311 312 /** 313 * Find the metadata to index from its path 314 * @param content the content currently being traversed. 315 * @param pathSegments The segments of path of metadata to index 316 * @param metadata The parent composite metadata 317 * @param definition The metadata definition 318 * @param field the current indexing field. 319 * @param fieldName the name of the field to index. 320 * @param document The main solr document to index into 321 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 322 * @param additionalDocuments The additional documents 323 */ 324 protected void findAndIndexMetadata(Content content, String[] pathSegments, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 325 { 326 String currentFieldName = pathSegments[0]; 327 328 IndexingModel indexingModel = null; 329 try 330 { 331 indexingModel = _cTypesHelper.getIndexingModel(content); 332 } 333 catch (RuntimeException e) 334 { 335 if (content != null) 336 { 337 getLogger().error("findAndIndexMetadata > Error while indexing content " + content.getId() + " metadata", e); 338 } 339 else 340 { 341 getLogger().error("findAndIndexMetadata > Error while indexing null content metadata"); 342 } 343 throw e; 344 } 345 346 IndexingField refField = indexingModel.getField(currentFieldName); 347 if (refField != null && refField instanceof CustomMetadataIndexingField) 348 { 349 CustomMetadataIndexingField overridingField = (CustomMetadataIndexingField) refField; 350 findAndIndexOverridingField(content, indexingModel, overridingField, fieldName, definition, pathSegments, document, contentDoc, additionalDocuments); 351 } 352 else 353 { 354 if (metadata.hasMetadata(currentFieldName)) 355 { 356 findAndIndexMetadataField(content, pathSegments, metadata, definition, field, fieldName, document, contentDoc, additionalDocuments); 357 } 358 } 359 } 360 361 /** 362 * Find and index a metadata. 363 * @param content the current content being traversed. 364 * @param pathSegments the full metadata path segments. 365 * @param metadata the current metadata holder. 366 * @param definition the current metadata definition. 367 * @param field the current indexing field. 368 * @param fieldName the name of the field to index. 369 * @param document the solr main document. 370 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 371 * @param additionalDocuments the solr additional documents. 372 */ 373 protected void findAndIndexMetadataField(Content content, String[] pathSegments, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 374 { 375 String currentFieldName = pathSegments[0]; 376 377 if (pathSegments.length == 1) 378 { 379 indexMetadata(content, currentFieldName, metadata, document, contentDoc, additionalDocuments, fieldName, definition); 380 return; 381 } 382 383 String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length); 384 385 switch (definition.getType()) 386 { 387 case BINARY: 388 case BOOLEAN: 389 case STRING: 390 case MULTILINGUAL_STRING: 391 case USER: 392 case LONG: 393 case DOUBLE: 394 case DATE: 395 case DATETIME: 396 case REFERENCE: 397 case RICH_TEXT: 398 case FILE: 399 case GEOCODE: 400 getLogger().warn("The metadata '{}' of type {} can not be a part of a path to index : {}", currentFieldName, definition.getType().toString(), StringUtils.join(pathSegments, ContentConstants.METADATA_PATH_SEPARATOR)); 401 break; 402 case CONTENT: 403 String[] contentIds = metadata.getStringArray(currentFieldName, new String[0]); 404 for (String contentId : contentIds) 405 { 406 try 407 { 408 Content refContent = _resolver.resolveById(contentId); 409 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], refContent.getTypes(), refContent.getMixinTypes()); 410 findAndIndexMetadata(refContent, followingSegments, refContent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 411 } 412 catch (UnknownAmetysObjectException e) 413 { 414 // Nothing to index 415 } 416 } 417 break; 418 case SUB_CONTENT: 419 TraversableAmetysObject objectCollection = metadata.getObjectCollection(currentFieldName); 420 AmetysObjectIterable<Content> subcontents = objectCollection.getChildren(); 421 for (Content subcontent : subcontents) 422 { 423 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], subcontent.getTypes(), subcontent.getMixinTypes()); 424 findAndIndexMetadata(subcontent, followingSegments, subcontent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 425 } 426 break; 427 case COMPOSITE: 428 CompositeMetadata composite = metadata.getCompositeMetadata(currentFieldName); 429 430 if (definition instanceof RepeaterDefinition) 431 { 432 String[] entries = composite.getMetadataNames(); 433 for (String entry : entries) 434 { 435 findAndIndexMetadata(content, followingSegments, composite.getCompositeMetadata(entry), definition.getMetadataDefinition(followingSegments[0]), field, fieldName, document, contentDoc, additionalDocuments); 436 } 437 } 438 else 439 { 440 findAndIndexMetadata(content, followingSegments, composite, definition.getMetadataDefinition(followingSegments[0]), field, fieldName, document, contentDoc, additionalDocuments); 441 } 442 break; 443 default: 444 break; 445 446 } 447 } 448 449 /** 450 * Find and index a property represented by an overriding field. 451 * @param content the current content being traversed. 452 * @param indexingModel the current indexing model. 453 * @param pathSegments the full metadata path segments. 454 * @param definition the current metadata definition. 455 * @param field the current indexing field. 456 * @param fieldName the name of the field to index. 457 * @param document the solr main document. 458 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 459 * @param additionalDocuments the solr additional documents. 460 */ 461 protected void findAndIndexOverridingField(Content content, IndexingModel indexingModel, CustomMetadataIndexingField field, String fieldName, MetadataDefinition definition, String[] pathSegments, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 462 { 463 String currentFieldName = field.getName(); 464 465 if (pathSegments.length == 1) 466 { 467 indexOverridingField(field, content, fieldName, document, contentDoc, additionalDocuments); 468 return; 469 } 470 471 String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length); 472 MetadataType type = definition.getType(); 473 474 switch (type) 475 { 476 case BINARY: 477 case BOOLEAN: 478 case STRING: 479 case MULTILINGUAL_STRING: 480 case USER: 481 case LONG: 482 case DOUBLE: 483 case DATE: 484 case DATETIME: 485 case REFERENCE: 486 case RICH_TEXT: 487 case FILE: 488 case GEOCODE: 489 getLogger().warn("The field '{}' of type {} can not be a part of a path to index : {}", currentFieldName, type.toString(), StringUtils.join(pathSegments, ContentConstants.METADATA_PATH_SEPARATOR)); 490 break; 491 case COMPOSITE: 492 getLogger().warn("The type {} is invalid for the overriding field '{}'.", type.toString(), currentFieldName); 493 break; 494 case CONTENT: 495 case SUB_CONTENT: 496 String[] contentIds = (String[]) field.getValues(content); 497 for (String contentId : contentIds) 498 { 499 Content refContent = _resolver.resolveById(contentId); 500 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], refContent.getTypes(), refContent.getMixinTypes()); 501 findAndIndexMetadata(refContent, followingSegments, refContent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 502 } 503 break; 504 default: 505 break; 506 } 507 } 508 509 /** 510 * Index a content metadata. 511 * @param content the current content being traversed. 512 * @param metadataName The name of metadata to index 513 * @param metadata The parent composite metadata 514 * @param document the solr document to index into. 515 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 516 * @param additionalDocuments The solr additional documents used for repeater instance 517 * @param fieldName the name of the indexed field. 518 * @param definition the metadata definition. 519 */ 520 public void indexMetadata(Content content, String metadataName, CompositeMetadata metadata, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments, String fieldName, MetadataDefinition definition) 521 { 522 String language = content.getLanguage(); 523 524 switch (definition.getType()) 525 { 526 case STRING: 527 indexStringMetadata(metadata, metadataName, content.getId(), document, contentDoc, fieldName, language, definition); 528 break; 529 case MULTILINGUAL_STRING: 530 indexMultilingualStringMetadata(metadata, metadataName, content.getId(), document, contentDoc, fieldName, definition); 531 break; 532 case USER: 533 indexUserMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 534 break; 535 case GEOCODE: 536 indexGeoCodeMetadata(metadata, metadataName, document, fieldName, definition); 537 break; 538 case BINARY: 539 indexBinaryMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 540 break; 541 case FILE: 542 indexFileMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 543 break; 544 case DATE: 545 indexDateMetadata(metadata, metadataName, document, fieldName, definition); 546 break; 547 case DATETIME: 548 indexDateTimeMetadata(metadata, metadataName, document, fieldName, definition); 549 break; 550 case CONTENT: 551 indexContentMetadata(metadata, metadataName, document, fieldName, definition); 552 break; 553 case SUB_CONTENT: 554 indexSubContentMetadata(metadata, metadataName, document, fieldName, definition); 555 break; 556 case LONG: 557 indexLongMetadata(metadata, metadataName, document, fieldName, definition); 558 break; 559 case DOUBLE: 560 indexDoubleMetadata(metadata, metadataName, document, fieldName, definition); 561 break; 562 case BOOLEAN: 563 indexBooleanMetadata(metadata, metadataName, document, fieldName, definition); 564 break; 565 case RICH_TEXT: 566 indexRichtextMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 567 break; 568 case COMPOSITE: 569 if (definition instanceof RepeaterDefinition) 570 { 571 indexRepeaterMetadata(content, metadata, metadataName, document, contentDoc, fieldName, definition, additionalDocuments); 572 } 573 else 574 { 575 indexCompositeMetadata(content, metadata, metadataName, document, contentDoc, fieldName, definition, additionalDocuments); 576 } 577 break; 578 case REFERENCE: 579 // TODO reference -> to be indexed? https://issues.ametys.org/browse/CMS-8623 580 break; 581 default: 582 break; 583 } 584 } 585 586 /** 587 * Index a property represented by an overriding field. 588 * @param field The overriding field. 589 * @param content The content of which to get the property. 590 * @param fieldName The name of the field to index. 591 * @param document the solr document to index into. 592 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 593 * @param additionalDocuments The solr additional documents used for repeater instance 594 */ 595 public void indexOverridingField(CustomMetadataIndexingField field, Content content, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 596 { 597 Object[] values = field.getValues(content); 598 MetadataDefinition definition = field.getMetadataDefinition(); 599 boolean isFacetable = definition.getEnumerator() != null; 600 String language = content.getLanguage(); 601 602 switch (definition.getType()) 603 { 604 case STRING: 605 indexStringValues((String[]) values, content.getId(), document, contentDoc, fieldName, language, isFacetable); 606 break; 607 case MULTILINGUAL_STRING: 608 indexMultilingualStringValues((MultilingualString) values[0], content.getId(), document, contentDoc, fieldName); 609 break; 610 case USER: 611 UserIdentity[] users = new UserIdentity[values.length]; 612 for (int i = 0; i < values.length; i++) 613 { 614 users[i] = UserIdentity.stringToUserIdentity((String) values[i]); 615 } 616 indexUserValues(users, document, contentDoc, fieldName, language); 617 break; 618 case GEOCODE: 619 if (values.length > 1) 620 { 621 indexGeocodeValue((double) values[0], (double) values[1], document, fieldName); 622 } 623 break; 624 case BINARY: 625 if (values.length > 0) 626 { 627 indexFullTextBinaryValue((InputStream) values[0], document, contentDoc, fieldName, language); 628 } 629 break; 630 case FILE: 631 indexFileValue(values, document, contentDoc, fieldName, language); 632 break; 633 case DATE: 634 indexDateValues((Date[]) values, document, fieldName); 635 break; 636 case DATETIME: 637 indexDateTimeValues((Date[]) values, document, fieldName); 638 break; 639 case CONTENT: 640 indexContentValues((String[]) values, document, fieldName); 641 break; 642 case SUB_CONTENT: 643 indexContentValues((String[]) values, document, fieldName); 644 break; 645 case LONG: 646 indexLongValues((Long[]) values, document, fieldName, isFacetable); 647 break; 648 case DOUBLE: 649 indexDoubleValues((Double[]) values, document, fieldName, isFacetable); 650 break; 651 case BOOLEAN: 652 indexBooleanValues((Boolean[]) values, document, fieldName); 653 break; 654 case RICH_TEXT: 655 if (values.length > 0) 656 { 657 indexRichtextValue((RichText) values[0], document, contentDoc, fieldName, language); 658 } 659 break; 660 case COMPOSITE: 661 break; 662 case REFERENCE: 663 // TODO reference -> to be indexed? https://issues.ametys.org/browse/CMS-8623 664 break; 665 default: 666 break; 667 } 668 } 669 670 /** 671 * Index values 672 * @param content The content being indexed. 673 * @param fieldName The Solr field's name 674 * @param type the type of values to index 675 * @param values the values 676 * @param document the Solr document 677 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 678 */ 679 public void indexValues(Content content, String fieldName, MetadataType type, Object[] values, SolrInputDocument document, SolrInputDocument contentDoc) 680 { 681 String language = content.getLanguage(); 682 683 switch (type) 684 { 685 case STRING: 686 indexStringValues((String[]) values, content.getId(), document, contentDoc, fieldName, language, false); 687 break; 688 case MULTILINGUAL_STRING: 689 if (values.length > 0) 690 { 691 indexMultilingualStringValues((MultilingualString) values[0], content.getId(), document, contentDoc, fieldName); 692 } 693 break; 694 case LONG: 695 indexLongValues((Long[]) values, document, fieldName, false); 696 break; 697 case DOUBLE: 698 indexDoubleValues((Double[]) values, document, fieldName, false); 699 break; 700 case DATE: 701 indexDateValues((Date[]) values, document, fieldName); 702 break; 703 case DATETIME: 704 indexDateTimeValues((Date[]) values, document, fieldName); 705 break; 706 case CONTENT: 707 indexContentValues((String[]) values, document, fieldName); 708 break; 709 case BOOLEAN: 710 indexBooleanValues((Boolean[]) values, document, fieldName); 711 break; 712 case USER: 713 UserIdentity[] users = new UserIdentity[values.length]; 714 for (int i = 0; i < values.length; i++) 715 { 716 users[i] = UserIdentity.stringToUserIdentity((String) values[i]); 717 } 718 indexUserValues(users, document, contentDoc, fieldName, language); 719 break; 720 case RICH_TEXT: 721 indexRichtextValue((RichText) values[0], document, contentDoc, fieldName, language); 722 break; 723 case BINARY: 724 case FILE: 725 case COMPOSITE: 726 case REFERENCE: 727 case SUB_CONTENT: 728 case GEOCODE: 729 getLogger().warn("Only primitive type is allowed on a custom indexing field"); 730 break; 731 default: 732 break; 733 } 734 } 735 736 737 /** 738 * Index a 'string' metadata 739 * @param metadata The parent composite metadata 740 * @param metadataName The name of metadata to index 741 * @param contentId The content id. For logging purposes 742 * @param document The solr document to index into 743 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 744 * @param fieldName The index field name 745 * @param language The content language. 746 * @param definition The metadata definition 747 */ 748 public void indexStringMetadata(CompositeMetadata metadata, String metadataName, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 749 { 750 String[] strValues = metadata.getStringArray(metadataName, new String[0]); 751 indexStringValues(strValues, contentId, document, contentDoc, fieldName, language, definition.getEnumerator() != null); 752 } 753 754 /** 755 * Index a multilingual string metadata 756 * @param metadata The parent composite metadata 757 * @param metadataName The name of metadata to index 758 * @param contentId The content id. For logging purposes 759 * @param document The solr document to index into 760 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 761 * @param fieldName The index field name 762 * @param definition The metadata definition 763 */ 764 public void indexMultilingualStringMetadata(CompositeMetadata metadata, String metadataName, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition) 765 { 766 MultilingualString multilingualString = metadata.getMultilingualString(metadataName); 767 indexMultilingualStringValues(multilingualString, contentId, document, contentDoc, fieldName); 768 } 769 770 /** 771 * Index a multilingual string values 772 * @param value The multilingual string 773 * @param contentId The content id. For logging purposes 774 * @param document The solr document to index into 775 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 776 * @param fieldName The index field name 777 */ 778 public void indexMultilingualStringValues(MultilingualString value, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName) 779 { 780 Set<Locale> metaLocales = value.getLocales(); 781 List<String> appLanguages = _languagesManager.getAvailableLanguages() 782 .values() 783 .stream() 784 .map(Language::getCode) 785 .collect(Collectors.toList()); 786 for (String appLanguageCode : appLanguages) 787 { 788 Locale appLocale = new Locale(appLanguageCode); 789 if (metaLocales.contains(appLocale)) 790 { 791 String str = value.getValue(appLocale); 792 indexMultilingualStringValues(new String[] {str}, contentId, document, contentDoc, fieldName, appLocale.getLanguage()); 793 } 794 795 // Need to index sort field for every language of application, even if metadata does not have value for the given language 796 String sortValue = MultilingualStringHelper.getValue(value, appLocale); 797 indexMultilingualStringValuesForSorting(sortValue, document, fieldName, appLanguageCode); 798 } 799 } 800 801 /** 802 * Index multilingual 'string' values 803 * @param values The values 804 * @param contentId The content id. For logging purposes 805 * @param document The solr document to index into 806 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 807 * @param fieldName The index field name 808 * @param language The language for values. 809 */ 810 public void indexMultilingualStringValues(String[] values, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 811 { 812 for (String value : values) 813 { 814 document.addField(fieldName + "_txt_" + language, value); 815 document.addField(fieldName + "_txt_stemmed_" + language, value); 816 document.addField(fieldName + "_txt_ws_" + language, value); 817 818 // Index without analyzing. 819 String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(value, getLogger(), contentId, fieldName); 820 document.addField(fieldName + "_" + language + "_s", possiblyTruncatedValue); 821 822 // Index without analyzing but lower-case (for wildcard queries). 823 document.addField(fieldName + "_" + language + "_s_lower", possiblyTruncatedValue.toLowerCase()); 824 825 // Exact words tokenized by whitespace. 826 document.addField(fieldName + "_" + language + "_s_ws", value.toLowerCase()); 827 828 // Index with analyze (full-text search). 829 document.addField(fieldName + "_" + language + "_txt", value); 830 831 indexFulltextValue(document, contentDoc, value, language); 832 } 833 } 834 835 /** 836 * Index multilingual 'string' value in sort field 837 * @param value The value 838 * @param document The solr document to index into 839 * @param fieldName The index field name 840 * @param language The language 841 */ 842 public void indexMultilingualStringValuesForSorting(String value, SolrInputDocument document, String fieldName, String language) 843 { 844 String sortField = fieldName + "_" + language + SolrFieldHelper.getSortFieldSuffix(MetadataType.MULTILINGUAL_STRING); 845 if (StringUtils.isNotEmpty(value) && !document.containsKey(sortField)) 846 { 847 document.addField(sortField, SolrFieldHelper.getSortValue(value)); 848 } 849 } 850 851 /** 852 * Index 'string' values 853 * @param values The values 854 * @param contentId The content id. For logging purposes 855 * @param document The solr document to index into 856 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 857 * @param fieldName The index field name 858 * @param language The content language. 859 * @param isFacetable true if the field can be used as a facet. 860 */ 861 public void indexStringValues(String[] values, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, boolean isFacetable) 862 { 863 for (String value : values) 864 { 865 if (!isFacetable) 866 { 867 if (language != null) // Language can be null for multilingual content 868 { 869 // No enumerator: index as full-text. 870 document.addField(fieldName + "_txt_" + language, value); 871 document.addField(fieldName + "_txt_stemmed_" + language, value); 872 document.addField(fieldName + "_txt_ws_" + language, value); 873 } 874 } 875 else 876 { 877 // Facets (enumeration only) 878 document.addField(fieldName + "_s_dv", value); 879 } 880 881 // Index without analyzing. 882 String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(value, getLogger(), contentId, fieldName); 883 document.addField(fieldName + "_s", possiblyTruncatedValue); 884 885 // Index without analyzing but lower-case (for wildcard queries). 886 document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase()); 887 888 // Exact words tokenized by whitespace. 889 document.addField(fieldName + "_s_ws", value.toLowerCase()); 890 891 // Index with analyze (full-text search). 892 document.addField(fieldName + "_txt", value); 893 894 indexFulltextValue(document, contentDoc, value, language); 895 } 896 897 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 898 if (values.length > 0 && !document.containsKey(sortField)) 899 { 900 // FIXME Si la meta est enumerée, indexer le label ? dans quelle langue ? 901 document.addField(sortField, SolrFieldHelper.getSortValue(values[0])); 902 } 903 } 904 905 /** 906 * Index a 'date' metadata 907 * @param metadata The parent composite metadata 908 * @param metadataName The name of metadata to index 909 * @param document The solr document to index into 910 * @param fieldName The index field name 911 * @param definition The metadata definition 912 */ 913 public void indexDateMetadata (CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 914 { 915 Date[] dateValues = metadata.getDateArray(metadataName, new Date[0]); 916 indexDateValues (dateValues, document, fieldName); 917 } 918 919 /** 920 * Index 'date' values 921 * @param values The values 922 * @param document The solr document to index into 923 * @param fieldName The index field name 924 */ 925 public void indexDateValues (Date[] values, SolrInputDocument document, String fieldName) 926 { 927 for (Date value : values) 928 { 929 document.addField(fieldName + "_dt", SolrIndexer.dateFormat().format(value)); 930 } 931 932 String sortField = fieldName + "_dt_sort"; 933 if (values.length > 0 && !document.containsKey(sortField)) 934 { 935 document.addField(sortField, SolrIndexer.dateFormat().format(values[0])); 936 } 937 } 938 939 /** 940 * Index a 'datetime' metadata 941 * @param metadata The parent composite metadata 942 * @param metadataName The name of metadata to index 943 * @param document The solr document to index into 944 * @param fieldName The index field name 945 * @param definition The metadata definition 946 */ 947 public void indexDateTimeMetadata (CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 948 { 949 Date[] dateValues = metadata.getDateArray(metadataName, new Date[0]); 950 indexDateTimeValues(dateValues, document, fieldName); 951 } 952 953 /** 954 * Index 'datetime' values 955 * @param values The values 956 * @param document The solr document to index into 957 * @param fieldName The index field name 958 */ 959 public void indexDateTimeValues (Date[] values, SolrInputDocument document, String fieldName) 960 { 961 for (Date value : values) 962 { 963 document.addField(fieldName + "_dt", SolrIndexer.dateFormat().format(value)); 964 } 965 966 String sortField = fieldName + "_dt_sort"; 967 if (values.length > 0 && !document.containsKey(sortField)) 968 { 969 document.addField(sortField, SolrIndexer.dateFormat().format(values[0])); 970 } 971 } 972 973 /** 974 * Index a 'double' metadata 975 * @param metadata The parent composite metadata 976 * @param metadataName The name of metadata to index 977 * @param document The solr document to index into 978 * @param fieldName The index field name 979 * @param definition The metadata definition 980 */ 981 public void indexDoubleMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 982 { 983 boolean isFacetable = definition.getEnumerator() != null; 984 double[] values = metadata.getDoubleArray(metadataName, new double[0]); 985 indexDoubleValues (ArrayUtils.toObject(values), document, fieldName, isFacetable); 986 } 987 988 /** 989 * Index 'double' values 990 * @param values The values 991 * @param document The solr document to index into 992 * @param fieldName The index field name 993 * @param isFacetable true if the field can be used as a facet. 994 */ 995 public void indexDoubleValues(Double[] values, SolrInputDocument document, String fieldName, boolean isFacetable) 996 { 997 for (Double value : values) 998 { 999 document.addField(fieldName + "_d", value); 1000 if (isFacetable) 1001 { 1002 document.addField(fieldName + "_d_dv", value); 1003 } 1004 } 1005 1006 String sortField = fieldName + "_d_sort"; 1007 if (values.length > 0 && !document.containsKey(sortField)) 1008 { 1009 document.addField(sortField, values[0]); 1010 } 1011 } 1012 1013 /** 1014 * Index a 'long' metadata 1015 * @param metadata The parent composite metadata 1016 * @param metadataName The name of metadata to index 1017 * @param document The solr document to index into 1018 * @param fieldName The index field name 1019 * @param definition The metadata definition 1020 */ 1021 public void indexLongMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1022 { 1023 boolean isFacetable = definition.getEnumerator() != null; 1024 long[] values = metadata.getLongArray(metadataName, new long[0]); 1025 indexLongValues(ArrayUtils.toObject(values), document, fieldName, isFacetable); 1026 } 1027 1028 /** 1029 * Index 'long' values 1030 * @param values The values 1031 * @param document The solr document to index into 1032 * @param fieldName The index field name 1033 * @param isFacetable true if the field can be used as a facet. 1034 */ 1035 public void indexLongValues(Long[] values, SolrInputDocument document, String fieldName, boolean isFacetable) 1036 { 1037 for (Long value : values) 1038 { 1039 document.addField(fieldName + "_l", value); 1040 if (isFacetable) 1041 { 1042 document.addField(fieldName + "_l_dv", value); 1043 } 1044 } 1045 1046 String sortField = fieldName + "_l_sort"; 1047 if (values.length > 0 && !document.containsKey(sortField)) 1048 { 1049 document.addField(sortField, values[0]); 1050 } 1051 } 1052 1053 /** 1054 * Index a 'user' metadata 1055 * @param metadata The parent composite metadata 1056 * @param metadataName The name of metadata to index 1057 * @param document The solr document to index into 1058 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1059 * @param fieldName The index field name 1060 * @param language The content language. 1061 * @param definition The metadata definition 1062 */ 1063 public void indexUserMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1064 { 1065 UserIdentity[] users = metadata.getUserArray(metadataName); 1066 indexUserValues(users, document, contentDoc, fieldName, language); 1067 } 1068 1069 /** 1070 * Index 'user' values 1071 * @param users The users 1072 * @param document The solr document to index into 1073 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1074 * @param fieldName The index field name 1075 * @param language The content language. 1076 */ 1077 public void indexUserValues(UserIdentity[] users, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1078 { 1079 int count = 0; 1080 for (UserIdentity userIdentity : users) 1081 { 1082 String fullName = _userHelper.getUserFullName(userIdentity); 1083 String sortableName = _userHelper.getUserSortableName(userIdentity); 1084 String identityAsString = UserIdentity.userIdentityToString(userIdentity); 1085 1086 indexFulltextValue(document, contentDoc, identityAsString, language); 1087 1088 // Facets 1089 document.addField(fieldName + "_s_dv", identityAsString); 1090 1091 // Dynamic fields 1092 document.addField(fieldName + "_s", identityAsString); 1093 1094 if (StringUtils.isNotEmpty(fullName)) 1095 { 1096 document.addField(fieldName + "_txt", fullName); 1097 1098 indexFulltextValue(document, contentDoc, fullName, language); 1099 } 1100 1101 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 1102 if (count == 0 && StringUtils.isNotEmpty(sortableName) && !document.containsKey(sortField)) 1103 { 1104 // Index only first user for sorting 1105 document.addField(sortField, SolrFieldHelper.getSortValue(sortableName)); 1106 } 1107 count++; 1108 } 1109 } 1110 1111 /** 1112 * Index a 'boolean' metadata 1113 * @param metadata The parent composite metadata 1114 * @param metadataName The name of metadata to index 1115 * @param document The solr document to index into 1116 * @param fieldName The index field name 1117 * @param definition The metadata definition 1118 */ 1119 public void indexBooleanMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1120 { 1121 boolean[] values = metadata.getBooleanArray(metadataName, new boolean[0]); 1122 indexBooleanValues(ArrayUtils.toObject(values), document, fieldName); 1123 } 1124 1125 /** 1126 * Index 'boolean' values 1127 * @param values The values 1128 * @param document The solr document to index into 1129 * @param fieldName The index field name 1130 */ 1131 public void indexBooleanValues(Boolean[] values, SolrInputDocument document, String fieldName) 1132 { 1133 for (Boolean value : values) 1134 { 1135 document.addField(fieldName + "_b", value); 1136 document.addField(fieldName + "_b_dv", value); 1137 } 1138 1139 String sortField = fieldName + "_b_sort"; 1140 if (values.length > 0 && !document.containsKey(sortField)) 1141 { 1142 document.addField(sortField, values[0]); 1143 } 1144 } 1145 1146 /** 1147 * Index a 'richtext' metadata 1148 * @param metadata The parent composite metadata 1149 * @param metadataName The name of metadata to index 1150 * @param document The solr document to index into 1151 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1152 * @param fieldName The index field name 1153 * @param language The content language. 1154 * @param definition The metadata definition 1155 */ 1156 public void indexRichtextMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1157 { 1158 indexRichtextValue(metadata.getRichText(metadataName), document, contentDoc, fieldName, language); 1159 } 1160 1161 /** 1162 * Index 'richtext' values 1163 * @param richText The rich text to index. 1164 * @param document The solr document to index into 1165 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1166 * @param fieldName The index field name. 1167 * @param language The content language. 1168 */ 1169 public void indexRichtextValue(RichText richText, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1170 { 1171 try (InputStream is = richText.getInputStream()) 1172 { 1173 String value = _richTextToString(is); 1174 1175 if (language != null) // language can be null for multilingual content 1176 { 1177 // Index as a text field. 1178 document.addField(fieldName + "_txt_" + language, value); 1179 document.addField(fieldName + "_txt_stemmed_" + language, value); 1180 document.addField(fieldName + "_txt_ws_" + language, value); 1181 } 1182 1183 // Index in the full-text value. 1184 SolrContentIndexer.indexFulltextValue(document, value, language); 1185 1186 if (contentDoc != null) 1187 { 1188 SolrContentIndexer.indexFulltextValue(contentDoc, value, language); 1189 } 1190 } 1191 catch (Exception e) 1192 { 1193 getLogger().warn("Failed to index RICH_TEXT '" + fieldName + "'", e); 1194 } 1195 } 1196 1197 /** 1198 * Gets a XML as a string and extract the text only 1199 * @param is The inputstream of XML 1200 * @return The text or null if the XML is not well formed 1201 */ 1202 protected String _richTextToString(InputStream is) 1203 { 1204 try 1205 { 1206 RichTextHandler txtHandler = new RichTextHandler(); 1207 _parser.parse(new InputSource(is), txtHandler); 1208 return txtHandler.getValue().trim(); 1209 } 1210 catch (IOException | SAXException e) 1211 { 1212 getLogger().error("Cannot parse inputstream", e); 1213 return null; 1214 } 1215 } 1216 1217 1218 1219 /** 1220 * Index a 'binary' metadata 1221 * @param metadata The parent composite metadata 1222 * @param metadataName The name of metadata to index 1223 * @param document The solr document to index into 1224 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1225 * @param fieldName The index field name 1226 * @param language The content language. 1227 * @param definition The metadata definition 1228 */ 1229 public void indexBinaryMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1230 { 1231 // Index file name. 1232 BinaryMetadata binary = metadata.getBinaryMetadata(metadataName); 1233 document.addField(fieldName + "_txt", binary.getFilename()); 1234 1235 // Index the contents. 1236 indexFullTextBinary(metadata, metadataName, document, contentDoc, fieldName, language, definition); 1237 } 1238 1239 /** 1240 * Index a 'file' metadata 1241 * @param metadata The parent composite metadata 1242 * @param metadataName The name of metadata to index 1243 * @param document The solr document to index into 1244 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1245 * @param fieldName The index field name 1246 * @param language The content language. 1247 * @param definition The metadata definition 1248 */ 1249 public void indexFileMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1250 { 1251 if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.BINARY.equals(metadata.getType(metadataName))) 1252 { 1253 indexBinaryMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 1254 } 1255 else 1256 { 1257 // Resource from the explorer. 1258 String value = metadata.getString(metadataName); 1259 1260 try 1261 { 1262 Resource resource = (Resource) _resolver.resolveById(value); 1263 1264 // Index file name. 1265 document.addField(fieldName + "_txt", resource.getName()); 1266 1267 // Index the contents. 1268 indexResourceContent(resource, document, contentDoc, language); 1269 1270// document.addField(prefix + fieldName + "$path", resource.getId()); 1271// document.addField(prefix + fieldName + "$type", "explorer"); 1272// document.addField(prefix + fieldName + "$mime-type", resource.getMimeType()); 1273// document.addField(prefix + fieldName + "$filename", filename); 1274// document.addField(prefix + fieldName + "$lastModified", resource.getLastModified()); 1275// document.addField(prefix + fieldName + "$size", resource.getLength()); 1276// 1277// String viewUrl = "/plugins/explorer/resource?id=" + resource.getId(); 1278// document.addField(prefix + fieldName + "$viewUrl", viewUrl); 1279// document.addField(prefix + fieldName + "$downloadUrl", viewUrl + "&download=true"); 1280 } 1281 catch (AmetysRepositoryException e) 1282 { 1283 getLogger().warn(String.format("Unable to index the resource of id '%s' : resource does not exist.", value), e); 1284 } 1285 } 1286 } 1287 1288 /** 1289 * Index a 'file' metadata 1290 * @param values The values. 1291 * @param document The solr document to index into 1292 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1293 * @param fieldName The index field name 1294 * @param language The content language. 1295 */ 1296 public void indexFileValue(Object[] values, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1297 { 1298 String type = (String) values[0]; 1299 if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.BINARY.equals(type)) 1300 { 1301 indexFullTextBinaryValue((InputStream) values[1], document, contentDoc, fieldName, language); 1302 } 1303 else 1304 { 1305 indexResourceContent((Resource) values[1], document, contentDoc, language); 1306 } 1307 } 1308 1309 /** 1310 * Index a 'binary' metadata 1311 * @param metadata The parent composite metadata 1312 * @param metadataName The name of metadata to index 1313 * @param document The solr document to index into 1314 * @param contentDoc The content document. 1315 * @param fieldName The index field name 1316 * @param language The content language. 1317 * @param definition The metadata definition 1318 */ 1319 protected void indexFullTextBinary(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1320 { 1321 try (InputStream is = metadata.getBinaryMetadata(metadataName).getInputStream()) 1322 { 1323 indexFullTextBinaryValue(is, document, contentDoc, fieldName, language); 1324 } 1325 catch (IOException e) 1326 { 1327 throw new RuntimeException(e); 1328 } 1329 } 1330 1331 /** 1332 * Index a 'binary' value 1333 * @param is An InputStream on the binary data. 1334 * @param document The solr document to index into 1335 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1336 * @param fieldName The index field name 1337 * @param language The content language. 1338 */ 1339 protected void indexFullTextBinaryValue(InputStream is, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1340 { 1341 try 1342 { 1343 String text = _tika.parseToString(is); 1344 1345 indexFulltextValue(document, contentDoc, text, language); 1346 } 1347 catch (Throwable e) 1348 { 1349 getLogger().warn(String.format("Failed to index binary field '%s'", fieldName), e); 1350 } 1351 } 1352 1353 /** 1354 * Index a 'content' metadata 1355 * @param metadata The parent composite metadata 1356 * @param metadataName The name of metadata to index 1357 * @param document The solr document to index into 1358 * @param fieldName The index field name 1359 * @param definition The metadata definition 1360 */ 1361 public void indexContentMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1362 { 1363 String[] contentIds = metadata.getStringArray(metadataName, new String[0]); 1364 indexContentValues(contentIds, document, fieldName); 1365 } 1366 1367 /** 1368 * Index content values. 1369 * @param contentIds The ID of the contents to index. 1370 * @param document The solr document to index into. 1371 * @param fieldName the field name. 1372 */ 1373 public void indexContentValues (String[] contentIds, SolrInputDocument document, String fieldName) 1374 { 1375 for (String contentId : contentIds) 1376 { 1377 document.addField(fieldName + "_s", contentId); 1378 // Facets 1379 document.addField(fieldName + "_s_dv", contentId); 1380 } 1381 1382 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 1383 if (contentIds.length > 0 && !document.containsKey(sortField)) 1384 { 1385 try 1386 { 1387 // TODO Est-ce qu'on peut faire autrement qu'un resolve ? 1388 Content content = _resolver.resolveById(contentIds[0]); 1389 if (_contentHelper.isMultilingual(content)) 1390 { 1391 MultilingualString value = content.getMetadataHolder().getMultilingualString(DefaultContent.METADATA_TITLE); 1392 for (Locale locale : value.getLocales()) 1393 { 1394 String str = value.getValue(locale); 1395 document.addField(sortField + "_" + locale.getLanguage(), SolrFieldHelper.getSortValue(str)); 1396 } 1397 } 1398 else 1399 { 1400 document.addField(sortField, SolrFieldHelper.getSortValue(_contentHelper.getTitle(content))); 1401 } 1402 } 1403 catch (AmetysRepositoryException e) 1404 { 1405 // Do not index 1406 } 1407 } 1408 } 1409 1410 /** 1411 * Index a 'sub_content' metadata 1412 * @param metadata The parent composite metadata 1413 * @param metadataName The name of metadata to index 1414 * @param document The solr document to index into 1415 * @param fieldName The index field name 1416 * @param definition The metadata definition 1417 */ 1418 public void indexSubContentMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1419 { 1420 TraversableAmetysObject objectCollection = metadata.getObjectCollection(metadataName); 1421 AmetysObjectIterable<Content> subcontents = objectCollection.getChildren(); 1422 for (Content subcontent : subcontents) 1423 { 1424 document.addField(fieldName + "_s", subcontent.getId()); 1425 // Facets 1426 document.addField(fieldName + "_s_dv", subcontent.getId()); 1427 } 1428 1429// String sortField = fieldName + "_s_sort"; 1430 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 1431 subcontents = objectCollection.getChildren(); 1432 Iterator<Content> it = subcontents.iterator(); 1433 1434 if (it.hasNext() && !document.containsKey(sortField)) 1435 { 1436 Content subcontent = it.next(); 1437 if (_contentHelper.isMultilingual(subcontent)) 1438 { 1439 MultilingualString value = subcontent.getMetadataHolder().getMultilingualString(DefaultContent.METADATA_TITLE); 1440 for (Locale locale : value.getLocales()) 1441 { 1442 String str = value.getValue(locale); 1443 document.addField(sortField + "_" + locale.getLanguage(), SolrFieldHelper.getSortValue(str)); 1444 } 1445 } 1446 else 1447 { 1448 document.addField(sortField, SolrFieldHelper.getSortValue(_contentHelper.getTitle(subcontent))); 1449 } 1450 } 1451 } 1452 1453 /** 1454 * Index a 'geocode' metadata 1455 * @param metadata The parent composite metadata 1456 * @param metadataName The name of metadata to index 1457 * @param document The solr document to index into 1458 * @param fieldName The index field name 1459 * @param definition The metadata definition 1460 */ 1461 public void indexGeoCodeMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1462 { 1463 CompositeMetadata geoCodeMetadata = metadata.getCompositeMetadata(metadataName); 1464 if (geoCodeMetadata.hasMetadata("longitude") && geoCodeMetadata.hasMetadata("latitude")) 1465 { 1466 double longitude = geoCodeMetadata.getDouble("longitude"); 1467 double latitude = geoCodeMetadata.getDouble("latitude"); 1468 1469 indexGeocodeValue(latitude, longitude, document, fieldName); 1470 } 1471 } 1472 1473 /** 1474 * Index a 'geocode' metadata 1475 * @param latitude the coord latitude. 1476 * @param longitude the coord longitude. 1477 * @param document The solr document to index into 1478 * @param fieldName The index field name 1479 */ 1480 public void indexGeocodeValue(double latitude, double longitude, SolrInputDocument document, String fieldName) 1481 { 1482 document.addField(fieldName + "$longitude_d", longitude); 1483 document.addField(fieldName + "$latitude_d", latitude); 1484 1485 String geoFieldName = SolrFieldHelper.getIndexingFieldName(MetadataType.GEOCODE, fieldName); 1486 document.addField(geoFieldName, longitude + " " + latitude); 1487 } 1488 1489 /** 1490 * Index a composite metadata, i.e. browse and index the sub-metadatas. 1491 * @param content The content being indexed. 1492 * @param metadata The parent metadata. 1493 * @param metadataName The composite metadata name. 1494 * @param document The solr document to index into. 1495 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1496 * @param fieldName The field name. 1497 * @param definition The composite metadata definition. 1498 * @param additionalDocuments The solr additional documents used for repeater instance 1499 */ 1500 public void indexCompositeMetadata(Content content, CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition, List<SolrInputDocument> additionalDocuments) 1501 { 1502 CompositeMetadata compositeMetadata = metadata.getCompositeMetadata(metadataName); 1503 1504 // Index recursively 1505 Set<String> subMetadataNames = definition.getMetadataNames(); 1506 for (String subMetadataName : subMetadataNames) 1507 { 1508 if (compositeMetadata.hasMetadata(subMetadataName)) 1509 { 1510 indexMetadata(content, subMetadataName, compositeMetadata, document, contentDoc, additionalDocuments, fieldName + ContentConstants.METADATA_PATH_SEPARATOR + subMetadataName, definition.getMetadataDefinition(subMetadataName)); 1511 } 1512 } 1513 } 1514 1515 /** 1516 * Index a repeater metadata, i.e. browse and index the entries. 1517 * @param content The content being indexed. 1518 * @param metadata The parent metadata. 1519 * @param metadataName The repeater metadata name. 1520 * @param document The solr document to index into. 1521 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1522 * @param fieldName The field name. 1523 * @param definition The repeater metadata definition. 1524 * @param additionalDocuments The solr additional documents used for repeater instance 1525 */ 1526 public void indexRepeaterMetadata(Content content, CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition, List<SolrInputDocument> additionalDocuments) 1527 { 1528 CompositeMetadata compositeMetadata = metadata.getCompositeMetadata(metadataName); 1529 1530 // Get and sort the entry names. 1531 String[] entries = compositeMetadata.getMetadataNames(); 1532 Arrays.sort(entries, MetadataManager.REPEATER_ENTRY_COMPARATOR); 1533 1534 for (int i = 0; i < entries.length; i++) 1535 { 1536 String entryName = entries[i]; 1537 int position = i + 1; 1538 1539 CompositeMetadata entry = compositeMetadata.getCompositeMetadata(entryName); 1540 1541 String repeaterID = document.getField("id").getFirstValue().toString() + "/" + fieldName + "/" + entryName; 1542 1543 // Creates a new Solr document for each entry 1544 SolrInputDocument repDocument = new SolrInputDocument(); 1545 repDocument.addField("id", repeaterID); 1546 document.addField(fieldName + "_s_dv", repeaterID); 1547 1548 repDocument.addField(DOCUMENT_TYPE, TYPE_REPEATER); 1549 repDocument.addField(REPEATER_ENTRY_POSITION, position); 1550 // Add the created document to additional documents 1551 additionalDocuments.add(repDocument); 1552 1553 SolrInputDocument parentContentDoc = contentDoc != null ? contentDoc : document; 1554 1555 Set<String> subMetadataNames = definition.getMetadataNames(); 1556 for (String subMetadataName : subMetadataNames) 1557 { 1558 if (entry.hasMetadata(subMetadataName)) 1559 { 1560 // Created document is now the main document 1561 indexMetadata(content, subMetadataName, entry, repDocument, parentContentDoc, additionalDocuments, subMetadataName, definition.getMetadataDefinition(subMetadataName)); 1562 } 1563 } 1564 } 1565 } 1566 1567 /** 1568 * Index the content of a resource. 1569 * @param resource The resource 1570 * @param document The solr document to index into 1571 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1572 * @param language The content language. 1573 */ 1574 protected void indexResourceContent(Resource resource, SolrInputDocument document, SolrInputDocument contentDoc, String language) 1575 { 1576 try (InputStream is = resource.getInputStream()) 1577 { 1578 indexResourceContentValue(is, resource.getDCSubject(), resource.getDCDescription(), language, document, contentDoc); 1579 1580 // TODO Declare and index DC metadata? 1581 // DC meta 1582// _resourceIndexer.indexDublinCoreMetadata(resource, document); 1583 } 1584 catch (Exception e) 1585 { 1586 getLogger().error("Unable to index resource at " + resource.getPath(), e); 1587 } 1588 } 1589 1590 /** 1591 * Index the content of a resource. 1592 * @param is An input stream on the resource content. 1593 * @param keywords The resource keywords. 1594 * @param description The resource description. 1595 * @param language The content language. 1596 * @param document The solr document to index into 1597 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1598 * @throws TikaException If an error occurs extracting the document's text content. 1599 * @throws IOException If an error occurs reading the document's text content. 1600 */ 1601 protected void indexResourceContentValue(InputStream is, String[] keywords, String description, String language, SolrInputDocument document, SolrInputDocument contentDoc) throws IOException, TikaException 1602 { 1603 String value = _tika.parseToString(is); 1604 1605 indexFulltextValue(document, contentDoc, value, language); 1606 1607 for (String keyword : keywords) 1608 { 1609 indexFulltextValue(document, contentDoc, keyword, language); 1610 } 1611 1612 if (description != null) 1613 { 1614 indexFulltextValue(document, contentDoc, description, language); 1615 } 1616 } 1617 1618 /** 1619 * Index a full-text value. 1620 * @param mainDocument The document being used, can be either the content document itself or a repeater document. 1621 * @param contentDoc The parent content document. If the mainDocument is the content document, this will be null. 1622 * @param text The text to index. 1623 * @param language The content language. 1624 */ 1625 protected void indexFulltextValue(SolrInputDocument mainDocument, SolrInputDocument contentDoc, String text, String language) 1626 { 1627 indexFulltextValue(mainDocument, text, language); 1628 1629 // The content doc is null if the main document is the content doc (to prevent indexing the data twice). 1630 if (contentDoc != null) 1631 { 1632 indexFulltextValue(contentDoc, text, language); 1633 } 1634 } 1635 1636 /** 1637 * Index a full-text value. 1638 * @param document The document to index into. 1639 * @param text The text to index. 1640 * @param language The content language. 1641 */ 1642 public static void indexFulltextValue(SolrInputDocument document, String text, String language) 1643 { 1644 if (StringUtils.isNotBlank(text)) 1645 { 1646 document.addField(FULL_GENERAL, text); 1647 document.addField(FULL_EXACT_WS, text); 1648 1649 if (StringUtils.isNotEmpty(language)) 1650 { 1651 indexLanguageFulltextValue(document, text, language); 1652 } 1653 } 1654 } 1655 1656 /** 1657 * Index a full-text value. 1658 * @param document The document to index into. 1659 * @param text The text to index. 1660 * @param languages The languages. 1661 */ 1662 public static void indexFulltextValue(SolrInputDocument document, String text, Collection<String> languages) 1663 { 1664 if (StringUtils.isNotBlank(text)) 1665 { 1666 document.addField(FULL_GENERAL, text); 1667 document.addField(FULL_EXACT_WS, text); 1668 1669 for (String language : languages) 1670 { 1671 indexLanguageFulltextValue(document, text, language); 1672 } 1673 } 1674 } 1675 1676 /** 1677 * Index a full-text value in the language-specific fields. 1678 * @param document The document to index into. 1679 * @param text The text to index. 1680 * @param language The content language. 1681 */ 1682 protected static void indexLanguageFulltextValue(SolrInputDocument document, String text, String language) 1683 { 1684 document.addField(FULL_PREFIX + language, text); 1685 document.addField(FULL_STEMMED_PREFIX + language, text); 1686 } 1687}