001/* 002 * Copyright 2015 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.cms.content.indexing.solr; 017 018import java.io.IOException; 019import java.io.InputStream; 020import java.util.Arrays; 021import java.util.Collection; 022import java.util.Date; 023import java.util.HashMap; 024import java.util.Iterator; 025import java.util.List; 026import java.util.Locale; 027import java.util.Map; 028import java.util.Map.Entry; 029import java.util.Optional; 030import java.util.Set; 031import java.util.stream.Collectors; 032 033import org.apache.avalon.framework.component.Component; 034import org.apache.avalon.framework.service.ServiceException; 035import org.apache.avalon.framework.service.ServiceManager; 036import org.apache.avalon.framework.service.Serviceable; 037import org.apache.commons.lang3.ArrayUtils; 038import org.apache.commons.lang3.StringUtils; 039import org.apache.excalibur.xml.sax.SAXParser; 040import org.apache.solr.common.SolrInputDocument; 041import org.apache.tika.Tika; 042import org.apache.tika.exception.TikaException; 043import org.xml.sax.InputSource; 044import org.xml.sax.SAXException; 045 046import org.ametys.cms.content.ContentHelper; 047import org.ametys.cms.content.RichTextHandler; 048import org.ametys.cms.content.indexing.solr.content.attachment.ContentVisibleAttachmentIndexerExtensionPoint; 049import org.ametys.cms.content.references.OutgoingReferences; 050import org.ametys.cms.content.references.OutgoingReferencesExtractor; 051import org.ametys.cms.contenttype.ContentConstants; 052import org.ametys.cms.contenttype.ContentType; 053import org.ametys.cms.contenttype.ContentTypeExtensionPoint; 054import org.ametys.cms.contenttype.ContentTypesHelper; 055import org.ametys.cms.contenttype.MetadataDefinition; 056import org.ametys.cms.contenttype.MetadataManager; 057import org.ametys.cms.contenttype.MetadataType; 058import org.ametys.cms.contenttype.RepeaterDefinition; 059import org.ametys.cms.contenttype.indexing.CustomIndexingField; 060import org.ametys.cms.contenttype.indexing.CustomMetadataIndexingField; 061import org.ametys.cms.contenttype.indexing.IndexingField; 062import org.ametys.cms.contenttype.indexing.IndexingModel; 063import org.ametys.cms.contenttype.indexing.MetadataIndexingField; 064import org.ametys.cms.languages.Language; 065import org.ametys.cms.languages.LanguagesManager; 066import org.ametys.cms.repository.Content; 067import org.ametys.cms.repository.DefaultContent; 068import org.ametys.cms.search.model.SystemProperty; 069import org.ametys.cms.search.model.SystemPropertyExtensionPoint; 070import org.ametys.core.user.UserIdentity; 071import org.ametys.plugins.core.user.UserHelper; 072import org.ametys.plugins.explorer.resources.Resource; 073import org.ametys.plugins.explorer.resources.metadata.TikaProvider; 074import org.ametys.plugins.repository.AmetysObject; 075import org.ametys.plugins.repository.AmetysObjectIterable; 076import org.ametys.plugins.repository.AmetysObjectResolver; 077import org.ametys.plugins.repository.AmetysRepositoryException; 078import org.ametys.plugins.repository.TraversableAmetysObject; 079import org.ametys.plugins.repository.UnknownAmetysObjectException; 080import org.ametys.plugins.repository.metadata.BinaryMetadata; 081import org.ametys.plugins.repository.metadata.CompositeMetadata; 082import org.ametys.plugins.repository.metadata.MultilingualString; 083import org.ametys.plugins.repository.metadata.MultilingualStringHelper; 084import org.ametys.plugins.repository.metadata.RichText; 085import org.ametys.runtime.plugin.component.AbstractLogEnabled; 086 087/** 088 * Component for {@link Content} indexing into a Solr server. 089 */ 090public class SolrContentIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrFieldNames 091{ 092 /** The component role. */ 093 public static final String ROLE = SolrContentIndexer.class.getName(); 094 095 /** The Ametys objet resolver */ 096 protected AmetysObjectResolver _resolver; 097 /** The content type extension point */ 098 protected ContentTypeExtensionPoint _cTypeEP; 099 /** The content type helper */ 100 protected ContentTypesHelper _cTypesHelper; 101 /** The users manager */ 102 protected UserHelper _userHelper; 103 /** The Tika instance */ 104 protected Tika _tika; 105 /** The resource indexer */ 106 protected SolrResourceIndexer _resourceIndexer; 107 /** The system property extension point. */ 108 protected SystemPropertyExtensionPoint _systemPropEP; 109 /** The content helper */ 110 protected ContentHelper _contentHelper; 111 /** The outgoing references extractor */ 112 protected OutgoingReferencesExtractor _outgoingReferencesExtractor; 113 /** The extension point for ContentVisibleAttachmentIndexers */ 114 protected ContentVisibleAttachmentIndexerExtensionPoint _contentVisibleAttachmentIndexerEP; 115 /** The manager for languages */ 116 protected LanguagesManager _languagesManager; 117 /** Avalon service manager */ 118 protected ServiceManager _manager; 119 120 @Override 121 public void service(ServiceManager manager) throws ServiceException 122 { 123 _manager = manager; 124 _resolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE); 125 _resourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE); 126 _cTypeEP = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE); 127 _contentHelper = (ContentHelper) manager.lookup(ContentHelper.ROLE); 128 _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE); 129 _userHelper = (UserHelper) manager.lookup(UserHelper.ROLE); 130 TikaProvider tikaProvider = (TikaProvider) manager.lookup(TikaProvider.ROLE); 131 _tika = tikaProvider.getTika(); 132 _systemPropEP = (SystemPropertyExtensionPoint) manager.lookup(SystemPropertyExtensionPoint.ROLE); 133 _outgoingReferencesExtractor = (OutgoingReferencesExtractor) manager.lookup(OutgoingReferencesExtractor.ROLE); 134 _contentVisibleAttachmentIndexerEP = (ContentVisibleAttachmentIndexerExtensionPoint) manager.lookup(ContentVisibleAttachmentIndexerExtensionPoint.ROLE); 135 _languagesManager = (LanguagesManager) manager.lookup(LanguagesManager.ROLE); 136 } 137 138 /** 139 * Populate a solr input document by adding fields to index into it. 140 * @param content The content to index 141 * @param document The main solr document to index into 142 * @param additionalDocuments The additional documents for repeater instances 143 * @throws Exception if an error occurred while indexing 144 */ 145 public void indexContent(Content content, SolrInputDocument document, List<SolrInputDocument> additionalDocuments) throws Exception 146 { 147 // Properties specific to a stand-alone indexation. 148 String contentId = content.getId(); 149 document.addField(ID, contentId); 150 document.addField(DOCUMENT_TYPE, TYPE_CONTENT); 151 152 indexContentTitle(content, document); 153 154 document.addField(CONTENT_NAME, SolrIndexer.truncateUtf8StringValue(content.getName(), getLogger(), contentId, CONTENT_NAME)); 155 _indexOutgoingReferences(content, document); 156 _indexVisibleAttachments(content, document); 157 158 document.addField(WORKFLOW_REF_DV, contentId + "#workflow"); 159 160 // Index content system properties. 161 indexSystemProperties(content, document); 162 163 // Index the fields specified in the indexation model. 164 indexModelFields(content, document, additionalDocuments); 165 } 166 167 private void _indexOutgoingReferences(Content content, SolrInputDocument document) 168 { 169 // Found by the extractor (resource references found in all metadata of the content) 170 _outgoingReferencesExtractor.getOutgoingReferences(content).values() // key is the metadata,we do not care what metadata it comes from 171 .parallelStream() 172 .map(OutgoingReferences::entrySet) 173 .flatMap(Set::parallelStream) 174 .filter(outgoingRefs -> outgoingRefs.getKey().equals("explorer")) // only references of the resource explorer 175 .map(Entry::getValue) 176 .flatMap(List::parallelStream) // flat the resource ids 177 .forEach(resourceId -> document.addField(CONTENT_OUTGOING_REFEERENCES_RESOURCE_IDS, resourceId)); 178 179 // Attachments of the content (just the root folder) 180 Optional.ofNullable(content.getRootAttachments()) 181 .map(AmetysObject::getId) 182 .ifPresent(id -> document.addField(CONTENT_OUTGOING_REFEERENCES_RESOURCE_IDS, id)); 183 } 184 185 private void _indexVisibleAttachments(Content content, SolrInputDocument document) 186 { 187 Collection<String> values = _contentVisibleAttachmentIndexerEP.getExtensionsIds() 188 .stream() 189 .map(_contentVisibleAttachmentIndexerEP::getExtension) 190 .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(content)) 191 .flatMap(Collection::stream) 192 .collect(Collectors.toList()); 193 document.addField(CONTENT_VISIBLE_ATTACHMENT_RESOURCE_IDS, values); 194 } 195 196 /** 197 * Index the content title 198 * @param content The title 199 * @param document The main solr document to index into 200 */ 201 protected void indexContentTitle(Content content, SolrInputDocument document) 202 { 203 if (content.getMetadataHolder().getType("title") == org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.MULTILINGUAL_STRING) 204 { 205 MultilingualString value = content.getMetadataHolder().getMultilingualString(DefaultContent.METADATA_TITLE); 206 indexMultilingualStringValues(value, content.getId(), document, null, TITLE); 207 } 208 else 209 { 210 String title = _contentHelper.getTitle(content); 211 document.addField(TITLE, SolrIndexer.truncateUtf8StringValue(title, getLogger(), content.getId(), TITLE)); 212 document.addField(TITLE_SORT, title); 213 } 214 } 215 216 /** 217 * Index the system properties of a content. 218 * @param content The content to index. 219 * @param document The solr document to index into. 220 */ 221 protected void indexSystemProperties(Content content, SolrInputDocument document) 222 { 223 for (String sysPropId : _systemPropEP.getExtensionsIds()) 224 { 225 SystemProperty sysProp = _systemPropEP.getExtension(sysPropId); 226 227 sysProp.index(content, document); 228 } 229 } 230 231 /** 232 * Index the content type and all its supertypes in the given document (recursively). 233 * @param cTypeId The ID of the content type to index. 234 * @param document The solr document to index into. 235 * @param fieldName The field name. 236 */ 237 protected void indexAllContentTypes(String cTypeId, SolrInputDocument document, String fieldName) 238 { 239 document.addField(fieldName, cTypeId); 240 241 if (_cTypeEP.hasExtension(cTypeId)) 242 { 243 ContentType contentType = _cTypeEP.getExtension(cTypeId); 244 for (String supertypeId : contentType.getSupertypeIds()) 245 { 246 indexAllContentTypes(supertypeId, document, fieldName); 247 } 248 } 249 } 250 251 /** 252 * Index the fields specified in the indexation model. 253 * @param content The content to index. 254 * @param document The main content solr document. 255 * @param additionalDocuments The additional documents for repeater instances. 256 */ 257 protected void indexModelFields(Content content, SolrInputDocument document, List<SolrInputDocument> additionalDocuments) 258 { 259 IndexingModel indexingModel = null; 260 try 261 { 262 indexingModel = _cTypesHelper.getIndexingModel(content); 263 } 264 catch (RuntimeException e) 265 { 266 getLogger().error("indexContent > Error getting the indexing model of content " + content.getId(), e); 267 throw e; 268 } 269 270 for (IndexingField field : indexingModel.getFields()) 271 { 272 if (field instanceof CustomIndexingField) 273 { 274 Object[] values = ((CustomIndexingField) field).getValues(content); 275 indexValues(content, field.getName(), field.getType(), values, document, null); 276 } 277 else if (field instanceof MetadataIndexingField) 278 { 279 String metadataPath = ((MetadataIndexingField) field).getMetadataPath(); 280 String[] pathSegments = metadataPath.split(ContentConstants.METADATA_PATH_SEPARATOR); 281 282 MetadataDefinition definition = _cTypesHelper.getMetadataDefinition(pathSegments[0], content.getTypes(), content.getMixinTypes()); 283 if (definition != null) 284 { 285 findAndIndexMetadata(content, pathSegments, content.getMetadataHolder(), definition, field, field.getName(), document, null, additionalDocuments); 286 } 287 } 288 } 289 } 290 291 /** 292 * Populate a Solr input document by adding fields for a single system property. 293 * @param content The content to index 294 * @param propertyId The system property ID. 295 * @param document The solr document 296 * @return true if there are partial update to apply 297 * @throws Exception if an error occurred 298 */ 299 public boolean indexPartialSystemProperty(Content content, String propertyId, SolrInputDocument document) throws Exception 300 { 301 if (!_systemPropEP.hasExtension(propertyId)) 302 { 303 throw new IllegalStateException("The property '" + propertyId + "' can't be indexed as it does not exist."); 304 } 305 306 SolrInputDocument tempDocument = new SolrInputDocument(); 307 308 SystemProperty property = _systemPropEP.getExtension(propertyId); 309 property.index(content, tempDocument); 310 311 if (tempDocument.isEmpty()) 312 { 313 // Does not have any partial update to apply, avoid to erase all the existing fields on the Solr document corresponding to this content (it would be lost) 314 return false; 315 } 316 317 // Copy the indexed values as partial updates. 318 for (String fieldName : tempDocument.getFieldNames()) 319 { 320 Collection<Object> fieldValues = tempDocument.getFieldValues(fieldName); 321 322 Map<String, Object> partialUpdate = new HashMap<>(); 323 partialUpdate.put("set", fieldValues); 324 document.addField(fieldName, partialUpdate); 325 } 326 327 document.addField("id", content.getId()); 328 329 return true; 330 } 331 332 /** 333 * Find the metadata to index from its path 334 * @param content the content currently being traversed. 335 * @param pathSegments The segments of path of metadata to index 336 * @param metadata The parent composite metadata 337 * @param definition The metadata definition 338 * @param field the current indexing field. 339 * @param fieldName the name of the field to index. 340 * @param document The main solr document to index into 341 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 342 * @param additionalDocuments The additional documents 343 */ 344 protected void findAndIndexMetadata(Content content, String[] pathSegments, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 345 { 346 String currentFieldName = pathSegments[0]; 347 348 IndexingModel indexingModel = null; 349 try 350 { 351 indexingModel = _cTypesHelper.getIndexingModel(content); 352 } 353 catch (RuntimeException e) 354 { 355 if (content != null) 356 { 357 getLogger().error("findAndIndexMetadata > Error while indexing content " + content.getId() + " metadata", e); 358 } 359 else 360 { 361 getLogger().error("findAndIndexMetadata > Error while indexing null content metadata"); 362 } 363 throw e; 364 } 365 366 IndexingField refField = indexingModel.getField(currentFieldName); 367 if (refField != null && refField instanceof CustomMetadataIndexingField) 368 { 369 CustomMetadataIndexingField overridingField = (CustomMetadataIndexingField) refField; 370 findAndIndexOverridingField(content, indexingModel, overridingField, fieldName, definition, pathSegments, document, contentDoc, additionalDocuments); 371 } 372 else 373 { 374 if (metadata.hasMetadata(currentFieldName)) 375 { 376 findAndIndexMetadataField(content, pathSegments, metadata, definition, field, fieldName, document, contentDoc, additionalDocuments); 377 } 378 } 379 } 380 381 /** 382 * Find and index a metadata. 383 * @param content the current content being traversed. 384 * @param pathSegments the full metadata path segments. 385 * @param metadata the current metadata holder. 386 * @param definition the current metadata definition. 387 * @param field the current indexing field. 388 * @param fieldName the name of the field to index. 389 * @param document the solr main document. 390 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 391 * @param additionalDocuments the solr additional documents. 392 */ 393 protected void findAndIndexMetadataField(Content content, String[] pathSegments, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 394 { 395 String currentFieldName = pathSegments[0]; 396 397 if (pathSegments.length == 1) 398 { 399 indexMetadata(content, currentFieldName, metadata, document, contentDoc, additionalDocuments, fieldName, definition); 400 return; 401 } 402 403 String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length); 404 405 switch (definition.getType()) 406 { 407 case BINARY: 408 case BOOLEAN: 409 case STRING: 410 case MULTILINGUAL_STRING: 411 case USER: 412 case LONG: 413 case DOUBLE: 414 case DATE: 415 case DATETIME: 416 case REFERENCE: 417 case RICH_TEXT: 418 case FILE: 419 case GEOCODE: 420 getLogger().warn("The metadata '{}' of type {} can not be a part of a path to index : {}", currentFieldName, definition.getType().toString(), StringUtils.join(pathSegments, ContentConstants.METADATA_PATH_SEPARATOR)); 421 break; 422 case CONTENT: 423 String[] contentIds = metadata.getStringArray(currentFieldName, new String[0]); 424 for (String contentId : contentIds) 425 { 426 try 427 { 428 Content refContent = _resolver.resolveById(contentId); 429 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], refContent.getTypes(), refContent.getMixinTypes()); 430 findAndIndexMetadata(refContent, followingSegments, refContent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 431 } 432 catch (UnknownAmetysObjectException e) 433 { 434 // Nothing to index 435 } 436 } 437 break; 438 case SUB_CONTENT: 439 TraversableAmetysObject objectCollection = metadata.getObjectCollection(currentFieldName); 440 AmetysObjectIterable<Content> subcontents = objectCollection.getChildren(); 441 for (Content subcontent : subcontents) 442 { 443 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], subcontent.getTypes(), subcontent.getMixinTypes()); 444 findAndIndexMetadata(subcontent, followingSegments, subcontent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 445 } 446 break; 447 case COMPOSITE: 448 CompositeMetadata composite = metadata.getCompositeMetadata(currentFieldName); 449 450 if (definition instanceof RepeaterDefinition) 451 { 452 String[] entries = composite.getMetadataNames(); 453 for (String entry : entries) 454 { 455 findAndIndexMetadata(content, followingSegments, composite.getCompositeMetadata(entry), definition.getMetadataDefinition(followingSegments[0]), field, fieldName, document, contentDoc, additionalDocuments); 456 } 457 } 458 else 459 { 460 findAndIndexMetadata(content, followingSegments, composite, definition.getMetadataDefinition(followingSegments[0]), field, fieldName, document, contentDoc, additionalDocuments); 461 } 462 break; 463 default: 464 break; 465 466 } 467 } 468 469 /** 470 * Find and index a property represented by an overriding field. 471 * @param content the current content being traversed. 472 * @param indexingModel the current indexing model. 473 * @param pathSegments the full metadata path segments. 474 * @param definition the current metadata definition. 475 * @param field the current indexing field. 476 * @param fieldName the name of the field to index. 477 * @param document the solr main document. 478 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 479 * @param additionalDocuments the solr additional documents. 480 */ 481 protected void findAndIndexOverridingField(Content content, IndexingModel indexingModel, CustomMetadataIndexingField field, String fieldName, MetadataDefinition definition, String[] pathSegments, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 482 { 483 String currentFieldName = field.getName(); 484 485 if (pathSegments.length == 1) 486 { 487 indexOverridingField(field, content, fieldName, document, contentDoc, additionalDocuments); 488 return; 489 } 490 491 String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length); 492 MetadataType type = definition.getType(); 493 494 switch (type) 495 { 496 case BINARY: 497 case BOOLEAN: 498 case STRING: 499 case MULTILINGUAL_STRING: 500 case USER: 501 case LONG: 502 case DOUBLE: 503 case DATE: 504 case DATETIME: 505 case REFERENCE: 506 case RICH_TEXT: 507 case FILE: 508 case GEOCODE: 509 getLogger().warn("The field '{}' of type {} can not be a part of a path to index : {}", currentFieldName, type.toString(), StringUtils.join(pathSegments, ContentConstants.METADATA_PATH_SEPARATOR)); 510 break; 511 case COMPOSITE: 512 getLogger().warn("The type {} is invalid for the overriding field '{}'.", type.toString(), currentFieldName); 513 break; 514 case CONTENT: 515 case SUB_CONTENT: 516 String[] contentIds = (String[]) field.getValues(content); 517 for (String contentId : contentIds) 518 { 519 Content refContent = _resolver.resolveById(contentId); 520 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], refContent.getTypes(), refContent.getMixinTypes()); 521 findAndIndexMetadata(refContent, followingSegments, refContent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 522 } 523 break; 524 default: 525 break; 526 } 527 } 528 529 /** 530 * Index a content metadata. 531 * @param content the current content being traversed. 532 * @param metadataName The name of metadata to index 533 * @param metadata The parent composite metadata 534 * @param document the solr document to index into. 535 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 536 * @param additionalDocuments The solr additional documents used for repeater instance 537 * @param fieldName the name of the indexed field. 538 * @param definition the metadata definition. 539 */ 540 public void indexMetadata(Content content, String metadataName, CompositeMetadata metadata, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments, String fieldName, MetadataDefinition definition) 541 { 542 String language = content.getLanguage(); 543 544 switch (definition.getType()) 545 { 546 case STRING: 547 indexStringMetadata(metadata, metadataName, content.getId(), document, contentDoc, fieldName, language, definition); 548 break; 549 case MULTILINGUAL_STRING: 550 indexMultilingualStringMetadata(metadata, metadataName, content.getId(), document, contentDoc, fieldName, definition); 551 break; 552 case USER: 553 indexUserMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 554 break; 555 case GEOCODE: 556 indexGeoCodeMetadata(metadata, metadataName, document, fieldName, definition); 557 break; 558 case BINARY: 559 indexBinaryMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 560 break; 561 case FILE: 562 indexFileMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 563 break; 564 case DATE: 565 indexDateMetadata(metadata, metadataName, document, fieldName, definition); 566 break; 567 case DATETIME: 568 indexDateTimeMetadata(metadata, metadataName, document, fieldName, definition); 569 break; 570 case CONTENT: 571 indexContentMetadata(metadata, metadataName, document, fieldName, definition); 572 break; 573 case SUB_CONTENT: 574 indexSubContentMetadata(metadata, metadataName, document, fieldName, definition); 575 break; 576 case LONG: 577 indexLongMetadata(metadata, metadataName, document, fieldName, definition); 578 break; 579 case DOUBLE: 580 indexDoubleMetadata(metadata, metadataName, document, fieldName, definition); 581 break; 582 case BOOLEAN: 583 indexBooleanMetadata(metadata, metadataName, document, fieldName, definition); 584 break; 585 case RICH_TEXT: 586 indexRichtextMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 587 break; 588 case COMPOSITE: 589 if (definition instanceof RepeaterDefinition) 590 { 591 indexRepeaterMetadata(content, metadata, metadataName, document, contentDoc, fieldName, definition, additionalDocuments); 592 } 593 else 594 { 595 indexCompositeMetadata(content, metadata, metadataName, document, contentDoc, fieldName, definition, additionalDocuments); 596 } 597 break; 598 case REFERENCE: 599 // TODO reference -> to be indexed? https://issues.ametys.org/browse/CMS-8623 600 break; 601 default: 602 break; 603 } 604 } 605 606 /** 607 * Index a property represented by an overriding field. 608 * @param field The overriding field. 609 * @param content The content of which to get the property. 610 * @param fieldName The name of the field to index. 611 * @param document the solr document to index into. 612 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 613 * @param additionalDocuments The solr additional documents used for repeater instance 614 */ 615 public void indexOverridingField(CustomMetadataIndexingField field, Content content, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 616 { 617 Object[] values = field.getValues(content); 618 MetadataDefinition definition = field.getMetadataDefinition(); 619 boolean isFacetable = definition.getEnumerator() != null; 620 String language = content.getLanguage(); 621 622 switch (definition.getType()) 623 { 624 case STRING: 625 indexStringValues((String[]) values, content.getId(), document, contentDoc, fieldName, language, isFacetable); 626 break; 627 case MULTILINGUAL_STRING: 628 indexMultilingualStringValues((MultilingualString) values[0], content.getId(), document, contentDoc, fieldName); 629 break; 630 case USER: 631 UserIdentity[] users = new UserIdentity[values.length]; 632 for (int i = 0; i < values.length; i++) 633 { 634 users[i] = UserIdentity.stringToUserIdentity((String) values[i]); 635 } 636 indexUserValues(users, document, contentDoc, fieldName, language); 637 break; 638 case GEOCODE: 639 if (values.length > 1) 640 { 641 indexGeocodeValue((double) values[0], (double) values[1], document, fieldName); 642 } 643 break; 644 case BINARY: 645 if (values.length > 0) 646 { 647 indexFullTextBinaryValue((InputStream) values[0], document, contentDoc, fieldName, language); 648 } 649 break; 650 case FILE: 651 indexFileValue(values, document, contentDoc, fieldName, language); 652 break; 653 case DATE: 654 indexDateValues((Date[]) values, document, fieldName); 655 break; 656 case DATETIME: 657 indexDateTimeValues((Date[]) values, document, fieldName); 658 break; 659 case CONTENT: 660 indexContentValues((String[]) values, document, fieldName); 661 break; 662 case SUB_CONTENT: 663 indexContentValues((String[]) values, document, fieldName); 664 break; 665 case LONG: 666 indexLongValues((Long[]) values, document, fieldName, isFacetable); 667 break; 668 case DOUBLE: 669 indexDoubleValues((Double[]) values, document, fieldName, isFacetable); 670 break; 671 case BOOLEAN: 672 indexBooleanValues((Boolean[]) values, document, fieldName); 673 break; 674 case RICH_TEXT: 675 if (values.length > 0) 676 { 677 indexRichtextValue((RichText) values[0], document, contentDoc, fieldName, language); 678 } 679 break; 680 case COMPOSITE: 681 break; 682 case REFERENCE: 683 // TODO reference -> to be indexed? https://issues.ametys.org/browse/CMS-8623 684 break; 685 default: 686 break; 687 } 688 } 689 690 /** 691 * Index values 692 * @param content The content being indexed. 693 * @param fieldName The Solr field's name 694 * @param type the type of values to index 695 * @param values the values 696 * @param document the Solr document 697 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 698 */ 699 public void indexValues(Content content, String fieldName, MetadataType type, Object[] values, SolrInputDocument document, SolrInputDocument contentDoc) 700 { 701 String language = content.getLanguage(); 702 703 switch (type) 704 { 705 case STRING: 706 indexStringValues((String[]) values, content.getId(), document, contentDoc, fieldName, language, false); 707 break; 708 case MULTILINGUAL_STRING: 709 if (values.length > 0) 710 { 711 indexMultilingualStringValues((MultilingualString) values[0], content.getId(), document, contentDoc, fieldName); 712 } 713 break; 714 case LONG: 715 indexLongValues((Long[]) values, document, fieldName, false); 716 break; 717 case DOUBLE: 718 indexDoubleValues((Double[]) values, document, fieldName, false); 719 break; 720 case DATE: 721 indexDateValues((Date[]) values, document, fieldName); 722 break; 723 case DATETIME: 724 indexDateTimeValues((Date[]) values, document, fieldName); 725 break; 726 case CONTENT: 727 indexContentValues((String[]) values, document, fieldName); 728 break; 729 case BOOLEAN: 730 indexBooleanValues((Boolean[]) values, document, fieldName); 731 break; 732 case USER: 733 UserIdentity[] users = new UserIdentity[values.length]; 734 for (int i = 0; i < values.length; i++) 735 { 736 users[i] = UserIdentity.stringToUserIdentity((String) values[i]); 737 } 738 indexUserValues(users, document, contentDoc, fieldName, language); 739 break; 740 case RICH_TEXT: 741 indexRichtextValue((RichText) values[0], document, contentDoc, fieldName, language); 742 break; 743 case BINARY: 744 case FILE: 745 case COMPOSITE: 746 case REFERENCE: 747 case SUB_CONTENT: 748 case GEOCODE: 749 getLogger().warn("Only primitive type is allowed on a custom indexing field"); 750 break; 751 default: 752 break; 753 } 754 } 755 756 757 /** 758 * Index a 'string' metadata 759 * @param metadata The parent composite metadata 760 * @param metadataName The name of metadata to index 761 * @param contentId The content id. For logging purposes 762 * @param document The solr document to index into 763 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 764 * @param fieldName The index field name 765 * @param language The content language. 766 * @param definition The metadata definition 767 */ 768 public void indexStringMetadata(CompositeMetadata metadata, String metadataName, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 769 { 770 String[] strValues = metadata.getStringArray(metadataName, new String[0]); 771 indexStringValues(strValues, contentId, document, contentDoc, fieldName, language, definition.getEnumerator() != null); 772 } 773 774 /** 775 * Index a multilingual string metadata 776 * @param metadata The parent composite metadata 777 * @param metadataName The name of metadata to index 778 * @param contentId The content id. For logging purposes 779 * @param document The solr document to index into 780 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 781 * @param fieldName The index field name 782 * @param definition The metadata definition 783 */ 784 public void indexMultilingualStringMetadata(CompositeMetadata metadata, String metadataName, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition) 785 { 786 MultilingualString multilingualString = metadata.getMultilingualString(metadataName); 787 indexMultilingualStringValues(multilingualString, contentId, document, contentDoc, fieldName); 788 } 789 790 /** 791 * Index a multilingual string values 792 * @param value The multilingual string 793 * @param contentId The content id. For logging purposes 794 * @param document The solr document to index into 795 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 796 * @param fieldName The index field name 797 */ 798 public void indexMultilingualStringValues(MultilingualString value, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName) 799 { 800 Set<Locale> metaLocales = value.getLocales(); 801 List<String> appLanguages = _languagesManager.getAvailableLanguages() 802 .values() 803 .stream() 804 .map(Language::getCode) 805 .collect(Collectors.toList()); 806 for (String appLanguageCode : appLanguages) 807 { 808 Locale appLocale = new Locale(appLanguageCode); 809 if (metaLocales.contains(appLocale)) 810 { 811 String str = value.getValue(appLocale); 812 indexMultilingualStringValues(new String[] {str}, contentId, document, contentDoc, fieldName, appLocale.getLanguage()); 813 } 814 815 // Need to index sort field for every language of application, even if metadata does not have value for the given language 816 String sortValue = MultilingualStringHelper.getValue(value, appLocale); 817 indexMultilingualStringValuesForSorting(sortValue, document, fieldName, appLocale.getLanguage()); 818 } 819 } 820 821 /** 822 * Index multilingual 'string' values 823 * @param values The values 824 * @param contentId The content id. For logging purposes 825 * @param document The solr document to index into 826 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 827 * @param fieldName The index field name 828 * @param language The language for values. 829 */ 830 public void indexMultilingualStringValues(String[] values, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 831 { 832 for (String value : values) 833 { 834 document.addField(fieldName + "_txt_" + language, value); 835 document.addField(fieldName + "_txt_stemmed_" + language, value); 836 document.addField(fieldName + "_txt_ws_" + language, value); 837 838 // Index without analyzing. 839 String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(value, getLogger(), contentId, fieldName); 840 document.addField(fieldName + "_" + language + "_s", possiblyTruncatedValue); 841 842 // Index without analyzing but lower-case (for wildcard queries). 843 document.addField(fieldName + "_" + language + "_s_lower", possiblyTruncatedValue.toLowerCase()); 844 845 // Exact words tokenized by whitespace. 846 document.addField(fieldName + "_" + language + "_s_ws", value.toLowerCase()); 847 848 // Index with analyze (full-text search). 849 document.addField(fieldName + "_" + language + "_txt", value); 850 851 indexFulltextValue(document, contentDoc, value, language); 852 } 853 } 854 855 /** 856 * Index multilingual 'string' value in sort field 857 * @param value The value 858 * @param document The solr document to index into 859 * @param fieldName The index field name 860 * @param language The language 861 */ 862 public void indexMultilingualStringValuesForSorting(String value, SolrInputDocument document, String fieldName, String language) 863 { 864 String sortField = fieldName + "_" + language + SolrFieldHelper.getSortFieldSuffix(MetadataType.MULTILINGUAL_STRING); 865 if (StringUtils.isNotEmpty(value) && !document.containsKey(sortField)) 866 { 867 document.addField(sortField, SolrFieldHelper.getSortValue(value)); 868 } 869 } 870 871 /** 872 * Index 'string' values 873 * @param values The values 874 * @param contentId The content id. For logging purposes 875 * @param document The solr document to index into 876 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 877 * @param fieldName The index field name 878 * @param language The content language. 879 * @param isFacetable true if the field can be used as a facet. 880 */ 881 public void indexStringValues(String[] values, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, boolean isFacetable) 882 { 883 for (String value : values) 884 { 885 if (!isFacetable) 886 { 887 if (language != null) // Language can be null for multilingual content 888 { 889 // No enumerator: index as full-text. 890 document.addField(fieldName + "_txt_" + language, value); 891 document.addField(fieldName + "_txt_stemmed_" + language, value); 892 document.addField(fieldName + "_txt_ws_" + language, value); 893 } 894 } 895 else 896 { 897 // Facets (enumeration only) 898 document.addField(fieldName + "_s_dv", value); 899 } 900 901 // Index without analyzing. 902 String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(value, getLogger(), contentId, fieldName); 903 document.addField(fieldName + "_s", possiblyTruncatedValue); 904 905 // Index without analyzing but lower-case (for wildcard queries). 906 document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase()); 907 908 // Exact words tokenized by whitespace. 909 document.addField(fieldName + "_s_ws", value.toLowerCase()); 910 911 // Index with analyze (full-text search). 912 document.addField(fieldName + "_txt", value); 913 914 indexFulltextValue(document, contentDoc, value, language); 915 } 916 917 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 918 if (values.length > 0 && !document.containsKey(sortField)) 919 { 920 // FIXME Si la meta est enumerée, indexer le label ? dans quelle langue ? 921 document.addField(sortField, SolrFieldHelper.getSortValue(values[0])); 922 } 923 } 924 925 /** 926 * Index a 'date' metadata 927 * @param metadata The parent composite metadata 928 * @param metadataName The name of metadata to index 929 * @param document The solr document to index into 930 * @param fieldName The index field name 931 * @param definition The metadata definition 932 */ 933 public void indexDateMetadata (CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 934 { 935 Date[] dateValues = metadata.getDateArray(metadataName, new Date[0]); 936 indexDateValues (dateValues, document, fieldName); 937 } 938 939 /** 940 * Index 'date' values 941 * @param values The values 942 * @param document The solr document to index into 943 * @param fieldName The index field name 944 */ 945 public void indexDateValues (Date[] values, SolrInputDocument document, String fieldName) 946 { 947 for (Date value : values) 948 { 949 document.addField(fieldName + "_dt", SolrIndexer.dateFormat().format(value)); 950 } 951 952 String sortField = fieldName + "_dt_sort"; 953 if (values.length > 0 && !document.containsKey(sortField)) 954 { 955 document.addField(sortField, SolrIndexer.dateFormat().format(values[0])); 956 } 957 } 958 959 /** 960 * Index a 'datetime' metadata 961 * @param metadata The parent composite metadata 962 * @param metadataName The name of metadata to index 963 * @param document The solr document to index into 964 * @param fieldName The index field name 965 * @param definition The metadata definition 966 */ 967 public void indexDateTimeMetadata (CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 968 { 969 Date[] dateValues = metadata.getDateArray(metadataName, new Date[0]); 970 indexDateTimeValues(dateValues, document, fieldName); 971 } 972 973 /** 974 * Index 'datetime' values 975 * @param values The values 976 * @param document The solr document to index into 977 * @param fieldName The index field name 978 */ 979 public void indexDateTimeValues (Date[] values, SolrInputDocument document, String fieldName) 980 { 981 for (Date value : values) 982 { 983 document.addField(fieldName + "_dt", SolrIndexer.dateFormat().format(value)); 984 } 985 986 String sortField = fieldName + "_dt_sort"; 987 if (values.length > 0 && !document.containsKey(sortField)) 988 { 989 document.addField(sortField, SolrIndexer.dateFormat().format(values[0])); 990 } 991 } 992 993 /** 994 * Index a 'double' metadata 995 * @param metadata The parent composite metadata 996 * @param metadataName The name of metadata to index 997 * @param document The solr document to index into 998 * @param fieldName The index field name 999 * @param definition The metadata definition 1000 */ 1001 public void indexDoubleMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1002 { 1003 boolean isFacetable = definition.getEnumerator() != null; 1004 double[] values = metadata.getDoubleArray(metadataName, new double[0]); 1005 indexDoubleValues (ArrayUtils.toObject(values), document, fieldName, isFacetable); 1006 } 1007 1008 /** 1009 * Index 'double' values 1010 * @param values The values 1011 * @param document The solr document to index into 1012 * @param fieldName The index field name 1013 * @param isFacetable true if the field can be used as a facet. 1014 */ 1015 public void indexDoubleValues(Double[] values, SolrInputDocument document, String fieldName, boolean isFacetable) 1016 { 1017 for (Double value : values) 1018 { 1019 document.addField(fieldName + "_d", value); 1020 if (isFacetable) 1021 { 1022 document.addField(fieldName + "_d_dv", value); 1023 } 1024 } 1025 1026 String sortField = fieldName + "_d_sort"; 1027 if (values.length > 0 && !document.containsKey(sortField)) 1028 { 1029 document.addField(sortField, values[0]); 1030 } 1031 } 1032 1033 /** 1034 * Index a 'long' metadata 1035 * @param metadata The parent composite metadata 1036 * @param metadataName The name of metadata to index 1037 * @param document The solr document to index into 1038 * @param fieldName The index field name 1039 * @param definition The metadata definition 1040 */ 1041 public void indexLongMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1042 { 1043 boolean isFacetable = definition.getEnumerator() != null; 1044 long[] values = metadata.getLongArray(metadataName, new long[0]); 1045 indexLongValues(ArrayUtils.toObject(values), document, fieldName, isFacetable); 1046 } 1047 1048 /** 1049 * Index 'long' values 1050 * @param values The values 1051 * @param document The solr document to index into 1052 * @param fieldName The index field name 1053 * @param isFacetable true if the field can be used as a facet. 1054 */ 1055 public void indexLongValues(Long[] values, SolrInputDocument document, String fieldName, boolean isFacetable) 1056 { 1057 for (Long value : values) 1058 { 1059 document.addField(fieldName + "_l", value); 1060 if (isFacetable) 1061 { 1062 document.addField(fieldName + "_l_dv", value); 1063 } 1064 } 1065 1066 String sortField = fieldName + "_l_sort"; 1067 if (values.length > 0 && !document.containsKey(sortField)) 1068 { 1069 document.addField(sortField, values[0]); 1070 } 1071 } 1072 1073 /** 1074 * Index a 'user' metadata 1075 * @param metadata The parent composite metadata 1076 * @param metadataName The name of metadata to index 1077 * @param document The solr document to index into 1078 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1079 * @param fieldName The index field name 1080 * @param language The content language. 1081 * @param definition The metadata definition 1082 */ 1083 public void indexUserMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1084 { 1085 UserIdentity[] users = metadata.getUserArray(metadataName); 1086 indexUserValues(users, document, contentDoc, fieldName, language); 1087 } 1088 1089 /** 1090 * Index 'user' values 1091 * @param users The users 1092 * @param document The solr document to index into 1093 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1094 * @param fieldName The index field name 1095 * @param language The content language. 1096 */ 1097 public void indexUserValues(UserIdentity[] users, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1098 { 1099 int count = 0; 1100 for (UserIdentity userIdentity : users) 1101 { 1102 String fullName = _userHelper.getUserFullName(userIdentity); 1103 String sortableName = _userHelper.getUserSortableName(userIdentity); 1104 String identityAsString = UserIdentity.userIdentityToString(userIdentity); 1105 1106 indexFulltextValue(document, contentDoc, identityAsString, language); 1107 1108 // Facets 1109 document.addField(fieldName + "_s_dv", identityAsString); 1110 1111 // Dynamic fields 1112 document.addField(fieldName + "_s", identityAsString); 1113 1114 if (StringUtils.isNotEmpty(fullName)) 1115 { 1116 document.addField(fieldName + "_txt", fullName); 1117 1118 indexFulltextValue(document, contentDoc, fullName, language); 1119 } 1120 1121 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 1122 if (count == 0 && StringUtils.isNotEmpty(sortableName) && !document.containsKey(sortField)) 1123 { 1124 // Index only first user for sorting 1125 document.addField(sortField, SolrFieldHelper.getSortValue(sortableName)); 1126 } 1127 count++; 1128 } 1129 } 1130 1131 /** 1132 * Index a 'boolean' metadata 1133 * @param metadata The parent composite metadata 1134 * @param metadataName The name of metadata to index 1135 * @param document The solr document to index into 1136 * @param fieldName The index field name 1137 * @param definition The metadata definition 1138 */ 1139 public void indexBooleanMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1140 { 1141 boolean[] values = metadata.getBooleanArray(metadataName, new boolean[0]); 1142 indexBooleanValues(ArrayUtils.toObject(values), document, fieldName); 1143 } 1144 1145 /** 1146 * Index 'boolean' values 1147 * @param values The values 1148 * @param document The solr document to index into 1149 * @param fieldName The index field name 1150 */ 1151 public void indexBooleanValues(Boolean[] values, SolrInputDocument document, String fieldName) 1152 { 1153 for (Boolean value : values) 1154 { 1155 document.addField(fieldName + "_b", value); 1156 document.addField(fieldName + "_b_dv", value); 1157 } 1158 1159 String sortField = fieldName + "_b_sort"; 1160 if (values.length > 0 && !document.containsKey(sortField)) 1161 { 1162 document.addField(sortField, values[0]); 1163 } 1164 } 1165 1166 /** 1167 * Index a 'richtext' metadata 1168 * @param metadata The parent composite metadata 1169 * @param metadataName The name of metadata to index 1170 * @param document The solr document to index into 1171 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1172 * @param fieldName The index field name 1173 * @param language The content language. 1174 * @param definition The metadata definition 1175 */ 1176 public void indexRichtextMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1177 { 1178 indexRichtextValue(metadata.getRichText(metadataName), document, contentDoc, fieldName, language); 1179 } 1180 1181 /** 1182 * Index 'richtext' values 1183 * @param richText The rich text to index. 1184 * @param document The solr document to index into 1185 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1186 * @param fieldName The index field name. 1187 * @param language The content language. 1188 */ 1189 public void indexRichtextValue(RichText richText, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1190 { 1191 try (InputStream is = richText.getInputStream()) 1192 { 1193 String value = _richTextToString(is); 1194 1195 if (language != null) // language can be null for multilingual content 1196 { 1197 // Index as a text field. 1198 document.addField(fieldName + "_txt_" + language, value); 1199 document.addField(fieldName + "_txt_stemmed_" + language, value); 1200 document.addField(fieldName + "_txt_ws_" + language, value); 1201 } 1202 1203 // Index in the full-text value. 1204 SolrContentIndexer.indexFulltextValue(document, value, language); 1205 1206 if (contentDoc != null) 1207 { 1208 SolrContentIndexer.indexFulltextValue(contentDoc, value, language); 1209 } 1210 } 1211 catch (Exception e) 1212 { 1213 getLogger().warn("Failed to index RICH_TEXT '" + fieldName + "'", e); 1214 } 1215 } 1216 1217 /** 1218 * Gets a XML as a string and extract the text only 1219 * @param is The inputstream of XML 1220 * @return The text or null if the XML is not well formed 1221 */ 1222 protected String _richTextToString(InputStream is) 1223 { 1224 SAXParser saxParser = null; 1225 try 1226 { 1227 RichTextHandler txtHandler = new RichTextHandler(); 1228 saxParser = (SAXParser) _manager.lookup(SAXParser.ROLE); 1229 saxParser.parse(new InputSource(is), txtHandler); 1230 return txtHandler.getValue().trim(); 1231 } 1232 catch (ServiceException e) 1233 { 1234 getLogger().error("Unable to get a SAX parser", e); 1235 return null; 1236 } 1237 catch (IOException | SAXException e) 1238 { 1239 getLogger().error("Cannot parse inputstream", e); 1240 return null; 1241 } 1242 finally 1243 { 1244 _manager.release(saxParser); 1245 } 1246 } 1247 1248 1249 1250 /** 1251 * Index a 'binary' metadata 1252 * @param metadata The parent composite metadata 1253 * @param metadataName The name of metadata to index 1254 * @param document The solr document to index into 1255 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1256 * @param fieldName The index field name 1257 * @param language The content language. 1258 * @param definition The metadata definition 1259 */ 1260 public void indexBinaryMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1261 { 1262 // Index file name. 1263 BinaryMetadata binary = metadata.getBinaryMetadata(metadataName); 1264 document.addField(fieldName + "_txt", binary.getFilename()); 1265 1266 // Index the contents. 1267 indexFullTextBinary(metadata, metadataName, document, contentDoc, fieldName, language, definition); 1268 } 1269 1270 /** 1271 * Index a 'file' metadata 1272 * @param metadata The parent composite metadata 1273 * @param metadataName The name of metadata to index 1274 * @param document The solr document to index into 1275 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1276 * @param fieldName The index field name 1277 * @param language The content language. 1278 * @param definition The metadata definition 1279 */ 1280 public void indexFileMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1281 { 1282 if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.BINARY.equals(metadata.getType(metadataName))) 1283 { 1284 indexBinaryMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 1285 } 1286 else 1287 { 1288 // Resource from the explorer. 1289 String value = metadata.getString(metadataName); 1290 1291 try 1292 { 1293 Resource resource = (Resource) _resolver.resolveById(value); 1294 1295 // Index file name. 1296 document.addField(fieldName + "_txt", resource.getName()); 1297 1298 // Index the contents. 1299 indexResourceContent(resource, document, contentDoc, language); 1300 1301// document.addField(prefix + fieldName + "$path", resource.getId()); 1302// document.addField(prefix + fieldName + "$type", "explorer"); 1303// document.addField(prefix + fieldName + "$mime-type", resource.getMimeType()); 1304// document.addField(prefix + fieldName + "$filename", filename); 1305// document.addField(prefix + fieldName + "$lastModified", resource.getLastModified()); 1306// document.addField(prefix + fieldName + "$size", resource.getLength()); 1307// 1308// String viewUrl = "/plugins/explorer/resource?id=" + resource.getId(); 1309// document.addField(prefix + fieldName + "$viewUrl", viewUrl); 1310// document.addField(prefix + fieldName + "$downloadUrl", viewUrl + "&download=true"); 1311 } 1312 catch (AmetysRepositoryException e) 1313 { 1314 getLogger().warn(String.format("Unable to index the resource of id '%s' : resource does not exist.", value), e); 1315 } 1316 } 1317 } 1318 1319 /** 1320 * Index a 'file' metadata 1321 * @param values The values. 1322 * @param document The solr document to index into 1323 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1324 * @param fieldName The index field name 1325 * @param language The content language. 1326 */ 1327 public void indexFileValue(Object[] values, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1328 { 1329 String type = (String) values[0]; 1330 if (StringUtils.equalsIgnoreCase(org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.BINARY.name(), type)) 1331 { 1332 indexFullTextBinaryValue((InputStream) values[1], document, contentDoc, fieldName, language); 1333 } 1334 else 1335 { 1336 indexResourceContent((Resource) values[1], document, contentDoc, language); 1337 } 1338 } 1339 1340 /** 1341 * Index a 'binary' metadata 1342 * @param metadata The parent composite metadata 1343 * @param metadataName The name of metadata to index 1344 * @param document The solr document to index into 1345 * @param contentDoc The content document. 1346 * @param fieldName The index field name 1347 * @param language The content language. 1348 * @param definition The metadata definition 1349 */ 1350 protected void indexFullTextBinary(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1351 { 1352 try (InputStream is = metadata.getBinaryMetadata(metadataName).getInputStream()) 1353 { 1354 indexFullTextBinaryValue(is, document, contentDoc, fieldName, language); 1355 } 1356 catch (IOException e) 1357 { 1358 throw new RuntimeException(e); 1359 } 1360 } 1361 1362 /** 1363 * Index a 'binary' value 1364 * @param is An InputStream on the binary data. 1365 * @param document The solr document to index into 1366 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1367 * @param fieldName The index field name 1368 * @param language The content language. 1369 */ 1370 protected void indexFullTextBinaryValue(InputStream is, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1371 { 1372 try 1373 { 1374 String text = _tika.parseToString(is); 1375 1376 indexFulltextValue(document, contentDoc, text, language); 1377 } 1378 catch (Throwable e) 1379 { 1380 getLogger().warn(String.format("Failed to index binary field '%s'", fieldName), e); 1381 } 1382 } 1383 1384 /** 1385 * Index a 'content' metadata 1386 * @param metadata The parent composite metadata 1387 * @param metadataName The name of metadata to index 1388 * @param document The solr document to index into 1389 * @param fieldName The index field name 1390 * @param definition The metadata definition 1391 */ 1392 public void indexContentMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1393 { 1394 String[] contentIds = metadata.getStringArray(metadataName, new String[0]); 1395 indexContentValues(contentIds, document, fieldName); 1396 } 1397 1398 /** 1399 * Index content values. 1400 * @param contentIds The ID of the contents to index. 1401 * @param document The solr document to index into. 1402 * @param fieldName the field name. 1403 */ 1404 public void indexContentValues (String[] contentIds, SolrInputDocument document, String fieldName) 1405 { 1406 for (String contentId : contentIds) 1407 { 1408 document.addField(fieldName + "_s", contentId); 1409 // Facets 1410 document.addField(fieldName + "_s_dv", contentId); 1411 } 1412 1413 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 1414 if (contentIds.length > 0 && !document.containsKey(sortField)) 1415 { 1416 try 1417 { 1418 // TODO Est-ce qu'on peut faire autrement qu'un resolve ? 1419 Content content = _resolver.resolveById(contentIds[0]); 1420 CompositeMetadata metadataHolder = content.getMetadataHolder(); 1421 if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.MULTILINGUAL_STRING.equals(metadataHolder.getType(DefaultContent.METADATA_TITLE))) 1422 { 1423 MultilingualString value = metadataHolder.getMultilingualString(DefaultContent.METADATA_TITLE); 1424 for (Locale locale : value.getLocales()) 1425 { 1426 String str = value.getValue(locale); 1427 document.addField(sortField + "_" + locale.getLanguage(), SolrFieldHelper.getSortValue(str)); 1428 } 1429 } 1430 else 1431 { 1432 document.addField(sortField, SolrFieldHelper.getSortValue(_contentHelper.getTitle(content))); 1433 } 1434 } 1435 catch (AmetysRepositoryException e) 1436 { 1437 // Do not index 1438 } 1439 } 1440 } 1441 1442 /** 1443 * Index a 'sub_content' metadata 1444 * @param metadata The parent composite metadata 1445 * @param metadataName The name of metadata to index 1446 * @param document The solr document to index into 1447 * @param fieldName The index field name 1448 * @param definition The metadata definition 1449 */ 1450 public void indexSubContentMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1451 { 1452 TraversableAmetysObject objectCollection = metadata.getObjectCollection(metadataName); 1453 AmetysObjectIterable<Content> subcontents = objectCollection.getChildren(); 1454 for (Content subcontent : subcontents) 1455 { 1456 document.addField(fieldName + "_s", subcontent.getId()); 1457 // Facets 1458 document.addField(fieldName + "_s_dv", subcontent.getId()); 1459 } 1460 1461// String sortField = fieldName + "_s_sort"; 1462 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 1463 subcontents = objectCollection.getChildren(); 1464 Iterator<Content> it = subcontents.iterator(); 1465 1466 if (it.hasNext() && !document.containsKey(sortField)) 1467 { 1468 Content subcontent = it.next(); 1469 CompositeMetadata metadataHolder = subcontent.getMetadataHolder(); 1470 if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.MULTILINGUAL_STRING.equals(metadataHolder.getType(DefaultContent.METADATA_TITLE))) 1471 { 1472 MultilingualString value = metadataHolder.getMultilingualString(DefaultContent.METADATA_TITLE); 1473 for (Locale locale : value.getLocales()) 1474 { 1475 String str = value.getValue(locale); 1476 document.addField(sortField + "_" + locale.getLanguage(), SolrFieldHelper.getSortValue(str)); 1477 } 1478 } 1479 else 1480 { 1481 document.addField(sortField, SolrFieldHelper.getSortValue(_contentHelper.getTitle(subcontent))); 1482 } 1483 } 1484 } 1485 1486 /** 1487 * Index a 'geocode' metadata 1488 * @param metadata The parent composite metadata 1489 * @param metadataName The name of metadata to index 1490 * @param document The solr document to index into 1491 * @param fieldName The index field name 1492 * @param definition The metadata definition 1493 */ 1494 public void indexGeoCodeMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1495 { 1496 CompositeMetadata geoCodeMetadata = metadata.getCompositeMetadata(metadataName); 1497 if (geoCodeMetadata.hasMetadata("longitude") && geoCodeMetadata.hasMetadata("latitude")) 1498 { 1499 double longitude = geoCodeMetadata.getDouble("longitude"); 1500 double latitude = geoCodeMetadata.getDouble("latitude"); 1501 1502 indexGeocodeValue(latitude, longitude, document, fieldName); 1503 } 1504 } 1505 1506 /** 1507 * Index a 'geocode' metadata 1508 * @param latitude the coord latitude. 1509 * @param longitude the coord longitude. 1510 * @param document The solr document to index into 1511 * @param fieldName The index field name 1512 */ 1513 public void indexGeocodeValue(double latitude, double longitude, SolrInputDocument document, String fieldName) 1514 { 1515 document.addField(fieldName + "$longitude_d", longitude); 1516 document.addField(fieldName + "$latitude_d", latitude); 1517 1518 String geoFieldName = SolrFieldHelper.getIndexingFieldName(MetadataType.GEOCODE, fieldName); 1519 document.addField(geoFieldName, longitude + " " + latitude); 1520 } 1521 1522 /** 1523 * Index a composite metadata, i.e. browse and index the sub-metadatas. 1524 * @param content The content being indexed. 1525 * @param metadata The parent metadata. 1526 * @param metadataName The composite metadata name. 1527 * @param document The solr document to index into. 1528 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1529 * @param fieldName The field name. 1530 * @param definition The composite metadata definition. 1531 * @param additionalDocuments The solr additional documents used for repeater instance 1532 */ 1533 public void indexCompositeMetadata(Content content, CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition, List<SolrInputDocument> additionalDocuments) 1534 { 1535 CompositeMetadata compositeMetadata = metadata.getCompositeMetadata(metadataName); 1536 1537 // Index recursively 1538 Set<String> subMetadataNames = definition.getMetadataNames(); 1539 for (String subMetadataName : subMetadataNames) 1540 { 1541 if (compositeMetadata.hasMetadata(subMetadataName)) 1542 { 1543 indexMetadata(content, subMetadataName, compositeMetadata, document, contentDoc, additionalDocuments, fieldName + ContentConstants.METADATA_PATH_SEPARATOR + subMetadataName, definition.getMetadataDefinition(subMetadataName)); 1544 } 1545 } 1546 } 1547 1548 /** 1549 * Index a repeater metadata, i.e. browse and index the entries. 1550 * @param content The content being indexed. 1551 * @param metadata The parent metadata. 1552 * @param metadataName The repeater metadata name. 1553 * @param document The solr document to index into. 1554 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1555 * @param fieldName The field name. 1556 * @param definition The repeater metadata definition. 1557 * @param additionalDocuments The solr additional documents used for repeater instance 1558 */ 1559 public void indexRepeaterMetadata(Content content, CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition, List<SolrInputDocument> additionalDocuments) 1560 { 1561 CompositeMetadata compositeMetadata = metadata.getCompositeMetadata(metadataName); 1562 1563 // Get and sort the entry names. 1564 String[] entries = compositeMetadata.getMetadataNames(); 1565 Arrays.sort(entries, MetadataManager.REPEATER_ENTRY_COMPARATOR); 1566 1567 for (int i = 0; i < entries.length; i++) 1568 { 1569 String entryName = entries[i]; 1570 int position = i + 1; 1571 1572 CompositeMetadata entry = compositeMetadata.getCompositeMetadata(entryName); 1573 1574 String repeaterID = document.getField("id").getFirstValue().toString() + "/" + fieldName + "/" + entryName; 1575 1576 // Creates a new Solr document for each entry 1577 SolrInputDocument repDocument = new SolrInputDocument(); 1578 repDocument.addField("id", repeaterID); 1579 document.addField(fieldName + "_s_dv", repeaterID); 1580 1581 repDocument.addField(DOCUMENT_TYPE, TYPE_REPEATER); 1582 repDocument.addField(REPEATER_ENTRY_POSITION, position); 1583 // Add the created document to additional documents 1584 additionalDocuments.add(repDocument); 1585 1586 SolrInputDocument parentContentDoc = contentDoc != null ? contentDoc : document; 1587 1588 Set<String> subMetadataNames = definition.getMetadataNames(); 1589 for (String subMetadataName : subMetadataNames) 1590 { 1591 if (entry.hasMetadata(subMetadataName)) 1592 { 1593 // Created document is now the main document 1594 indexMetadata(content, subMetadataName, entry, repDocument, parentContentDoc, additionalDocuments, subMetadataName, definition.getMetadataDefinition(subMetadataName)); 1595 } 1596 } 1597 } 1598 } 1599 1600 /** 1601 * Index the content of a resource. 1602 * @param resource The resource 1603 * @param document The solr document to index into 1604 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1605 * @param language The content language. 1606 */ 1607 protected void indexResourceContent(Resource resource, SolrInputDocument document, SolrInputDocument contentDoc, String language) 1608 { 1609 try (InputStream is = resource.getInputStream()) 1610 { 1611 indexResourceContentValue(is, resource.getDCSubject(), resource.getDCDescription(), language, document, contentDoc); 1612 1613 // TODO Declare and index DC metadata? 1614 // DC meta 1615// _resourceIndexer.indexDublinCoreMetadata(resource, document); 1616 } 1617 catch (Exception e) 1618 { 1619 getLogger().error("Unable to index resource at " + resource.getPath(), e); 1620 } 1621 } 1622 1623 /** 1624 * Index the content of a resource. 1625 * @param is An input stream on the resource content. 1626 * @param keywords The resource keywords. 1627 * @param description The resource description. 1628 * @param language The content language. 1629 * @param document The solr document to index into 1630 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1631 * @throws TikaException If an error occurs extracting the document's text content. 1632 * @throws IOException If an error occurs reading the document's text content. 1633 */ 1634 protected void indexResourceContentValue(InputStream is, String[] keywords, String description, String language, SolrInputDocument document, SolrInputDocument contentDoc) throws IOException, TikaException 1635 { 1636 String value = _tika.parseToString(is); 1637 1638 indexFulltextValue(document, contentDoc, value, language); 1639 1640 for (String keyword : keywords) 1641 { 1642 indexFulltextValue(document, contentDoc, keyword, language); 1643 } 1644 1645 if (description != null) 1646 { 1647 indexFulltextValue(document, contentDoc, description, language); 1648 } 1649 } 1650 1651 /** 1652 * Index a full-text value. 1653 * @param mainDocument The document being used, can be either the content document itself or a repeater document. 1654 * @param contentDoc The parent content document. If the mainDocument is the content document, this will be null. 1655 * @param text The text to index. 1656 * @param language The content language. 1657 */ 1658 protected void indexFulltextValue(SolrInputDocument mainDocument, SolrInputDocument contentDoc, String text, String language) 1659 { 1660 indexFulltextValue(mainDocument, text, language); 1661 1662 // The content doc is null if the main document is the content doc (to prevent indexing the data twice). 1663 if (contentDoc != null) 1664 { 1665 indexFulltextValue(contentDoc, text, language); 1666 } 1667 } 1668 1669 /** 1670 * Index a full-text value. 1671 * @param document The document to index into. 1672 * @param text The text to index. 1673 * @param language The content language. 1674 */ 1675 public static void indexFulltextValue(SolrInputDocument document, String text, String language) 1676 { 1677 if (StringUtils.isNotBlank(text)) 1678 { 1679 document.addField(FULL_GENERAL, text); 1680 document.addField(FULL_EXACT_WS, text); 1681 1682 if (StringUtils.isNotEmpty(language)) 1683 { 1684 indexLanguageFulltextValue(document, text, language); 1685 } 1686 } 1687 } 1688 1689 /** 1690 * Index a full-text value. 1691 * @param document The document to index into. 1692 * @param text The text to index. 1693 * @param languages The languages. 1694 */ 1695 public static void indexFulltextValue(SolrInputDocument document, String text, Collection<String> languages) 1696 { 1697 if (StringUtils.isNotBlank(text)) 1698 { 1699 document.addField(FULL_GENERAL, text); 1700 document.addField(FULL_EXACT_WS, text); 1701 1702 for (String language : languages) 1703 { 1704 indexLanguageFulltextValue(document, text, language); 1705 } 1706 } 1707 } 1708 1709 /** 1710 * Index a full-text value in the language-specific fields. 1711 * @param document The document to index into. 1712 * @param text The text to index. 1713 * @param language The content language. 1714 */ 1715 protected static void indexLanguageFulltextValue(SolrInputDocument document, String text, String language) 1716 { 1717 document.addField(FULL_PREFIX + language, text); 1718 document.addField(FULL_STEMMED_PREFIX + language, text); 1719 } 1720}