001/* 002 * Copyright 2015 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.cms.content.indexing.solr; 017 018import java.io.IOException; 019import java.io.InputStream; 020import java.nio.ByteBuffer; 021import java.nio.CharBuffer; 022import java.nio.charset.CharsetDecoder; 023import java.nio.charset.CodingErrorAction; 024import java.nio.charset.StandardCharsets; 025import java.util.Arrays; 026import java.util.Collection; 027import java.util.Date; 028import java.util.HashMap; 029import java.util.Iterator; 030import java.util.List; 031import java.util.Map; 032import java.util.Set; 033 034import org.apache.avalon.framework.component.Component; 035import org.apache.avalon.framework.service.ServiceException; 036import org.apache.avalon.framework.service.ServiceManager; 037import org.apache.avalon.framework.service.Serviceable; 038import org.apache.commons.lang3.ArrayUtils; 039import org.apache.commons.lang3.StringUtils; 040import org.apache.excalibur.xml.sax.SAXParser; 041import org.apache.solr.common.SolrInputDocument; 042import org.apache.tika.Tika; 043import org.apache.tika.exception.TikaException; 044import org.xml.sax.InputSource; 045import org.xml.sax.SAXException; 046 047import org.ametys.cms.content.RichTextHandler; 048import org.ametys.cms.contenttype.ContentConstants; 049import org.ametys.cms.contenttype.ContentType; 050import org.ametys.cms.contenttype.ContentTypeExtensionPoint; 051import org.ametys.cms.contenttype.ContentTypesHelper; 052import org.ametys.cms.contenttype.MetadataDefinition; 053import org.ametys.cms.contenttype.MetadataManager; 054import org.ametys.cms.contenttype.MetadataType; 055import org.ametys.cms.contenttype.RepeaterDefinition; 056import org.ametys.cms.contenttype.indexing.CustomIndexingField; 057import org.ametys.cms.contenttype.indexing.CustomMetadataIndexingField; 058import org.ametys.cms.contenttype.indexing.IndexingField; 059import org.ametys.cms.contenttype.indexing.IndexingModel; 060import org.ametys.cms.contenttype.indexing.MetadataIndexingField; 061import org.ametys.cms.repository.Content; 062import org.ametys.cms.search.model.SystemProperty; 063import org.ametys.cms.search.model.SystemPropertyExtensionPoint; 064import org.ametys.core.user.UserIdentity; 065import org.ametys.plugins.core.user.UserHelper; 066import org.ametys.plugins.explorer.resources.Resource; 067import org.ametys.plugins.explorer.resources.metadata.TikaProvider; 068import org.ametys.plugins.repository.AmetysObjectIterable; 069import org.ametys.plugins.repository.AmetysObjectResolver; 070import org.ametys.plugins.repository.AmetysRepositoryException; 071import org.ametys.plugins.repository.TraversableAmetysObject; 072import org.ametys.plugins.repository.UnknownAmetysObjectException; 073import org.ametys.plugins.repository.metadata.BinaryMetadata; 074import org.ametys.plugins.repository.metadata.CompositeMetadata; 075import org.ametys.plugins.repository.metadata.RichText; 076import org.ametys.runtime.plugin.component.AbstractLogEnabled; 077 078/** 079 * Component for {@link Content} indexing into a Solr server. 080 */ 081public class SolrContentIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrFieldNames 082{ 083 /** The component role. */ 084 public static final String ROLE = SolrContentIndexer.class.getName(); 085 086 private static final int __SOLR_STRING_NB_BYTES_LIMIT = 32766; 087 088 /** The Ametys objet resolver */ 089 protected AmetysObjectResolver _resolver; 090 /** The content type extension point */ 091 protected ContentTypeExtensionPoint _cTypeEP; 092 /** The content type helper */ 093 protected ContentTypesHelper _cTypesHelper; 094 /** The users manager */ 095 protected UserHelper _userHelper; 096 /** The Tika instance */ 097 protected Tika _tika; 098 /** The resource indexer */ 099 protected SolrResourceIndexer _resourceIndexer; 100 /** The sax parser */ 101 protected SAXParser _parser; 102 /** The system property extension point. */ 103 protected SystemPropertyExtensionPoint _systemPropEP; 104 105 @Override 106 public void service(ServiceManager manager) throws ServiceException 107 { 108 _resolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE); 109 _resourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE); 110 _cTypeEP = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE); 111 _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE); 112 _userHelper = (UserHelper) manager.lookup(UserHelper.ROLE); 113 TikaProvider tikaProvider = (TikaProvider) manager.lookup(TikaProvider.ROLE); 114 _tika = tikaProvider.getTika(); 115 _parser = (SAXParser) manager.lookup(SAXParser.ROLE); 116 _systemPropEP = (SystemPropertyExtensionPoint) manager.lookup(SystemPropertyExtensionPoint.ROLE); 117 } 118 119 /** 120 * Populate a solr input document by adding fields to index into it. 121 * @param content The content to index 122 * @param document The main solr document to index into 123 * @param additionalDocuments The additional documents for repeater instances 124 * @throws Exception if an error occurred while indexing 125 */ 126 public void indexContent(Content content, SolrInputDocument document, List<SolrInputDocument> additionalDocuments) throws Exception 127 { 128 // Properties specific to a stand-alone indexation. 129 String contentId = content.getId(); 130 document.addField(ID, contentId); 131 document.addField(DOCUMENT_TYPE, TYPE_CONTENT); 132 document.addField(TITLE, _truncateUtf8StringValue(content.getTitle(), contentId, TITLE)); 133 document.addField(CONTENT_NAME, _truncateUtf8StringValue(content.getName(), contentId, CONTENT_NAME)); 134 135 document.addField(WORKFLOW_REF_DV, contentId + "#workflow"); 136 137 // Index content system properties. 138 indexSystemProperties(content, document); 139 140 // Index the fields specified in the indexation model. 141 indexModelFields(content, document, additionalDocuments); 142 } 143 144 /** 145 * Index the system properties of a content. 146 * @param content The content to index. 147 * @param document The solr document to index into. 148 */ 149 protected void indexSystemProperties(Content content, SolrInputDocument document) 150 { 151 for (String sysPropId : _systemPropEP.getExtensionsIds()) 152 { 153 SystemProperty sysProp = _systemPropEP.getExtension(sysPropId); 154 155 sysProp.index(content, document); 156 } 157 } 158 159 /** 160 * Index the content type and all its supertypes in the given document (recursively). 161 * @param cTypeId The ID of the content type to index. 162 * @param document The solr document to index into. 163 * @param fieldName The field name. 164 */ 165 protected void indexAllContentTypes(String cTypeId, SolrInputDocument document, String fieldName) 166 { 167 document.addField(fieldName, cTypeId); 168 169 if (_cTypeEP.hasExtension(cTypeId)) 170 { 171 ContentType contentType = _cTypeEP.getExtension(cTypeId); 172 for (String supertypeId : contentType.getSupertypeIds()) 173 { 174 indexAllContentTypes(supertypeId, document, fieldName); 175 } 176 } 177 } 178 179 /** 180 * Index the fields specified in the indexation model. 181 * @param content The content to index. 182 * @param document The main content solr document. 183 * @param additionalDocuments The additional documents for repeater instances. 184 */ 185 protected void indexModelFields(Content content, SolrInputDocument document, List<SolrInputDocument> additionalDocuments) 186 { 187 IndexingModel indexingModel = null; 188 try 189 { 190 indexingModel = _cTypesHelper.getIndexingModel(content); 191 } 192 catch (RuntimeException e) 193 { 194 getLogger().error("indexContent > Error getting the indexing model of content " + content.getId(), e); 195 throw e; 196 } 197 198 for (IndexingField field : indexingModel.getFields()) 199 { 200 if (field instanceof CustomIndexingField) 201 { 202 Object[] values = ((CustomIndexingField) field).getValues(content); 203 indexValues(content, field.getName(), field.getType(), values, document, null); 204 } 205 else if (field instanceof MetadataIndexingField) 206 { 207 String metadataPath = ((MetadataIndexingField) field).getMetadataPath(); 208 String[] pathSegments = metadataPath.split(ContentConstants.METADATA_PATH_SEPARATOR); 209 210 MetadataDefinition definition = _cTypesHelper.getMetadataDefinition(pathSegments[0], content.getTypes(), content.getMixinTypes()); 211 if (definition != null) 212 { 213 findAndIndexMetadata(content, pathSegments, content.getMetadataHolder(), definition, field, field.getName(), document, null, additionalDocuments); 214 } 215 } 216 } 217 } 218 219 /** 220 * Populate a Solr input document by adding fields for a single system property. 221 * @param content The content to index 222 * @param propertyId The system property ID. 223 * @param document The solr document 224 * @throws Exception if an error occurred 225 */ 226 public void indexPartialSystemProperty(Content content, String propertyId, SolrInputDocument document) throws Exception 227 { 228 if (!_systemPropEP.hasExtension(propertyId)) 229 { 230 throw new IllegalStateException("The property '" + propertyId + "' can't be indexed as it does not exist."); 231 } 232 233 SolrInputDocument tempDocument = new SolrInputDocument(); 234 235 SystemProperty property = _systemPropEP.getExtension(propertyId); 236 property.index(content, tempDocument); 237 238 // Copy the indexed values as partial updates. 239 for (String fieldName : tempDocument.getFieldNames()) 240 { 241 Collection<Object> fieldValues = tempDocument.getFieldValues(fieldName); 242 243 Map<String, Object> partialUpdate = new HashMap<>(); 244 partialUpdate.put("set", fieldValues); 245 document.addField(fieldName, partialUpdate); 246 } 247 248 document.addField("id", content.getId()); 249 } 250 251 /** 252 * Find the metadata to index from its path 253 * @param content the content currently being traversed. 254 * @param pathSegments The segments of path of metadata to index 255 * @param metadata The parent composite metadata 256 * @param definition The metadata definition 257 * @param field the current indexing field. 258 * @param fieldName the name of the field to index. 259 * @param document The main solr document to index into 260 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 261 * @param additionalDocuments The additional documents 262 */ 263 protected void findAndIndexMetadata(Content content, String[] pathSegments, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 264 { 265 String currentFieldName = pathSegments[0]; 266 267 IndexingModel indexingModel = null; 268 try 269 { 270 indexingModel = _cTypesHelper.getIndexingModel(content); 271 } 272 catch (RuntimeException e) 273 { 274 if (content != null) 275 { 276 getLogger().error("findAndIndexMetadata > Error while indexing content " + content.getId() + " metadata", e); 277 } 278 else 279 { 280 getLogger().error("findAndIndexMetadata > Error while indexing null content metadata"); 281 } 282 throw e; 283 } 284 285 IndexingField refField = indexingModel.getField(currentFieldName); 286 if (refField != null && refField instanceof CustomMetadataIndexingField) 287 { 288 CustomMetadataIndexingField overridingField = (CustomMetadataIndexingField) refField; 289 findAndIndexOverridingField(content, indexingModel, overridingField, fieldName, definition, pathSegments, document, contentDoc, additionalDocuments); 290 } 291 else 292 { 293 if (metadata.hasMetadata(currentFieldName)) 294 { 295 findAndIndexMetadataField(content, pathSegments, metadata, definition, field, fieldName, document, contentDoc, additionalDocuments); 296 } 297 } 298 } 299 300 /** 301 * Find and index a metadata. 302 * @param content the current content being traversed. 303 * @param pathSegments the full metadata path segments. 304 * @param metadata the current metadata holder. 305 * @param definition the current metadata definition. 306 * @param field the current indexing field. 307 * @param fieldName the name of the field to index. 308 * @param document the solr main document. 309 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 310 * @param additionalDocuments the solr additional documents. 311 */ 312 protected void findAndIndexMetadataField(Content content, String[] pathSegments, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 313 { 314 String currentFieldName = pathSegments[0]; 315 316 if (pathSegments.length == 1) 317 { 318 indexMetadata(content, currentFieldName, metadata, document, contentDoc, additionalDocuments, fieldName, definition); 319 return; 320 } 321 322 String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length); 323 324 switch (definition.getType()) 325 { 326 case BINARY: 327 case BOOLEAN: 328 case STRING: 329 case USER: 330 case LONG: 331 case DOUBLE: 332 case DATE: 333 case DATETIME: 334 case REFERENCE: 335 case RICH_TEXT: 336 case FILE: 337 case GEOCODE: 338 getLogger().warn("The metadata '{}' of type {} can not be a part of a path to index : {}", currentFieldName, definition.getType().toString(), StringUtils.join(pathSegments, ContentConstants.METADATA_PATH_SEPARATOR)); 339 break; 340 case CONTENT: 341 String[] contentIds = metadata.getStringArray(currentFieldName, new String[0]); 342 for (String contentId : contentIds) 343 { 344 try 345 { 346 Content refContent = _resolver.resolveById(contentId); 347 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], refContent.getTypes(), refContent.getMixinTypes()); 348 findAndIndexMetadata(refContent, followingSegments, refContent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 349 } 350 catch (UnknownAmetysObjectException e) 351 { 352 // Nothing to index 353 } 354 } 355 break; 356 case SUB_CONTENT: 357 TraversableAmetysObject objectCollection = metadata.getObjectCollection(currentFieldName); 358 AmetysObjectIterable<Content> subcontents = objectCollection.getChildren(); 359 for (Content subcontent : subcontents) 360 { 361 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], subcontent.getTypes(), subcontent.getMixinTypes()); 362 findAndIndexMetadata(subcontent, followingSegments, subcontent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 363 } 364 break; 365 case COMPOSITE: 366 CompositeMetadata composite = metadata.getCompositeMetadata(currentFieldName); 367 368 if (definition instanceof RepeaterDefinition) 369 { 370 String[] entries = composite.getMetadataNames(); 371 for (String entry : entries) 372 { 373 findAndIndexMetadata(content, followingSegments, composite.getCompositeMetadata(entry), definition.getMetadataDefinition(followingSegments[0]), field, fieldName, document, contentDoc, additionalDocuments); 374 } 375 } 376 else 377 { 378 findAndIndexMetadata(content, followingSegments, composite, definition.getMetadataDefinition(followingSegments[0]), field, fieldName, document, contentDoc, additionalDocuments); 379 } 380 break; 381 default: 382 break; 383 384 } 385 } 386 387 /** 388 * Find and index a property represented by an overriding field. 389 * @param content the current content being traversed. 390 * @param indexingModel the current indexing model. 391 * @param pathSegments the full metadata path segments. 392 * @param definition the current metadata definition. 393 * @param field the current indexing field. 394 * @param fieldName the name of the field to index. 395 * @param document the solr main document. 396 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 397 * @param additionalDocuments the solr additional documents. 398 */ 399 protected void findAndIndexOverridingField(Content content, IndexingModel indexingModel, CustomMetadataIndexingField field, String fieldName, MetadataDefinition definition, String[] pathSegments, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 400 { 401 String currentFieldName = field.getName(); 402 403 if (pathSegments.length == 1) 404 { 405 indexOverridingField(field, content, fieldName, document, contentDoc, additionalDocuments); 406 return; 407 } 408 409 String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length); 410 MetadataType type = definition.getType(); 411 412 switch (type) 413 { 414 case BINARY: 415 case BOOLEAN: 416 case STRING: 417 case USER: 418 case LONG: 419 case DOUBLE: 420 case DATE: 421 case DATETIME: 422 case REFERENCE: 423 case RICH_TEXT: 424 case FILE: 425 case GEOCODE: 426 getLogger().warn("The field '{}' of type {} can not be a part of a path to index : {}", currentFieldName, type.toString(), StringUtils.join(pathSegments, ContentConstants.METADATA_PATH_SEPARATOR)); 427 break; 428 case COMPOSITE: 429 getLogger().warn("The type {} is invalid for the overriding field '{}'.", type.toString(), currentFieldName); 430 break; 431 case CONTENT: 432 case SUB_CONTENT: 433 String[] contentIds = (String[]) field.getValues(content); 434 for (String contentId : contentIds) 435 { 436 Content refContent = _resolver.resolveById(contentId); 437 MetadataDefinition remoteMetadataDef = _cTypesHelper.getMetadataDefinition(followingSegments[0], refContent.getTypes(), refContent.getMixinTypes()); 438 findAndIndexMetadata(refContent, followingSegments, refContent.getMetadataHolder(), remoteMetadataDef, field, fieldName, document, contentDoc, additionalDocuments); 439 } 440 break; 441 default: 442 break; 443 } 444 } 445 446 /** 447 * Index a content metadata. 448 * @param content the current content being traversed. 449 * @param metadataName The name of metadata to index 450 * @param metadata The parent composite metadata 451 * @param document the solr document to index into. 452 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 453 * @param additionalDocuments The solr additional documents used for repeater instance 454 * @param fieldName the name of the indexed field. 455 * @param definition the metadata definition. 456 */ 457 public void indexMetadata(Content content, String metadataName, CompositeMetadata metadata, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments, String fieldName, MetadataDefinition definition) 458 { 459 String language = content.getLanguage(); 460 461 switch (definition.getType()) 462 { 463 case STRING: 464 indexStringMetadata(metadata, metadataName, content.getId(), document, contentDoc, fieldName, language, definition); 465 break; 466 case USER: 467 indexUserMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 468 break; 469 case GEOCODE: 470 indexGeoCodeMetadata(metadata, metadataName, document, fieldName, definition); 471 break; 472 case BINARY: 473 indexBinaryMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 474 break; 475 case FILE: 476 indexFileMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 477 break; 478 case DATE: 479 indexDateMetadata(metadata, metadataName, document, fieldName, definition); 480 break; 481 case DATETIME: 482 indexDateTimeMetadata(metadata, metadataName, document, fieldName, definition); 483 break; 484 case CONTENT: 485 indexContentMetadata(metadata, metadataName, document, fieldName, definition); 486 break; 487 case SUB_CONTENT: 488 indexSubContentMetadata(metadata, metadataName, document, fieldName, definition); 489 break; 490 case LONG: 491 indexLongMetadata(metadata, metadataName, document, fieldName, definition); 492 break; 493 case DOUBLE: 494 indexDoubleMetadata(metadata, metadataName, document, fieldName, definition); 495 break; 496 case BOOLEAN: 497 indexBooleanMetadata(metadata, metadataName, document, fieldName, definition); 498 break; 499 case RICH_TEXT: 500 indexRichtextMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 501 break; 502 case COMPOSITE: 503 if (definition instanceof RepeaterDefinition) 504 { 505 indexRepeaterMetadata(content, metadata, metadataName, document, contentDoc, fieldName, definition, additionalDocuments); 506 } 507 else 508 { 509 indexCompositeMetadata(content, metadata, metadataName, document, contentDoc, fieldName, definition, additionalDocuments); 510 } 511 break; 512 case REFERENCE: 513 // TODO reference -> to be indexed? 514 break; 515 default: 516 break; 517 } 518 } 519 520 /** 521 * Index a property represented by an overriding field. 522 * @param field The overriding field. 523 * @param content The content of which to get the property. 524 * @param fieldName The name of the field to index. 525 * @param document the solr document to index into. 526 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 527 * @param additionalDocuments The solr additional documents used for repeater instance 528 */ 529 public void indexOverridingField(CustomMetadataIndexingField field, Content content, String fieldName, SolrInputDocument document, SolrInputDocument contentDoc, List<SolrInputDocument> additionalDocuments) 530 { 531 Object[] values = field.getValues(content); 532 MetadataDefinition definition = field.getMetadataDefinition(); 533 boolean isFacetable = definition.getEnumerator() != null; 534 String language = content.getLanguage(); 535 536 switch (definition.getType()) 537 { 538 case STRING: 539 indexStringValues((String[]) values, content.getId(), document, contentDoc, fieldName, language, isFacetable); 540 break; 541 case USER: 542 UserIdentity[] users = new UserIdentity[values.length]; 543 for (int i = 0; i < values.length; i++) 544 { 545 users[i] = UserIdentity.stringToUserIdentity((String) values[i]); 546 } 547 indexUserValues(users, document, contentDoc, fieldName, language); 548 break; 549 case GEOCODE: 550 if (values.length > 1) 551 { 552 indexGeocodeValue((double) values[0], (double) values[1], document, fieldName); 553 } 554 break; 555 case BINARY: 556 if (values.length > 0) 557 { 558 indexFullTextBinaryValue((InputStream) values[0], document, contentDoc, fieldName, language); 559 } 560 break; 561 case FILE: 562 indexFileValue(values, document, contentDoc, fieldName, language); 563 break; 564 case DATE: 565 indexDateValues((Date[]) values, document, fieldName); 566 break; 567 case DATETIME: 568 indexDateTimeValues((Date[]) values, document, fieldName); 569 break; 570 case CONTENT: 571 indexContentValues((String[]) values, document, fieldName); 572 break; 573 case SUB_CONTENT: 574 indexContentValues((String[]) values, document, fieldName); 575 break; 576 case LONG: 577 indexLongValues((Long[]) values, document, fieldName, isFacetable); 578 break; 579 case DOUBLE: 580 indexDoubleValues((Double[]) values, document, fieldName, isFacetable); 581 break; 582 case BOOLEAN: 583 indexBooleanValues((Boolean[]) values, document, fieldName); 584 break; 585 case RICH_TEXT: 586 if (values.length > 0) 587 { 588 indexRichtextValue((RichText) values[0], document, contentDoc, fieldName, language); 589 } 590 break; 591 case COMPOSITE: 592 break; 593 case REFERENCE: 594 // TODO reference -> to be indexed? 595 break; 596 default: 597 break; 598 } 599 } 600 601 /** 602 * Index values 603 * @param content The content being indexed. 604 * @param fieldName The Solr field's name 605 * @param type the type of values to index 606 * @param values the values 607 * @param document the Solr document 608 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 609 */ 610 public void indexValues(Content content, String fieldName, MetadataType type, Object[] values, SolrInputDocument document, SolrInputDocument contentDoc) 611 { 612 String language = content.getLanguage(); 613 614 switch (type) 615 { 616 case STRING: 617 indexStringValues((String[]) values, content.getId(), document, contentDoc, fieldName, language, false); 618 break; 619 case LONG: 620 indexLongValues((Long[]) values, document, fieldName, false); 621 break; 622 case DOUBLE: 623 indexDoubleValues((Double[]) values, document, fieldName, false); 624 break; 625 case DATE: 626 indexDateValues((Date[]) values, document, fieldName); 627 break; 628 case DATETIME: 629 indexDateTimeValues((Date[]) values, document, fieldName); 630 break; 631 case CONTENT: 632 indexContentValues((String[]) values, document, fieldName); 633 break; 634 case BOOLEAN: 635 indexBooleanValues((Boolean[]) values, document, fieldName); 636 break; 637 case USER: 638 UserIdentity[] users = new UserIdentity[values.length]; 639 for (int i = 0; i < values.length; i++) 640 { 641 users[i] = UserIdentity.stringToUserIdentity((String) values[i]); 642 } 643 indexUserValues(users, document, contentDoc, fieldName, language); 644 break; 645 case RICH_TEXT: 646 indexRichtextValue((RichText) values[0], document, contentDoc, fieldName, language); 647 break; 648 case BINARY: 649 case FILE: 650 case COMPOSITE: 651 case REFERENCE: 652 case SUB_CONTENT: 653 case GEOCODE: 654 getLogger().warn("Only primitive type is allowed on a custom indexing field"); 655 break; 656 default: 657 break; 658 } 659 } 660 661 662 /** 663 * Index a 'string' metadata 664 * @param metadata The parent composite metadata 665 * @param metadataName The name of metadata to index 666 * @param contentId The content id. For logging purposes 667 * @param document The solr document to index into 668 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 669 * @param fieldName The index field name 670 * @param language The content language. 671 * @param definition The metadata definition 672 */ 673 public void indexStringMetadata(CompositeMetadata metadata, String metadataName, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 674 { 675 String[] strValues = metadata.getStringArray(metadataName, new String[0]); 676 indexStringValues(strValues, contentId, document, contentDoc, fieldName, language, definition.getEnumerator() != null); 677 } 678 679 /** 680 * Index 'string' values 681 * @param values The values 682 * @param contentId The content id. For logging purposes 683 * @param document The solr document to index into 684 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 685 * @param fieldName The index field name 686 * @param language The content language. 687 * @param isFacetable true if the field can be used as a facet. 688 */ 689 public void indexStringValues(String[] values, String contentId, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, boolean isFacetable) 690 { 691 for (String value : values) 692 { 693 if (!isFacetable) 694 { 695 // No enumerator: index as full-text. 696 document.addField(fieldName + "_txt_" + language, value); 697 document.addField(fieldName + "_txt_stemmed_" + language, value); 698 document.addField(fieldName + "_txt_ws_" + language, value); 699 } 700 else 701 { 702 // Facets (enumeration only) 703 document.addField(fieldName + "_s_dv", value); 704 } 705 706 // Index without analyzing. 707 String possiblyTruncatedValue = _truncateUtf8StringValue(value, contentId, fieldName); 708 document.addField(fieldName + "_s", possiblyTruncatedValue); 709 710 // Index without analyzing but lower-case (for wildcard queries). 711 document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase()); 712 713 // Exact words tokenized by whitespace. 714 document.addField(fieldName + "_s_ws", value.toLowerCase()); 715 716 // Index with analyze (full-text search). 717 document.addField(fieldName + "_txt", value); 718 719 indexFulltextValue(document, contentDoc, value, language); 720 } 721 722 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 723 if (values.length > 0 && !document.containsKey(sortField)) 724 { 725 // FIXME Si la meta est enumerée, indexer le label ? dans quelle langue ? 726 document.addField(sortField, SolrFieldHelper.getSortValue(values[0])); 727 } 728 } 729 730 private String _truncateUtf8StringValue(String value, String contentId /*logging purpose*/, String fieldName /*logging purpose*/) 731 { 732 if (value.length() * 4 <= __SOLR_STRING_NB_BYTES_LIMIT) 733 { 734 // With UTF-8, a character is encoded using 1, 2, 3 or 4 bytes, so (value.length() <= value.getBytes().length <= 4 * value.length()) 735 // As a result, value.getBytes().length <= limit 736 return value; 737 } 738 739 // There is a doubt, the string may need to be truncated (or not) 740 byte[] valueBytes = value.getBytes(StandardCharsets.UTF_8); 741 int bytesLength = valueBytes.length; 742 if (bytesLength <= __SOLR_STRING_NB_BYTES_LIMIT) 743 { 744 return value; 745 } 746 747 getLogger().warn("The string value for content '{}' and field name '{}' is longer ({}) than the max bytes length {}. It will be truncated to prevent Solr error, but you should consider verifying why this string is so long.", contentId, fieldName, bytesLength, __SOLR_STRING_NB_BYTES_LIMIT); 748 749 // Need a truncation (inspired by https://stackoverflow.com/questions/119328/how-do-i-truncate-a-java-string-to-fit-in-a-given-number-of-bytes-once-utf-8-en#answer-35148974) 750 CharBuffer charBuffer = CharBuffer.allocate(__SOLR_STRING_NB_BYTES_LIMIT); 751 CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() 752 .onMalformedInput(CodingErrorAction.IGNORE); 753 decoder.decode(ByteBuffer.wrap(valueBytes, 0, __SOLR_STRING_NB_BYTES_LIMIT), charBuffer, true); 754 decoder.flush(charBuffer); 755 return new String(charBuffer.array(), 0, charBuffer.position()); 756 } 757 758 /** 759 * Index a 'date' metadata 760 * @param metadata The parent composite metadata 761 * @param metadataName The name of metadata to index 762 * @param document The solr document to index into 763 * @param fieldName The index field name 764 * @param definition The metadata definition 765 */ 766 public void indexDateMetadata (CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 767 { 768 Date[] dateValues = metadata.getDateArray(metadataName, new Date[0]); 769 indexDateValues (dateValues, document, fieldName); 770 } 771 772 /** 773 * Index 'date' values 774 * @param values The values 775 * @param document The solr document to index into 776 * @param fieldName The index field name 777 */ 778 public void indexDateValues (Date[] values, SolrInputDocument document, String fieldName) 779 { 780 for (Date value : values) 781 { 782 document.addField(fieldName + "_dt", SolrIndexer.dateFormat().format(value)); 783 } 784 785 String sortField = fieldName + "_dt_sort"; 786 if (values.length > 0 && !document.containsKey(sortField)) 787 { 788 document.addField(sortField, SolrIndexer.dateFormat().format(values[0])); 789 } 790 } 791 792 /** 793 * Index a 'datetime' metadata 794 * @param metadata The parent composite metadata 795 * @param metadataName The name of metadata to index 796 * @param document The solr document to index into 797 * @param fieldName The index field name 798 * @param definition The metadata definition 799 */ 800 public void indexDateTimeMetadata (CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 801 { 802 Date[] dateValues = metadata.getDateArray(metadataName, new Date[0]); 803 indexDateTimeValues(dateValues, document, fieldName); 804 } 805 806 /** 807 * Index 'datetime' values 808 * @param values The values 809 * @param document The solr document to index into 810 * @param fieldName The index field name 811 */ 812 public void indexDateTimeValues (Date[] values, SolrInputDocument document, String fieldName) 813 { 814 for (Date value : values) 815 { 816 document.addField(fieldName + "_dt", SolrIndexer.dateFormat().format(value)); 817 } 818 819 String sortField = fieldName + "_dt_sort"; 820 if (values.length > 0 && !document.containsKey(sortField)) 821 { 822 document.addField(sortField, SolrIndexer.dateFormat().format(values[0])); 823 } 824 } 825 826 /** 827 * Index a 'double' metadata 828 * @param metadata The parent composite metadata 829 * @param metadataName The name of metadata to index 830 * @param document The solr document to index into 831 * @param fieldName The index field name 832 * @param definition The metadata definition 833 */ 834 public void indexDoubleMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 835 { 836 boolean isFacetable = definition.getEnumerator() != null; 837 double[] values = metadata.getDoubleArray(metadataName, new double[0]); 838 indexDoubleValues (ArrayUtils.toObject(values), document, fieldName, isFacetable); 839 } 840 841 /** 842 * Index 'double' values 843 * @param values The values 844 * @param document The solr document to index into 845 * @param fieldName The index field name 846 * @param isFacetable true if the field can be used as a facet. 847 */ 848 public void indexDoubleValues(Double[] values, SolrInputDocument document, String fieldName, boolean isFacetable) 849 { 850 for (Double value : values) 851 { 852 document.addField(fieldName + "_d", value); 853 if (isFacetable) 854 { 855 document.addField(fieldName + "_d_dv", value); 856 } 857 } 858 859 String sortField = fieldName + "_d_sort"; 860 if (values.length > 0 && !document.containsKey(sortField)) 861 { 862 document.addField(sortField, values[0]); 863 } 864 } 865 866 /** 867 * Index a 'long' metadata 868 * @param metadata The parent composite metadata 869 * @param metadataName The name of metadata to index 870 * @param document The solr document to index into 871 * @param fieldName The index field name 872 * @param definition The metadata definition 873 */ 874 public void indexLongMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 875 { 876 boolean isFacetable = definition.getEnumerator() != null; 877 long[] values = metadata.getLongArray(metadataName, new long[0]); 878 indexLongValues(ArrayUtils.toObject(values), document, fieldName, isFacetable); 879 } 880 881 /** 882 * Index 'long' values 883 * @param values The values 884 * @param document The solr document to index into 885 * @param fieldName The index field name 886 * @param isFacetable true if the field can be used as a facet. 887 */ 888 public void indexLongValues(Long[] values, SolrInputDocument document, String fieldName, boolean isFacetable) 889 { 890 for (Long value : values) 891 { 892 document.addField(fieldName + "_l", value); 893 if (isFacetable) 894 { 895 document.addField(fieldName + "_l_dv", value); 896 } 897 } 898 899 String sortField = fieldName + "_l_sort"; 900 if (values.length > 0 && !document.containsKey(sortField)) 901 { 902 document.addField(sortField, values[0]); 903 } 904 } 905 906 /** 907 * Index a 'user' metadata 908 * @param metadata The parent composite metadata 909 * @param metadataName The name of metadata to index 910 * @param document The solr document to index into 911 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 912 * @param fieldName The index field name 913 * @param language The content language. 914 * @param definition The metadata definition 915 */ 916 public void indexUserMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 917 { 918 UserIdentity[] users = metadata.getUserArray(metadataName); 919 indexUserValues(users, document, contentDoc, fieldName, language); 920 } 921 922 /** 923 * Index 'user' values 924 * @param users The users 925 * @param document The solr document to index into 926 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 927 * @param fieldName The index field name 928 * @param language The content language. 929 */ 930 public void indexUserValues(UserIdentity[] users, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 931 { 932 int count = 0; 933 for (UserIdentity userIdentity : users) 934 { 935 String fullName = _userHelper.getUserFullName(userIdentity); 936 String identityAsString = UserIdentity.userIdentityToString(userIdentity); 937 938 indexFulltextValue(document, contentDoc, identityAsString, language); 939 940 // Dynamic fields 941 document.addField(fieldName + "_s", identityAsString); 942 943 if (StringUtils.isNotEmpty(fullName)) 944 { 945 document.addField(fieldName + "_txt", fullName); 946 947 indexFulltextValue(document, contentDoc, fullName, language); 948 } 949 950 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 951 if (count == 0 && StringUtils.isNotEmpty(fullName) && !document.containsKey(sortField)) 952 { 953 // Index only first user for sorting 954 document.addField(sortField, SolrFieldHelper.getSortValue(fullName)); 955 } 956 count++; 957 } 958 } 959 960 /** 961 * Index a 'boolean' metadata 962 * @param metadata The parent composite metadata 963 * @param metadataName The name of metadata to index 964 * @param document The solr document to index into 965 * @param fieldName The index field name 966 * @param definition The metadata definition 967 */ 968 public void indexBooleanMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 969 { 970 boolean[] values = metadata.getBooleanArray(metadataName, new boolean[0]); 971 indexBooleanValues(ArrayUtils.toObject(values), document, fieldName); 972 } 973 974 /** 975 * Index 'boolean' values 976 * @param values The values 977 * @param document The solr document to index into 978 * @param fieldName The index field name 979 */ 980 public void indexBooleanValues(Boolean[] values, SolrInputDocument document, String fieldName) 981 { 982 for (Boolean value : values) 983 { 984 document.addField(fieldName + "_b", value); 985 } 986 987 String sortField = fieldName + "_b_sort"; 988 if (values.length > 0 && !document.containsKey(sortField)) 989 { 990 document.addField(sortField, values[0]); 991 } 992 } 993 994 /** 995 * Index a 'richtext' metadata 996 * @param metadata The parent composite metadata 997 * @param metadataName The name of metadata to index 998 * @param document The solr document to index into 999 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1000 * @param fieldName The index field name 1001 * @param language The content language. 1002 * @param definition The metadata definition 1003 */ 1004 public void indexRichtextMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1005 { 1006 indexRichtextValue(metadata.getRichText(metadataName), document, contentDoc, fieldName, language); 1007 } 1008 1009 /** 1010 * Index 'richtext' values 1011 * @param richText The rich text to index. 1012 * @param document The solr document to index into 1013 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1014 * @param fieldName The index field name. 1015 * @param language The content language. 1016 */ 1017 public void indexRichtextValue(RichText richText, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1018 { 1019 try (InputStream is = richText.getInputStream()) 1020 { 1021 String value = _richTextToString(is); 1022 1023 // Index as a text field. 1024 document.addField(fieldName + "_txt_" + language, value); 1025 document.addField(fieldName + "_txt_stemmed_" + language, value); 1026 document.addField(fieldName + "_txt_ws_" + language, value); 1027 1028 // Index in the full-text value. 1029 SolrContentIndexer.indexFulltextValue(document, value, language); 1030 1031 if (contentDoc != null) 1032 { 1033 SolrContentIndexer.indexFulltextValue(contentDoc, value, language); 1034 } 1035 } 1036 catch (Exception e) 1037 { 1038 getLogger().warn("Failed to index RICH_TEXT value'", e); 1039 } 1040 } 1041 1042 /** 1043 * Gets a XML as a string and extract the text only 1044 * @param is The inputstream of XML 1045 * @return The text or null if the XML is not well formed 1046 */ 1047 protected String _richTextToString(InputStream is) 1048 { 1049 try 1050 { 1051 RichTextHandler txtHandler = new RichTextHandler(); 1052 _parser.parse(new InputSource(is), txtHandler); 1053 return txtHandler.getValue().trim(); 1054 } 1055 catch (IOException | SAXException e) 1056 { 1057 getLogger().error("Cannot parse inputstream", e); 1058 return null; 1059 } 1060 } 1061 1062 1063 1064 /** 1065 * Index a 'binary' metadata 1066 * @param metadata The parent composite metadata 1067 * @param metadataName The name of metadata to index 1068 * @param document The solr document to index into 1069 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1070 * @param fieldName The index field name 1071 * @param language The content language. 1072 * @param definition The metadata definition 1073 */ 1074 public void indexBinaryMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1075 { 1076 // Index file name. 1077 BinaryMetadata binary = metadata.getBinaryMetadata(metadataName); 1078 document.addField(fieldName + "_txt", binary.getFilename()); 1079 1080 // Index the contents. 1081 indexFullTextBinary(metadata, metadataName, document, contentDoc, fieldName, language, definition); 1082 } 1083 1084 /** 1085 * Index a 'file' metadata 1086 * @param metadata The parent composite metadata 1087 * @param metadataName The name of metadata to index 1088 * @param document The solr document to index into 1089 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1090 * @param fieldName The index field name 1091 * @param language The content language. 1092 * @param definition The metadata definition 1093 */ 1094 public void indexFileMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1095 { 1096 if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.BINARY.equals(metadata.getType(metadataName))) 1097 { 1098 indexBinaryMetadata(metadata, metadataName, document, contentDoc, fieldName, language, definition); 1099 } 1100 else 1101 { 1102 // Resource from the explorer. 1103 String value = metadata.getString(metadataName); 1104 1105 try 1106 { 1107 Resource resource = (Resource) _resolver.resolveById(value); 1108 1109 // Index file name. 1110 document.addField(fieldName + "_txt", resource.getName()); 1111 1112 // Index the contents. 1113 indexResourceContent(resource, document, contentDoc, language); 1114 1115// document.addField(prefix + fieldName + "$path", resource.getId()); 1116// document.addField(prefix + fieldName + "$type", "explorer"); 1117// document.addField(prefix + fieldName + "$mime-type", resource.getMimeType()); 1118// document.addField(prefix + fieldName + "$filename", filename); 1119// document.addField(prefix + fieldName + "$lastModified", resource.getLastModified()); 1120// document.addField(prefix + fieldName + "$size", resource.getLength()); 1121// 1122// String viewUrl = "/plugins/explorer/resource?id=" + resource.getId(); 1123// document.addField(prefix + fieldName + "$viewUrl", viewUrl); 1124// document.addField(prefix + fieldName + "$downloadUrl", viewUrl + "&download=true"); 1125 } 1126 catch (AmetysRepositoryException e) 1127 { 1128 getLogger().warn(String.format("Unable to index the resource of id '%s' : resource does not exist.", value), e); 1129 } 1130 } 1131 } 1132 1133 /** 1134 * Index a 'file' metadata 1135 * @param values The values. 1136 * @param document The solr document to index into 1137 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1138 * @param fieldName The index field name 1139 * @param language The content language. 1140 */ 1141 public void indexFileValue(Object[] values, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1142 { 1143 String type = (String) values[0]; 1144 if (org.ametys.plugins.repository.metadata.CompositeMetadata.MetadataType.BINARY.equals(type)) 1145 { 1146 indexFullTextBinaryValue((InputStream) values[1], document, contentDoc, fieldName, language); 1147 } 1148 else 1149 { 1150 indexResourceContent((Resource) values[1], document, contentDoc, language); 1151 } 1152 } 1153 1154 /** 1155 * Index a 'binary' metadata 1156 * @param metadata The parent composite metadata 1157 * @param metadataName The name of metadata to index 1158 * @param document The solr document to index into 1159 * @param contentDoc The content document. 1160 * @param fieldName The index field name 1161 * @param language The content language. 1162 * @param definition The metadata definition 1163 */ 1164 protected void indexFullTextBinary(CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language, MetadataDefinition definition) 1165 { 1166 try (InputStream is = metadata.getBinaryMetadata(metadataName).getInputStream()) 1167 { 1168 indexFullTextBinaryValue(is, document, contentDoc, fieldName, language); 1169 } 1170 catch (IOException e) 1171 { 1172 throw new RuntimeException(e); 1173 } 1174 } 1175 1176 /** 1177 * Index a 'binary' value 1178 * @param is An InputStream on the binary data. 1179 * @param document The solr document to index into 1180 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1181 * @param fieldName The index field name 1182 * @param language The content language. 1183 */ 1184 protected void indexFullTextBinaryValue(InputStream is, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, String language) 1185 { 1186 try 1187 { 1188 String text = _tika.parseToString(is); 1189 1190 indexFulltextValue(document, contentDoc, text, language); 1191 } 1192 catch (Throwable e) 1193 { 1194 getLogger().warn(String.format("Failed to index binary field '%s'", fieldName), e); 1195 } 1196 } 1197 1198 /** 1199 * Index a 'content' metadata 1200 * @param metadata The parent composite metadata 1201 * @param metadataName The name of metadata to index 1202 * @param document The solr document to index into 1203 * @param fieldName The index field name 1204 * @param definition The metadata definition 1205 */ 1206 public void indexContentMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1207 { 1208 String[] contentIds = metadata.getStringArray(metadataName, new String[0]); 1209 indexContentValues(contentIds, document, fieldName); 1210 } 1211 1212 /** 1213 * Index content values. 1214 * @param contentIds The ID of the contents to index. 1215 * @param document The solr document to index into. 1216 * @param fieldName the field name. 1217 */ 1218 public void indexContentValues (String[] contentIds, SolrInputDocument document, String fieldName) 1219 { 1220 for (String contentId : contentIds) 1221 { 1222 document.addField(fieldName + "_s", contentId); 1223 // Facets 1224 document.addField(fieldName + "_s_dv", contentId); 1225 } 1226 1227 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 1228 if (contentIds.length > 0 && !document.containsKey(sortField)) 1229 { 1230 try 1231 { 1232 // TODO Est-ce qu'on peut faire autrement qu'un resolve ? 1233 Content content = _resolver.resolveById(contentIds[0]); 1234 document.addField(sortField, SolrFieldHelper.getSortValue(content.getTitle())); 1235 } 1236 catch (AmetysRepositoryException e) 1237 { 1238 // Do not index 1239 } 1240 } 1241 } 1242 1243 /** 1244 * Index a 'sub_content' metadata 1245 * @param metadata The parent composite metadata 1246 * @param metadataName The name of metadata to index 1247 * @param document The solr document to index into 1248 * @param fieldName The index field name 1249 * @param definition The metadata definition 1250 */ 1251 public void indexSubContentMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1252 { 1253 TraversableAmetysObject objectCollection = metadata.getObjectCollection(metadataName); 1254 AmetysObjectIterable<Content> subcontents = objectCollection.getChildren(); 1255 for (Content subcontent : subcontents) 1256 { 1257 document.addField(fieldName + "_s", subcontent.getId()); 1258 // Facets 1259 document.addField(fieldName + "_s_dv", subcontent.getId()); 1260 } 1261 1262// String sortField = fieldName + "_s_sort"; 1263 String sortField = SolrFieldHelper.getMetadataSortFieldName(MetadataType.STRING, fieldName); 1264 subcontents = objectCollection.getChildren(); 1265 Iterator<Content> it = subcontents.iterator(); 1266 1267 if (it.hasNext() && !document.containsKey(sortField)) 1268 { 1269 Content subcontent = it.next(); 1270 document.addField(sortField, SolrFieldHelper.getSortValue(subcontent.getTitle())); 1271 } 1272 } 1273 1274 /** 1275 * Index a 'geocode' metadata 1276 * @param metadata The parent composite metadata 1277 * @param metadataName The name of metadata to index 1278 * @param document The solr document to index into 1279 * @param fieldName The index field name 1280 * @param definition The metadata definition 1281 */ 1282 public void indexGeoCodeMetadata(CompositeMetadata metadata, String metadataName, SolrInputDocument document, String fieldName, MetadataDefinition definition) 1283 { 1284 CompositeMetadata geoCodeMetadata = metadata.getCompositeMetadata(metadataName); 1285 if (geoCodeMetadata.hasMetadata("longitude") && geoCodeMetadata.hasMetadata("latitude")) 1286 { 1287 double longitude = geoCodeMetadata.getDouble("longitude"); 1288 double latitude = geoCodeMetadata.getDouble("latitude"); 1289 1290 indexGeocodeValue(latitude, longitude, document, fieldName); 1291 } 1292 } 1293 1294 /** 1295 * Index a 'geocode' metadata 1296 * @param latitude the coord latitude. 1297 * @param longitude the coord longitude. 1298 * @param document The solr document to index into 1299 * @param fieldName The index field name 1300 */ 1301 public void indexGeocodeValue(double latitude, double longitude, SolrInputDocument document, String fieldName) 1302 { 1303 document.addField(fieldName + "$longitude_d", longitude); 1304 document.addField(fieldName + "$latitude_d", latitude); 1305 1306 String geoFieldName = SolrFieldHelper.getIndexingFieldName(MetadataType.GEOCODE, fieldName); 1307 document.addField(geoFieldName, longitude + " " + latitude); 1308 } 1309 1310 /** 1311 * Index a composite metadata, i.e. browse and index the sub-metadatas. 1312 * @param content The content being indexed. 1313 * @param metadata The parent metadata. 1314 * @param metadataName The composite metadata name. 1315 * @param document The solr document to index into. 1316 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1317 * @param fieldName The field name. 1318 * @param definition The composite metadata definition. 1319 * @param additionalDocuments The solr additional documents used for repeater instance 1320 */ 1321 public void indexCompositeMetadata(Content content, CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition, List<SolrInputDocument> additionalDocuments) 1322 { 1323 CompositeMetadata compositeMetadata = metadata.getCompositeMetadata(metadataName); 1324 1325 // Index recursively 1326 Set<String> subMetadataNames = definition.getMetadataNames(); 1327 for (String subMetadataName : subMetadataNames) 1328 { 1329 if (compositeMetadata.hasMetadata(subMetadataName)) 1330 { 1331 indexMetadata(content, subMetadataName, compositeMetadata, document, contentDoc, additionalDocuments, fieldName + ContentConstants.METADATA_PATH_SEPARATOR + subMetadataName, definition.getMetadataDefinition(subMetadataName)); 1332 } 1333 } 1334 } 1335 1336 /** 1337 * Index a repeater metadata, i.e. browse and index the entries. 1338 * @param content The content being indexed. 1339 * @param metadata The parent metadata. 1340 * @param metadataName The repeater metadata name. 1341 * @param document The solr document to index into. 1342 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1343 * @param fieldName The field name. 1344 * @param definition The repeater metadata definition. 1345 * @param additionalDocuments The solr additional documents used for repeater instance 1346 */ 1347 public void indexRepeaterMetadata(Content content, CompositeMetadata metadata, String metadataName, SolrInputDocument document, SolrInputDocument contentDoc, String fieldName, MetadataDefinition definition, List<SolrInputDocument> additionalDocuments) 1348 { 1349 CompositeMetadata compositeMetadata = metadata.getCompositeMetadata(metadataName); 1350 1351 // Get and sort the entry names. 1352 String[] entries = compositeMetadata.getMetadataNames(); 1353 Arrays.sort(entries, MetadataManager.REPEATER_ENTRY_COMPARATOR); 1354 1355 for (int i = 0; i < entries.length; i++) 1356 { 1357 String entryName = entries[i]; 1358 int position = i + 1; 1359 1360 CompositeMetadata entry = compositeMetadata.getCompositeMetadata(entryName); 1361 1362 String repeaterID = document.getField("id").getFirstValue().toString() + "/" + fieldName + "/" + entryName; 1363 1364 // Creates a new Solr document for each entry 1365 SolrInputDocument repDocument = new SolrInputDocument(); 1366 repDocument.addField("id", repeaterID); 1367 document.addField(fieldName + "_s_dv", repeaterID); 1368 1369 repDocument.addField(DOCUMENT_TYPE, TYPE_REPEATER); 1370 repDocument.addField(REPEATER_ENTRY_POSITION, position); 1371 // Add the created document to additional documents 1372 additionalDocuments.add(repDocument); 1373 1374 SolrInputDocument parentContentDoc = contentDoc != null ? contentDoc : document; 1375 1376 Set<String> subMetadataNames = definition.getMetadataNames(); 1377 for (String subMetadataName : subMetadataNames) 1378 { 1379 if (entry.hasMetadata(subMetadataName)) 1380 { 1381 // Created document is now the main document 1382 indexMetadata(content, subMetadataName, entry, repDocument, parentContentDoc, additionalDocuments, subMetadataName, definition.getMetadataDefinition(subMetadataName)); 1383 } 1384 } 1385 } 1386 } 1387 1388 /** 1389 * Index the content of a resource. 1390 * @param resource The resource 1391 * @param document The solr document to index into 1392 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1393 * @param language The content language. 1394 */ 1395 protected void indexResourceContent(Resource resource, SolrInputDocument document, SolrInputDocument contentDoc, String language) 1396 { 1397 try (InputStream is = resource.getInputStream()) 1398 { 1399 indexResourceContentValue(is, resource.getDCSubject(), resource.getDCDescription(), language, document, contentDoc); 1400 1401 // TODO Declare and index DC metadata? 1402 // DC meta 1403// _resourceIndexer.indexDublinCoreMetadata(resource, document); 1404 } 1405 catch (Exception e) 1406 { 1407 getLogger().error("Unable to index resource at " + resource.getPath(), e); 1408 } 1409 } 1410 1411 /** 1412 * Index the content of a resource. 1413 * @param is An input stream on the resource content. 1414 * @param keywords The resource keywords. 1415 * @param description The resource description. 1416 * @param language The content language. 1417 * @param document The solr document to index into 1418 * @param contentDoc The parent content document to index into (can be null if the main document is the content doc itself). 1419 * @throws TikaException If an error occurs extracting the document's text content. 1420 * @throws IOException If an error occurs reading the document's text content. 1421 */ 1422 protected void indexResourceContentValue(InputStream is, String[] keywords, String description, String language, SolrInputDocument document, SolrInputDocument contentDoc) throws IOException, TikaException 1423 { 1424 String value = _tika.parseToString(is); 1425 1426 indexFulltextValue(document, contentDoc, value, language); 1427 1428 for (String keyword : keywords) 1429 { 1430 indexFulltextValue(document, contentDoc, keyword, language); 1431 } 1432 1433 if (description != null) 1434 { 1435 indexFulltextValue(document, contentDoc, description, language); 1436 } 1437 } 1438 1439 /** 1440 * Index a full-text value. 1441 * @param mainDocument The document being used, can be either the content document itself or a repeater document. 1442 * @param contentDoc The parent content document. If the mainDocument is the content document, this will be null. 1443 * @param text The text to index. 1444 * @param language The content language. 1445 */ 1446 protected void indexFulltextValue(SolrInputDocument mainDocument, SolrInputDocument contentDoc, String text, String language) 1447 { 1448 indexFulltextValue(mainDocument, text, language); 1449 1450 // The content doc is null if the main document is the content doc (to prevent indexing the data twice). 1451 if (contentDoc != null) 1452 { 1453 indexFulltextValue(contentDoc, text, language); 1454 } 1455 } 1456 1457 /** 1458 * Index a full-text value. 1459 * @param document The document to index into. 1460 * @param text The text to index. 1461 * @param language The content language. 1462 */ 1463 public static void indexFulltextValue(SolrInputDocument document, String text, String language) 1464 { 1465 if (StringUtils.isNotBlank(text)) 1466 { 1467 document.addField(FULL_GENERAL, text); 1468 document.addField(FULL_EXACT_WS, text); 1469 1470 indexLanguageFulltextValue(document, text, language); 1471 } 1472 } 1473 1474 /** 1475 * Index a full-text value. 1476 * @param document The document to index into. 1477 * @param text The text to index. 1478 * @param languages The languages. 1479 */ 1480 public static void indexFulltextValue(SolrInputDocument document, String text, Collection<String> languages) 1481 { 1482 if (StringUtils.isNotBlank(text)) 1483 { 1484 document.addField(FULL_GENERAL, text); 1485 document.addField(FULL_EXACT_WS, text); 1486 1487 for (String language : languages) 1488 { 1489 indexLanguageFulltextValue(document, text, language); 1490 } 1491 } 1492 } 1493 1494 /** 1495 * Index a full-text value in the language-specific fields. 1496 * @param document The document to index into. 1497 * @param text The text to index. 1498 * @param language The content language. 1499 */ 1500 protected static void indexLanguageFulltextValue(SolrInputDocument document, String text, String language) 1501 { 1502 document.addField(FULL_PREFIX + language, text); 1503 document.addField(FULL_STEMMED_PREFIX + language, text); 1504 } 1505}