/*
 *  Copyright 2020 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.ametys.plugins.contentio.csv;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.function.Function;
import java.util.stream.Collectors;

import org.apache.avalon.framework.parameters.Parameters;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.cocoon.acting.ServiceableAction;
import org.apache.cocoon.environment.ObjectModelHelper;
import org.apache.cocoon.environment.Redirector;
import org.apache.cocoon.environment.Request;
import org.apache.cocoon.environment.SourceResolver;
import org.apache.cocoon.servlet.multipart.Part;
import org.apache.cocoon.servlet.multipart.PartOnDisk;
import org.apache.cocoon.servlet.multipart.RejectedPart;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.detect.DefaultEncodingDetector;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.supercsv.io.CsvMapReader;
import org.supercsv.io.ICsvMapReader;
import org.supercsv.prefs.CsvPreference;

import org.ametys.cms.contenttype.ContentType;
import org.ametys.cms.contenttype.ContentTypeExtensionPoint;
import org.ametys.cms.data.type.ModelItemTypeConstants;
import org.ametys.core.cocoon.JSonReader;
import org.ametys.plugins.workflow.support.WorkflowProvider;
import org.ametys.runtime.i18n.I18nizableText;
import org.ametys.runtime.model.ModelItem;
import org.ametys.runtime.servlet.RuntimeConfig;

/**
 * Import contents from an uploaded CSV file.
 */
public class ImportCSVFileAction extends ServiceableAction
{
    private static final String[] _ALLOWED_EXTENSIONS = new String[] {"txt", "csv"};
    
    private static final String CONTENTIO_STORAGE_DIRECTORY = "contentio/temp";
    
    private ContentTypeExtensionPoint _contentTypeEP;
    
    private WorkflowProvider _workflowProvider;
    
    @Override
    public void service(ServiceManager serviceManager) throws ServiceException
    {
        super.service(serviceManager);
        _contentTypeEP = (ContentTypeExtensionPoint) serviceManager.lookup(ContentTypeExtensionPoint.ROLE);
        _workflowProvider = (WorkflowProvider) serviceManager.lookup(WorkflowProvider.ROLE);
    }
    
    @Override
    public Map act(Redirector redirector, SourceResolver resolver, Map objectModel, String source, Parameters parameters) throws Exception
    {
        Request request = ObjectModelHelper.getRequest(objectModel);
        
        Map<String, Object> result = new HashMap<>();
        String contentTypeId = (String) request.get("contentType");
        ContentType contentType = _contentTypeEP.getExtension(contentTypeId);
        Part part = (Part) request.get("file");
        if (part == null || part instanceof RejectedPart)
        {
            result.put("success", false);
            result.put("error", "rejected");
        }
        else
        {
            PartOnDisk uploadedFilePart = (PartOnDisk) part;
            File uploadedFile = uploadedFilePart.getFile();
            
            String filename = uploadedFilePart.getFileName().toLowerCase();
            
            if (!FilenameUtils.isExtension(filename, _ALLOWED_EXTENSIONS))
            {
                result.put("error", "invalid-extension");
                request.setAttribute(JSonReader.OBJECT_TO_READ, result);
                return EMPTY_MAP;
            }
            
            // Detect the file encoding, closing the detection stream once done
            Charset charset;
            try (TikaInputStream stream = TikaInputStream.get(uploadedFile.toPath()))
            {
                DefaultEncodingDetector defaultEncodingDetector = new DefaultEncodingDetector();
                charset = defaultEncodingDetector.detect(stream, new Metadata());
            }
            result.put("charset", charset);
            
            String[] headers = _extractHeaders(uploadedFile, request, charset);
            
            if (headers == null)
            {
                result.put("error", "no-header");
                request.setAttribute(JSonReader.OBJECT_TO_READ, result);
                return EMPTY_MAP;
            }
            
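            // Build the proposed header/attribute mapping:
            // - a trailing "*" on a header marks that column as an identifier
            // - the header (with "*" stripped and "." converted to "/") is looked up in the content type model;
            //   when no model item matches, the attribute path is left empty
            // - the attribute path returned to the client uses "." as separator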
            List<Map<String, String>> mapping = Arrays.asList(headers)
                    .stream()
                    .filter(StringUtils::isNotEmpty)
                    .map(header -> new HashMap<>(Map.of("header", header,
                                                        "attributePath", contentType.hasModelItem(header.replace("*", "").replace(".", "/")) ? header.replace("/", ".").replace("*", "") : "",
                                                        "isId", Boolean.toString(header.endsWith("*")))))
                    .collect(Collectors.toList());
            result.put("mapping", mapping);
            
            // Build a map counting how many attributes there are in each group
            Map<String, Long> attributeCount = mapping.stream()
                .map(map -> map.get("attributePath"))
                .filter(StringUtils::isNotEmpty)
                .map(attributePath -> {
                    String attributePrefix = "";
                    int endIndex = attributePath.lastIndexOf(".");
                    if (endIndex != -1)
                    {
                        attributePrefix = attributePath.substring(0, endIndex);
                    }
                    return attributePrefix.replace(".", "/");
                })
                .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
            
            // If an attribute is the only one of its group, it is the identifier
            for (Map<String, String> map : mapping)
            {
                String attributePath = map.get("attributePath").replace(".", "/");
                String attributePrefix = StringUtils.EMPTY;
                int endIndex = attributePath.lastIndexOf("/");
                if (endIndex != -1)
                {
                    attributePrefix = attributePath.substring(0, endIndex);
                }
                
                boolean parentIsContent;
                // If there is no prefix, it is an attribute of the content we want to import
                if (attributePrefix.equals(StringUtils.EMPTY))
                {
                    parentIsContent = true;
                }
                // Otherwise, check the model item
                else if (contentType.hasModelItem(attributePrefix))
                {
                    ModelItem modelItem = contentType.getModelItem(attributePrefix);
                    String modelTypeId = modelItem.getType().getId();
                    parentIsContent = ModelItemTypeConstants.CONTENT_ELEMENT_TYPE_ID.equals(modelTypeId);
                }
                else
                {
                    parentIsContent = false;
                }
                
                // If an attribute is the only one of its level, and is part of a content, consider it as the identifier
                if (attributeCount.getOrDefault(attributePrefix, 0L).equals(1L) && parentIsContent && StringUtils.isNotBlank(attributePath))
                {
                    map.put("isId", "true");
                }
            }
            
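            // Keep a copy of the uploaded file in the contentio temporary storage directory
            // and return its path to the client along with the mapping and available workflows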
            String newPath = _copyFile(uploadedFile.toPath());
            result.put("path", newPath);
            result.put("success", true);
            result.put("workflows", _getWorkflows());
            result.put("defaultWorkflow", contentType.getDefaultWorkflowName().orElse(null));
            result.put("fileName", uploadedFile.getName());
        }
        
        request.setAttribute(JSonReader.OBJECT_TO_READ, result);
        return EMPTY_MAP;
    }
    
    private String _copyFile(Path path) throws IOException
    {
        File contentIOStorageDir = FileUtils.getFile(RuntimeConfig.getInstance().getAmetysHome(), CONTENTIO_STORAGE_DIRECTORY);
        
        if (!contentIOStorageDir.exists())
        {
            if (!contentIOStorageDir.mkdirs())
            {
                throw new IOException("Unable to create the content import storage directory: " + contentIOStorageDir);
            }
        }
        
        String id = UUID.randomUUID().toString() + ".csv";
        Path copy = Files.copy(path, Paths.get(contentIOStorageDir.getPath(), id), StandardCopyOption.REPLACE_EXISTING);
        
        return copy.toString();
    }
    
    private CsvPreference _getCSVPreference(Request request)
    {
        String escapingChar = (String) request.get("escaping-char");
        String separatingChar = (String) request.get("separating-char");
        char separating = separatingChar.charAt(0);
        char escaping = escapingChar.charAt(0);
        
        if (separating == escaping)
        {
            throw new IllegalArgumentException("The separating character cannot be equal to the escaping character");
        }
        
        return new CsvPreference.Builder(escaping, separating, "\r\n").build();
    }
    
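    /**
     * Extract the CSV headers from the uploaded file
     * @param uploadedFile the uploaded file
     * @param request the request holding the CSV preferences (separating and escaping characters)
     * @param charset the detected charset of the file
     * @return the header columns, or <code>null</code> if the file is empty
     * @throws IOException if an error occurs while reading the file
     */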
    private String[] _extractHeaders(File uploadedFile, Request request, Charset charset) throws IOException
    {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(uploadedFile), charset)))
        {
            // Mark the stream so the header line can be re-read by the CSV reader after the emptiness check
            reader.mark(8192);
            
            CsvPreference preference = _getCSVPreference(request);
            String headerLine = reader.readLine();
            if (headerLine != null)
            {
                reader.reset();
                try (ICsvMapReader mapReader = new CsvMapReader(reader, preference))
                {
                    return mapReader.getHeader(true);
                }
            }
            else
            {
                return null;
            }
        }
    }
    
    /**
     * Get the available workflows
     * @return the list of workflows, as maps containing a "value" (the workflow name) and a "label"
     */
    private List<Map<String, Object>> _getWorkflows()
    {
        List<Map<String, Object>> workflows = new ArrayList<>();
        String[] workflowNames = _workflowProvider.getAmetysObjectWorkflow().getWorkflowNames();
        for (String workflowName : workflowNames)
        {
            Map<String, Object> workflowMap = new HashMap<>();
            workflowMap.put("value", workflowName);
            workflowMap.put("label", new I18nizableText("application", "WORKFLOW_" + workflowName));
            workflows.add(workflowMap);
        }
        return workflows;
    }
}