/*
 * (C) Copyright 2006-2018 Nuxeo (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Tiry
 *     Florent Guillaume
 *     Estelle Giuly <[email protected]>
 */
package org.nuxeo.ecm.core.convert.service;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.ws.rs.core.MediaType;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.nuxeo.common.utils.FileUtils;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
import org.nuxeo.ecm.core.api.impl.blob.StringBlob;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.api.ConversionService;
import org.nuxeo.ecm.core.convert.api.ConversionStatus;
import org.nuxeo.ecm.core.convert.api.ConverterCheckResult;
import org.nuxeo.ecm.core.convert.api.ConverterNotAvailable;
import org.nuxeo.ecm.core.convert.api.ConverterNotRegistered;
import org.nuxeo.ecm.core.convert.cache.CacheKeyGenerator;
import org.nuxeo.ecm.core.convert.cache.ConversionCacheHolder;
import org.nuxeo.ecm.core.convert.cache.GCTask;
import org.nuxeo.ecm.core.convert.extension.ChainedConverter;
import org.nuxeo.ecm.core.convert.extension.Converter;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
import org.nuxeo.ecm.core.convert.extension.ExternalConverter;
import org.nuxeo.ecm.core.convert.extension.GlobalConfigDescriptor;
import org.nuxeo.ecm.core.io.download.DownloadService;
import org.nuxeo.ecm.core.transientstore.work.TransientStoreWork;
import org.nuxeo.ecm.core.work.api.Work;
import org.nuxeo.ecm.core.work.api.WorkManager;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeEntry;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;
import org.nuxeo.runtime.model.ComponentContext;
import org.nuxeo.runtime.model.ComponentInstance;
import org.nuxeo.runtime.model.DefaultComponent;
import org.nuxeo.runtime.services.config.ConfigurationService;

/**
 * Runtime Component that also provides the POJO implementation of the {@link ConversionService}.
 * <p>
 * The static accessors of this class go through the {@link #self} reference, which is only set between
 * {@link #activate(ComponentContext)} and {@link #deactivate(ComponentContext)}; calling them outside of that window
 * fails with a {@link NullPointerException}.
 */
public class ConversionServiceImpl extends DefaultComponent implements ConversionService {

    private static final Logger log = LogManager.getLogger(ConversionServiceImpl.class);

    public static final String CONVERTER_EP = "converter";

    public static final String CONFIG_EP = "configuration";

    /**
     * @since 10.3
     */
    public static final String ENFORCE_SOURCE_MIME_TYPE_CHECK = "nuxeo.convert.enforceSourceMimeTypeCheck";

    /**
     * Matches a quoted {@code src} attribute: group 1 is {@code src=} plus the opening quote, group 3 the URL and
     * group 4 the closing quote (same character as the opening one). Compiled once instead of on every call.
     */
    private static final Pattern SRC_ATTRIBUTE_PATTERN = Pattern.compile("(src=([\"']))(.*?)(\\2)");

    /** Registered converter descriptors, keyed by converter name. */
    protected final Map<String, ConverterDescriptor> converterDescriptors = new HashMap<>();

    protected final MimeTypeTranslationHelper translationHelper = new MimeTypeTranslationHelper();

    protected final GlobalConfigDescriptor config = new GlobalConfigDescriptor();

    /** Availability check results already computed, keyed by converter name. */
    protected final Map<String, ConverterCheckResult> checkResultCache = new HashMap<>();

    /** The currently activated component instance, used by the static accessors. */
    protected static ConversionServiceImpl self;

    protected Thread gcThread;

    protected GCTask gcTask;

    /**
     * Resets the registries, publishes this instance as {@link #self} and clears the configured caching directory.
     */
    @Override
    public void activate(ComponentContext context) {
        converterDescriptors.clear();
        translationHelper.clear();
        self = this;
        config.clearCachingDirectory();
    }

    /**
     * Deletes the conversion cache when caching is enabled, then drops {@link #self} and the registries.
     */
    @Override
    public void deactivate(ComponentContext context) {
        if (config.isCacheEnabled()) {
            ConversionCacheHolder.deleteCache();
        }
        self = null;
        converterDescriptors.clear();
        translationHelper.clear();
    }

    /**
     * Component implementation.
     * <p>
     * Dispatches a contribution to the converter registry ({@link #CONVERTER_EP}) or to the global configuration
     * ({@link #CONFIG_EP}); any other extension point is logged as an error and ignored.
     */
    @Override
    public void registerContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
        if (CONVERTER_EP.equals(extensionPoint)) {
            registerConverter((ConverterDescriptor) contribution);
        } else if (CONFIG_EP.equals(extensionPoint)) {
            config.update((GlobalConfigDescriptor) contribution);
            config.clearCachingDirectory();
        } else {
            log.error("Unable to handle unknown extensionPoint {}", extensionPoint);
        }
    }

    /**
     * Does nothing: contributions are not removed individually.
     */
    @Override
    public void unregisterContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
    }

    /* Component API */

    /**
     * Returns the converter instance registered under the given name, or {@code null} if no such converter is
     * registered.
     */
    public static Converter getConverter(String converterName) {
        ConverterDescriptor desc = self.converterDescriptors.get(converterName);
        return desc == null ? null : desc.getConverterInstance();
    }

    /**
     * Returns the descriptor registered under the given name, or {@code null} if there is none.
     */
    public static ConverterDescriptor getConverterDescriptor(String converterName) {
        return self.converterDescriptors.get(converterName);
    }

    public static long getGCIntervalInMinutes() {
        return self.config.getGCInterval();
    }

    public static void setGCIntervalInMinutes(long interval) {
        self.config.setGCInterval(interval);
    }

    /**
     * Registers a converter descriptor. When a descriptor with the same name already exists, the new one is merged
     * into it and the merged descriptor is the one that gets initialized and registered.
     */
    public static void registerConverter(ConverterDescriptor desc) {
        // single lookup instead of containsKey + get
        ConverterDescriptor existing = self.converterDescriptors.get(desc.getConverterName());
        if (existing != null) {
            desc = existing.merge(desc);
        }
        desc.initConverter();
        self.translationHelper.addConverter(desc);
        self.converterDescriptors.put(desc.getConverterName(), desc);
    }

    public static int getMaxCacheSizeInKB() {
        return self.config.getDiskCacheSize();
    }

    public static void setMaxCacheSizeInKB(int size) {
        self.config.setDiskCacheSize(size);
    }

    public static boolean isCacheEnabled() {
        return self.config.isCacheEnabled();
    }

    public static String getCacheBasePath() {
        return self.config.getCachingDirectory();
    }

    /* Service API */

    /**
     * Returns a new list holding the names of all registered converters.
     */
    @Override
    public List<String> getRegistredConverters() {
        return new ArrayList<>(converterDescriptors.keySet());
    }

    /**
     * Converts the given blob to PDF, going through HTML.
     */
    @Override
    @Deprecated
    public Blob convertBlobToPDF(Blob blob) {
        return convertThroughHTML(new SimpleBlobHolder(blob), MimetypeRegistry.PDF_MIMETYPE).getBlob();
    }

    /**
     * Converts a blob holder to {@code destMimeType} in two steps: first to HTML (unless it already is HTML), then
     * from HTML to the destination mime type. Between the two steps, {@code src} URLs of the HTML are replaced by
     * absolute paths of files exported to a temporary directory, which is deleted before returning.
     *
     * @param blobHolder the holder of the blob to convert
     * @param destMimeType the target mime type
     * @return the given holder untouched if its blob already has the target mime type, the conversion result
     *         otherwise
     * @throws ConversionException if exporting the images or rewriting the HTML fails with an {@link IOException}
     */
    protected BlobHolder convertThroughHTML(BlobHolder blobHolder, String destMimeType) {
        Blob blob = blobHolder.getBlob();
        String mimetype = blob.getMimeType();
        String filename = blob.getFilename();
        if (destMimeType.equals(mimetype)) {
            return blobHolder;
        }

        Path tempDirectory = null;
        // Convert the blob to HTML
        if (!MediaType.TEXT_HTML.equals(mimetype)) {
            blobHolder = convertBlobToMimeType(blobHolder, MediaType.TEXT_HTML);
        }
        try {
            tempDirectory = Framework.createTempDirectory("blobs");
            // Replace the image URLs by absolute paths.
            // The HTML blob must be read from the holder here: when the source was not HTML, the "blob" local
            // still points to the original, pre-conversion content.
            DownloadService downloadService = Framework.getService(DownloadService.class);
            Blob htmlBlob = blobHolder.getBlob();
            blobHolder.setBlob(replaceURLsByAbsolutePaths(htmlBlob, tempDirectory,
                    downloadService::resolveBlobFromDownloadUrl));
            // Convert the blob to the destination mimetype
            blobHolder = convertBlobToMimeType(blobHolder, destMimeType);
            adjustBlobName(filename, blobHolder, destMimeType);
        } catch (IOException e) {
            throw new ConversionException(e);
        } finally {
            if (tempDirectory != null) {
                org.apache.commons.io.FileUtils.deleteQuietly(tempDirectory.toFile());
            }
        }
        return blobHolder;
    }

    /**
     * Shortcut for {@link #convertToMimeType(String, BlobHolder, Map)} with no conversion parameter.
     */
    protected BlobHolder convertBlobToMimeType(BlobHolder bh, String destinationMimeType) {
        return convertToMimeType(destinationMimeType, bh, Collections.emptyMap());
    }

    /**
     * Renames the main blob of the holder (see {@link #adjustBlobName(String, Blob, String)}) and sets it back on the
     * holder.
     */
    protected void adjustBlobName(String filename, BlobHolder blobHolder, String mimeType) {
        Blob blob = blobHolder.getBlob();
        adjustBlobName(filename, blob, mimeType);
        blobHolder.setBlob(blob);
    }

    /**
     * Sets the filename and mime type of a blob according to a target mime type.
     * <p>
     * The base name is {@code filename} stripped of its path and extension, or {@code file_<current millis>} when
     * {@code filename} is blank. The extension is the first one the {@link MimetypeRegistry} returns for
     * {@code mimeType}, or {@code bin} when it returns none.
     */
    protected void adjustBlobName(String filename, Blob blob, String mimeType) {
        String baseName;
        if (StringUtils.isBlank(filename)) {
            baseName = "file_" + System.currentTimeMillis();
        } else {
            baseName = FilenameUtils.removeExtension(FilenameUtils.getName(filename));
        }
        String extension = Framework.getService(MimetypeRegistry.class)
                                    .getExtensionsFromMimetypeName(mimeType)
                                    .stream()
                                    .findFirst()
                                    .orElse("bin");
        blob.setFilename(baseName + "." + extension);
        blob.setMimeType(mimeType);
    }

    /**
     * Replace the image URLs of an HTML blob by absolute local paths.
     * <p>
     * Every quoted {@code src} attribute value is passed to {@code blobResolver}. When a blob is resolved, it is
     * written to {@code tempDirectory} and the URL is replaced by the absolute path of that file. URLs that cannot
     * be resolved are left untouched and do not prevent the following ones from being processed.
     *
     * @param blob the HTML blob
     * @param tempDirectory the directory receiving the exported images
     * @param blobResolver resolves a URL to a blob, returning {@code null} when the URL cannot be resolved
     * @return the given blob if nothing was replaced, otherwise a new {@link StringBlob} with the rewritten content
     *         and the same mime type, encoding and filename
     * @throws IOException if reading the blob content or exporting an image fails
     * @since 9.1
     */
    protected static Blob replaceURLsByAbsolutePaths(Blob blob, Path tempDirectory, Function<String, Blob> blobResolver)
            throws IOException {
        String initialBlobContent = blob.getString();
        // Find images links in the blob
        Matcher matcher = SRC_ATTRIBUTE_PATTERN.matcher(initialBlobContent);
        // StringBuffer on purpose: Matcher#appendReplacement(StringBuilder, String) only exists since Java 9
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            // Retrieve the image from the URL
            String url = matcher.group(3);
            Blob imageBlob = blobResolver.apply(url);
            if (imageBlob == null) {
                // Not resolvable: keep this URL as is (the skipped text is copied verbatim by the next
                // appendReplacement/appendTail call) and go on with the remaining ones
                continue;
            }
            // Export the image to a temporary directory in File System
            String safeFilename = FileUtils.getSafeFilename(imageBlob.getFilename());
            File imageFile = tempDirectory.resolve(safeFilename).toFile();
            imageBlob.transferTo(imageFile);
            // Replace the image URL by its absolute local path
            matcher.appendReplacement(sb, "$1" + Matcher.quoteReplacement(imageFile.toPath().toString()) + "$4");
        }
        matcher.appendTail(sb);
        String blobContentWithAbsolutePaths = sb.toString();
        if (blobContentWithAbsolutePaths.equals(initialBlobContent)) {
            return blob;
        }
        // Create a new blob with the new content
        Blob newBlob = new StringBlob(blobContentWithAbsolutePaths, blob.getMimeType(), blob.getEncoding());
        newBlob.setFilename(blob.getFilename());
        return newBlob;
    }

    /**
     * Runs the named converter on the given blob holder, using the conversion cache.
     * <p>
     * The cache is always looked up; a freshly computed result is only stored when caching is enabled in the
     * configuration. The mime type and filename of the main result blob are then completed if missing.
     *
     * @param converterName the name of the converter to run
     * @param blobHolder the holder of the blob to convert
     * @param parameters the conversion parameters, may be {@code null}
     * @return the conversion result, as returned by the cache or the converter
     * @throws ConverterNotAvailable if the availability check of the converter fails
     * @throws ConversionException if the converter cannot be found or does not support the blob mime type
     */
    @Override
    public BlobHolder convert(String converterName, BlobHolder blobHolder, Map<String, Serializable> parameters)
            throws ConversionException {

        // set parameters if null to avoid NPE in converters
        if (parameters == null) {
            parameters = new HashMap<>();
        }

        // exit if not registered
        ConverterCheckResult check = isConverterAvailable(converterName);
        if (!check.isAvailable()) {
            // exit is not installed / configured
            throw new ConverterNotAvailable(converterName);
        }

        ConverterDescriptor desc = converterDescriptors.get(converterName);
        if (desc == null) {
            throw new ConversionException("Converter " + converterName + " can not be found");
        }

        // make sure the converter can handle the blob mime type
        String mimeType = blobHolder.getBlob().getMimeType();
        if (!hasSourceMimeType(desc, mimeType)) {
            throw new ConversionException(
                    String.format("%s mime type not supported by %s converter", mimeType, desc.getConverterName()));
        }

        String cacheKey = CacheKeyGenerator.computeKey(converterName, blobHolder, parameters);

        BlobHolder result = ConversionCacheHolder.getFromCache(cacheKey);

        if (result == null) {
            Converter converter = desc.getConverterInstance();
            result = converter.convert(blobHolder, parameters);

            if (config.isCacheEnabled()) {
                ConversionCacheHolder.addToCache(cacheKey, result);
            }
        } else if (result.getBlobs() != null && result.getBlobs().size() == 1) {
            // we need to reset the filename if result is a single file from the cache because the name is just a hash
            result.getBlob().setFilename(null);
        }

        if (result != null) {
            updateResultBlobMimeType(result, desc);
            updateResultBlobFileName(blobHolder, result);
        }

        return result;
    }

    /**
     * Returns true if the converter has the given {@code mimeType} as source mime type, false otherwise.
     * <p>
     * Always returns true when the {@link #ENFORCE_SOURCE_MIME_TYPE_CHECK} configuration property is not true.
     *
     * @since 10.3
     */
    protected boolean hasSourceMimeType(ConverterDescriptor converterDescriptor, String mimeType) {
        if (!Framework.getService(ConfigurationService.class).isBooleanPropertyTrue(ENFORCE_SOURCE_MIME_TYPE_CHECK)) {
            return true;
        }

        return translationHelper.hasCompatibleMimeType(converterDescriptor.getSourceMimeTypes(), mimeType);
    }

    /**
     * Sets the destination mime type of the converter on the main result blob when that blob has a blank or
     * {@code application/octet-stream} mime type. Does nothing if the holder has no main blob.
     */
    protected void updateResultBlobMimeType(BlobHolder resultBh, ConverterDescriptor desc) {
        Blob mainBlob = resultBh.getBlob();
        if (mainBlob == null) {
            return;
        }
        String mimeType = mainBlob.getMimeType();
        if (StringUtils.isBlank(mimeType) || mimeType.equals("application/octet-stream")) {
            mainBlob.setMimeType(desc.getDestinationMimeType());
        }
    }

    /**
     * Gives the main result blob a filename derived from the source blob when its own filename is blank or starts
     * with {@code nxblob-}.
     * <p>
     * The new name is the base name of the source filename followed by the first extension registered for the result
     * mime type. When no extension is registered, the extension of the current result filename is used, and
     * {@code bin} as a last resort. Nothing is done if there is no main result blob or no usable source filename.
     */
    protected void updateResultBlobFileName(BlobHolder srcBh, BlobHolder resultBh) {
        Blob mainBlob = resultBh.getBlob();
        if (mainBlob == null) {
            return;
        }
        String filename = mainBlob.getFilename();
        if (StringUtils.isNotBlank(filename) && !filename.startsWith("nxblob-")) {
            // the result already has a meaningful name
            return;
        }
        Blob srcBlob = srcBh.getBlob();
        if (srcBlob == null || StringUtils.isBlank(srcBlob.getFilename())) {
            return;
        }
        String baseName = FilenameUtils.getBaseName(srcBlob.getFilename());

        MimetypeRegistry mimetypeRegistry = Framework.getService(MimetypeRegistry.class);
        MimetypeEntry mimeTypeEntry = mimetypeRegistry.getMimetypeEntryByMimeType(mainBlob.getMimeType());
        // NOTE(review): defensive guard, the registry may not know the mime type - confirm whether it can return null
        List<String> extensions = mimeTypeEntry == null ? null : mimeTypeEntry.getExtensions();
        String extension;
        if (extensions != null && !extensions.isEmpty()) {
            extension = extensions.get(0);
        } else {
            extension = FilenameUtils.getExtension(filename);
            // getExtension returns null for a null name but an empty string for a name without extension:
            // both must fall back to "bin", otherwise the filename ends with a dangling dot
            if (StringUtils.isBlank(extension)) {
                extension = "bin";
            }
        }
        mainBlob.setFilename(baseName + "." + extension);
    }

    /**
     * Converts a blob holder to the given mime type.
     * <p>
     * Uses the converter the translation helper returns for the source/destination pair. When there is none but a
     * converter from the source mime type to HTML exists, the conversion goes through HTML.
     *
     * @throws ConversionException if neither a direct converter nor a converter to HTML is available
     */
    @Override
    public BlobHolder convertToMimeType(String destinationMimeType, BlobHolder blobHolder,
            Map<String, Serializable> parameters) throws ConversionException {
        String srcMimeType = blobHolder.getBlob().getMimeType();
        String converterName = translationHelper.getConverterName(srcMimeType, destinationMimeType);
        if (converterName != null) {
            return convert(converterName, blobHolder, parameters);
        }
        // check if a conversion is available through HTML
        converterName = translationHelper.getConverterName(srcMimeType, MediaType.TEXT_HTML);
        if (converterName == null) {
            throw new ConversionException(String.format("No converters available to convert from %s to %s.",
                    srcMimeType, destinationMimeType));
        }
        // Use a chain of 2 converters which will first try to go through HTML,
        // then HTML to the destination mimetype
        return convertThroughHTML(blobHolder, destinationMimeType);
    }

    @Override
    public List<String> getConverterNames(String sourceMimeType, String destinationMimeType) {
        return translationHelper.getConverterNames(sourceMimeType, destinationMimeType);
    }

    @Override
    public String getConverterName(String sourceMimeType, String destinationMimeType) {
        return translationHelper.getConverterName(sourceMimeType, destinationMimeType);
    }

    /**
     * Same as {@link #isConverterAvailable(String, boolean)} without forcing a refresh.
     */
    @Override
    public ConverterCheckResult isConverterAvailable(String converterName) throws ConversionException {
        return isConverterAvailable(converterName, false);
    }

    /**
     * Checks whether a converter is available.
     * <p>
     * An {@link ExternalConverter} is asked directly. A {@link ChainedConverter} based on sub-converters is available
     * if all its sub-converters are; the check stops at the first unavailable one. Any other converter is considered
     * available. The result is cached by converter name.
     *
     * @param converterName the name of the converter to check
     * @param refresh {@code true} to bypass the cached result and check again
     * @throws ConverterNotRegistered if no converter is registered under that name and no result is cached for it
     */
    @Override
    public ConverterCheckResult isConverterAvailable(String converterName, boolean refresh)
            throws ConverterNotRegistered {

        if (!refresh && checkResultCache.containsKey(converterName)) {
            return checkResultCache.get(converterName);
        }

        ConverterDescriptor descriptor = converterDescriptors.get(converterName);
        if (descriptor == null) {
            throw new ConverterNotRegistered(converterName);
        }

        Converter converter = descriptor.getConverterInstance();

        ConverterCheckResult result;
        if (converter instanceof ExternalConverter) {
            result = ((ExternalConverter) converter).isConverterAvailable();
        } else if (converter instanceof ChainedConverter) {
            ChainedConverter chainedConverter = (ChainedConverter) converter;
            result = new ConverterCheckResult();
            if (chainedConverter.isSubConvertersBased()) {
                for (String subConverterName : chainedConverter.getSubConverters()) {
                    // NOTE(review): this is the very instance cached for the sub-converter; the
                    // setSupportedInputMimeTypes call below then overwrites the mime types of that shared
                    // instance with the ones of the chain. A copy would be safer.
                    result = isConverterAvailable(subConverterName, refresh);
                    if (!result.isAvailable()) {
                        break;
                    }
                }
            }
        } else {
            // return success since there is nothing to test
            result = new ConverterCheckResult();
        }

        result.setSupportedInputMimeTypes(descriptor.getSourceMimeTypes());
        checkResultCache.put(converterName, result);

        return result;
    }

    /**
     * Returns true if the named converter declares {@code sourceMimeType} among its source mime types, false
     * otherwise, including when no converter is registered under that name.
     */
    @Override
    public boolean isSourceMimeTypeSupported(String converterName, String sourceMimeType) {
        ConverterDescriptor descriptor = getConverterDescriptor(converterName);
        // an unknown converter supports nothing, rather than failing with a NullPointerException
        return descriptor != null && descriptor.getSourceMimeTypes().contains(sourceMimeType);
    }

    /**
     * Schedules an asynchronous conversion with the named converter.
     *
     * @return the id of the scheduled work
     */
    @Override
    public String scheduleConversion(String converterName, BlobHolder blobHolder,
            Map<String, Serializable> parameters) {
        WorkManager workManager = Framework.getService(WorkManager.class);
        ConversionWork work = new ConversionWork(converterName, null, blobHolder, parameters);
        workManager.schedule(work);
        return work.getId();
    }

    /**
     * Schedules an asynchronous conversion to the given mime type.
     *
     * @return the id of the scheduled work
     */
    @Override
    public String scheduleConversionToMimeType(String destinationMimeType, BlobHolder blobHolder,
            Map<String, Serializable> parameters) {
        WorkManager workManager = Framework.getService(WorkManager.class);
        ConversionWork work = new ConversionWork(null, destinationMimeType, blobHolder, parameters);
        workManager.schedule(work);
        return work.getId();
    }

    /**
     * Returns the status of a scheduled conversion.
     * <p>
     * When the work manager has no state for the id, the conversion is reported as completed if a result is present
     * in the transient store, and {@code null} is returned otherwise.
     */
    @Override
    public ConversionStatus getConversionStatus(String id) {
        WorkManager workManager = Framework.getService(WorkManager.class);
        Work.State workState = workManager.getWorkState(id);
        if (workState != null) {
            return new ConversionStatus(id, ConversionStatus.Status.valueOf(workState.name()));
        }
        String entryKey = TransientStoreWork.computeEntryKey(id);
        if (TransientStoreWork.containsBlobHolder(entryKey)) {
            return new ConversionStatus(id, ConversionStatus.Status.COMPLETED);
        }
        return null;
    }

    /**
     * Returns the result of a scheduled conversion from the transient store.
     *
     * @param id the id of the conversion work
     * @param cleanTransientStoreEntry {@code true} to remove the result from the transient store after reading it
     */
    @Override
    public BlobHolder getConversionResult(String id, boolean cleanTransientStoreEntry) {
        String entryKey = TransientStoreWork.computeEntryKey(id);
        BlobHolder bh = TransientStoreWork.getBlobHolder(entryKey);
        if (cleanTransientStoreEntry) {
            TransientStoreWork.removeBlobHolder(entryKey);
        }
        return bh;
    }

    /**
     * Exposes the {@link MimeTypeTranslationHelper} of this component as an adapter, and defers to the parent
     * implementation for any other type.
     */
    @Override
    public <T> T getAdapter(Class<T> adapter) {
        if (adapter.isAssignableFrom(MimeTypeTranslationHelper.class)) {
            return adapter.cast(translationHelper);
        }
        return super.getAdapter(adapter);
    }

    @Override
    public void start(ComponentContext context) {
        startGC();
    }

    @Override
    public void stop(ComponentContext context) {
        endGC();
    }

    /**
     * Creates a new {@link GCTask} and starts it in a daemon thread named {@code Nuxeo-Convert-GC}.
     */
    protected void startGC() {
        log.debug("CasheCGTaskActivator activated starting GC thread");
        gcTask = new GCTask();
        gcThread = new Thread(gcTask, "Nuxeo-Convert-GC");
        gcThread.setDaemon(true);
        gcThread.start();
        log.debug("GC Thread started");
    }

    /**
     * Disables the GC task and interrupts its thread. Does nothing if no GC task is running.
     */
    public void endGC() {
        if (gcTask == null) {
            return;
        }
        log.debug("Stopping GC Thread");
        gcTask.GCEnabled = false;
        gcTask = null;
        if (gcThread != null) {
            gcThread.interrupt();
            gcThread = null;
        }
    }

}