/*
 * (C) Copyright 2015 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo
 */

package org.nuxeo.ecm.blob.azure;

import java.net.URISyntaxException;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.blob.AbstractBinaryGarbageCollector;

import com.microsoft.azure.storage.ResultContinuation;
import com.microsoft.azure.storage.ResultSegment;
import com.microsoft.azure.storage.StorageException;
import com.microsoft.azure.storage.blob.BlobListingDetails;
import com.microsoft.azure.storage.blob.CloudBlockBlob;
import com.microsoft.azure.storage.blob.ListBlobItem;

/**
 * Garbage collector that walks the blobs of the container held by an {@link AzureBinaryManager} and sorts them into
 * "still marked" and "to be deleted" according to the {@code marked} set maintained by the parent class.
 *
 * @author <a href="mailto:[email protected]">Arnaud Kervern</a>
 * @since 7.10
 */
public class AzureGarbageCollector extends AbstractBinaryGarbageCollector<AzureBinaryManager> {

    private static final Log log = LogFactory.getLog(AzureGarbageCollector.class);

    /** A lower-case hexadecimal string of exactly 32 characters. */
    private static final Pattern MD5_RE = Pattern.compile("[0-9a-f]{32}");

    public AzureGarbageCollector(AzureBinaryManager binaryManager) {
        super(binaryManager);
    }

    /**
     * @return the literal {@code "azure:"} followed by the name of the binary manager's container
     */
    @Override
    public String getId() {
        return "azure:" + binaryManager.container.getName();
    }

    /**
     * Lists, segment by segment, every blob of the container whose name starts with the binary manager's prefix and
     * returns the digests of those that are not present in {@code marked}.
     * <p>
     * Items that are not block blobs, and blobs whose name (once the prefix is stripped) does not look like an MD5
     * digest, are ignored. For every remaining blob the {@code status} counters are updated: marked blobs count
     * towards {@code numBinaries}/{@code sizeBinaries}, the others towards {@code numBinariesGC}/
     * {@code sizeBinariesGC}. The {@code marked} field is emptied along the way and set to {@code null} at the end.
     *
     * @return the digests of the blobs that were not marked
     * @throws RuntimeException wrapping the {@link StorageException} if a listing request fails
     */
    @Override
    public Set<String> getUnmarkedBlobs() {
        Set<String> unmarked = new HashSet<>();
        ResultContinuation continuationToken = null;
        ResultSegment<ListBlobItem> lbs;
        do {
            try {
                lbs = binaryManager.container.listBlobsSegmented(binaryManager.prefix, false,
                        EnumSet.noneOf(BlobListingDetails.class), null, continuationToken, null, null);
            } catch (StorageException e) {
                throw new RuntimeException(
                        "Failed to list blobs of container: " + binaryManager.container.getName(), e);
            }

            for (ListBlobItem item : lbs.getResults()) {
                if (!(item instanceof CloudBlockBlob)) {
                    // ignore subdirectories
                    continue;
                }
                CloudBlockBlob blob = (CloudBlockBlob) item;

                String digest = extractDigest(blob);
                if (!isMD5(digest)) {
                    // ignore files that cannot be MD5 digests for safety
                    // (this also covers blobs whose name could not be read)
                    continue;
                }

                long length = blob.getProperties().getLength();
                if (marked.contains(digest)) {
                    status.numBinaries++;
                    status.sizeBinaries += length;
                    marked.remove(digest); // optimize memory
                } else {
                    status.numBinariesGC++;
                    status.sizeBinariesGC += length;
                    // record file to delete
                    unmarked.add(digest);
                }
            }

            // the next request resumes where this segment stopped
            continuationToken = lbs.getContinuationToken();
        } while (lbs.getHasMoreResults());
        marked = null; // help GC

        return unmarked;
    }

    /**
     * Returns the blob name with the binary manager's prefix stripped, or {@code null} if the name cannot be read.
     */
    private String extractDigest(CloudBlockBlob blob) {
        try {
            return blob.getName().substring(binaryManager.prefix.length());
        } catch (URISyntaxException e) {
            // Should never happen, see com.microsoft.azure.storage.blob.CloudBlob.getName();
            // leave a trace instead of dropping the blob silently.
            log.warn("Ignoring blob whose name cannot be resolved", e);
            return null;
        }
    }

    /**
     * Checks whether the given string has the shape of an MD5 digest: exactly 32 lower-case hexadecimal characters.
     *
     * @param digest the candidate string, may be {@code null}
     * @return {@code true} if {@code digest} is non-null and matches, {@code false} otherwise
     */
    public static boolean isMD5(String digest) {
        return digest != null && MD5_RE.matcher(digest).matches();
    }
}