/*
 * (C) Copyright 2006-2016 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 */
package org.nuxeo.common.utils;

import java.text.Normalizer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Utils for String manipulations.
 *
 * @author <a href="mailto:[email protected]">Anahide Tchertchian</a>
 * @author <a href="mailto:[email protected]">Bogdan Stefanescu</a>
 */
public final class StringUtils {

    /*
     * PLAIN_ASCII and UNICODE are parallel lookup tables: the character at index i of UNICODE is replaced by the
     * character at index i of PLAIN_ASCII. Both strings must therefore keep exactly the same length and row order.
     */
    private static final String PLAIN_ASCII =
            // grave
            "AaEeIiOoUu"
                    // acute
                    + "AaEeIiOoUuYy"
                    // circumflex
                    + "AaEeIiOoUuYy"
                    // tilde
                    + "AaEeIiOoUuYyNn"
                    // umlaut
                    + "AaEeIiOoUuYy"
                    // ring
                    + "Aa"
                    // cedilla
                    + "Cc";

    private static final String UNICODE =
            // grave
            "\u00C0\u00E0\u00C8\u00E8\u00CC\u00EC\u00D2\u00F2\u00D9\u00F9"
                    // acute
                    + "\u00C1\u00E1\u00C9\u00E9\u00CD\u00ED\u00D3\u00F3\u00DA\u00FA\u00DD\u00FD"
                    // circumflex
                    + "\u00C2\u00E2\u00CA\u00EA\u00CE\u00EE\u00D4\u00F4\u00DB\u00FB\u0176\u0177"
                    // tilde (this row used to be a copy of the circumflex row, so tilde letters were never mapped)
                    + "\u00C3\u00E3\u1EBC\u1EBD\u0128\u0129\u00D5\u00F5\u0168\u0169\u1EF8\u1EF9\u00D1\u00F1"
                    // umlaut
                    + "\u00C4\u00E4\u00CB\u00EB\u00CF\u00EF\u00D6\u00F6\u00DC\u00FC\u0178\u00FF"
                    // ring
                    + "\u00C5\u00E5"
                    // cedilla
                    + "\u00C7\u00E7";

    // This is a utility class.
    private StringUtils() {
    }

    /**
     * Replaces accented characters from a non-null String by their ascii equivalent.
     * <p>
     * Only the precomposed accented letters listed in the internal lookup table (grave, acute, circumflex, tilde,
     * umlaut, ring and cedilla forms) are replaced; every other character is copied unchanged. The lookup is done
     * one {@code char} at a time, so a base letter followed by a separate combining mark is only matched if
     * {@code normalize} is true and NFC composes the pair into one of the listed characters.
     *
     * @param s the string to convert, must not be null
     * @param normalize if true, normalize the string using NFC
     * @return a string of the same length as the (possibly normalized) input with the known accented letters replaced
     * @throws NullPointerException if {@code s} is null
     * @since 7.1
     */
    public static String toAscii(String s, boolean normalize) {
        if (normalize) {
            s = Normalizer.normalize(s, Normalizer.Form.NFC);
        }
        int n = s.length();
        // Each input char yields exactly one output char, so the final size is known up front.
        StringBuilder sb = new StringBuilder(n);
        for (int i = 0; i < n; i++) {
            char c = s.charAt(i);
            int pos = UNICODE.indexOf(c);
            sb.append(pos > -1 ? PLAIN_ASCII.charAt(pos) : c);
        }
        return sb.toString();
    }

    /**
     * Replaces accented characters from a non-null String by their ascii equivalent.
     * <p>
     * Equivalent to {@code toAscii(s, false)}: the string is not normalized first.
     *
     * @param s the string to convert, must not be null
     * @return the converted string
     */
    public static String toAscii(String s) {
        return toAscii(s, false);
    }

    /**
     * Split the given string.
     * <p>
     * An escape character immediately followed by the delimiter or by another escape character is dropped and the
     * following character is added literally to the current segment. An escape character in any other position
     * (including the very last one) is kept as-is. An empty input yields a single empty segment, and a trailing
     * unescaped delimiter yields a trailing empty segment.
     *
     * @param str the string to split
     * @param delimiter the delimiter to split with
     * @param escape the character used to escape the delimiter
     * @param trim trim the extracted segments
     * @return the list of strings computed by splitting this string
     * @throws IllegalArgumentException if {@code delimiter} and {@code escape} are the same character
     * @since 9.1
     */
    public static List<String> split(String str, char delimiter, char escape, boolean trim) {
        if (delimiter == escape) {
            throw new IllegalArgumentException("Delimiter cannot be the escape character");
        }
        List<String> ar = new ArrayList<>();
        if (str.isEmpty()) {
            ar.add(str);
            return ar;
        }
        StringBuilder segment = new StringBuilder();
        int length = str.length();
        // Set when the final character is an unescaped delimiter, so that the trailing empty segment is emitted.
        boolean lastCharDelimiter = false;
        int i = 0;
        while (i < length) {
            char c = str.charAt(i);
            if (c == escape && i < length - 1 && isEscapable(str.charAt(i + 1), delimiter, escape)) {
                // Escaped delimiter/escape: keep the escaped character, drop the escape itself.
                segment.append(str.charAt(i + 1));
                i += 2;
            } else if (c == delimiter) {
                ar.add(trim ? segment.toString().trim() : segment.toString());
                segment = new StringBuilder();
                lastCharDelimiter = i == length - 1;
                i++;
            } else {
                // Regular character, or an escape character that does not escape anything.
                segment.append(c);
                i++;
            }
        }
        if (segment.length() > 0 || lastCharDelimiter) {
            ar.add(trim ? segment.toString().trim() : segment.toString());
        }
        return ar;
    }

    /** Tells whether {@code c} is one of the two characters that an escape character can protect. */
    private static boolean isEscapable(char c, char delimiter, char escape) {
        return c == delimiter || c == escape;
    }

    /**
     * Split the given string around every occurrence of the delimiter.
     * <p>
     * Empty segments are preserved: a string that does not contain the delimiter yields a single-element array, and
     * a string ending with the delimiter yields a trailing empty string.
     *
     * @param str the string to split, must not be null
     * @param delimiter the delimiter to split with
     * @param trim trim the extracted segments
     * @return the array of segments, never empty
     */
    public static String[] split(String str, char delimiter, boolean trim) {
        int s = 0;
        int e = str.indexOf(delimiter, s);
        if (e == -1) {
            if (trim) {
                str = str.trim();
            }
            return new String[] { str };
        }
        List<String> ar = new ArrayList<>();
        do {
            String segment = str.substring(s, e);
            if (trim) {
                segment = segment.trim();
            }
            ar.add(segment);
            s = e + 1;
            e = str.indexOf(delimiter, s);
        } while (e != -1);

        int len = str.length();
        if (s < len) {
            String segment = str.substring(s);
            if (trim) {
                segment = segment.trim();
            }
            ar.add(segment);
        } else {
            // The string ends with the delimiter: emit the trailing empty segment.
            ar.add("");
        }

        return ar.toArray(new String[0]);
    }

    /**
     * Expands any variable found in the given expression with the values in the given map.
     * <p>
     * The variable format is ${property_key}.
     *
     * @param expression the expression to expand
     * @param properties a map containing variables
     */
    public static String expandVars(String expression, Map<?, ?> properties) {
        return Vars.expand(expression, properties);
    }

}