面试题:
编写一个截取字符串的函数,输入为一个字符串和字节数,输出为按字节截取的字符串。但是要保证汉字不被截半个,如输入"我ABC",4,应该截为"我AB";输入"我ABC汉DEF",6,应该输出为"我ABC",而不是"我ABC"加上"汉"的半个字节。(以上示例按每个汉字占2个字节的编码,如GBK,计算。)
此问题跟字符编码格式相关,以下为UTF-8编码的情况:
/**
 * Truncates strings by UTF-8 byte count without ever splitting a multi-byte character.
 *
 * <p>All sizes handled by this class are measured in bytes of the UTF-8 encoding of the
 * string, not in {@code char}s or code points.
 */
public class CutString
{
    /** Name of the charset used for both encoding and decoding. */
    private static final String CHARSET = "utf-8";

    /**
     * Returns the longest prefix of {@code s} whose UTF-8 encoding occupies at most
     * {@code len} bytes and ends on a character boundary.
     *
     * <p>If the byte limit falls inside a multi-byte character (2, 3 or 4 bytes), that whole
     * character is dropped rather than being cut in half.
     *
     * @param s   the string to truncate; must not be {@code null}
     * @param len the maximum number of UTF-8 bytes to keep; must not be negative
     * @return {@code s} itself when its encoding already fits in {@code len} bytes, otherwise
     *         the truncated prefix (possibly empty)
     * @throws IllegalArgumentException     if {@code len} is negative
     * @throws UnsupportedEncodingException if the UTF-8 charset is not available
     */
    public static String substring(String s, int len) throws UnsupportedEncodingException
    {
        if (len < 0)
        {
            throw new IllegalArgumentException("len must not be negative: " + len);
        }

        byte[] src = s.getBytes(CHARSET);
        if (len >= src.length)
        {
            return s;
        }

        int end = characterBoundary(src, len);
        return new String(src, 0, end, CHARSET);
    }

    /**
     * Finds the largest index not greater than {@code limit} at which a UTF-8 character starts.
     *
     * <p>In UTF-8 every byte of the form {@code 10xxxxxx} is a continuation byte. If the byte
     * at the cut position is a continuation byte, the cut would split a character, so the
     * index is moved back until it points at a lead byte (or an ASCII byte).
     *
     * @param src   bytes of a well-formed UTF-8 sequence
     * @param limit the requested cut position, {@code 0 <= limit < src.length}
     * @return the adjusted cut position, in the range {@code [0, limit]}
     */
    private static int characterBoundary(byte[] src, int limit)
    {
        int end = limit;
        while (end > 0 && (src[end] & 0xC0) == 0x80)
        {
            end--;
        }
        return end;
    }

    /**
     * Small demonstration that prints a sample string truncated at several byte limits.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        // Original string
        String s = "我ZWR爱你们JAVA";
        System.out.println("原始字符串:" + s);
        try
        {
            System.out.println("截取前1位:" + CutString.substring(s, 1));
            System.out.println("截取前2位:" + CutString.substring(s, 2));
            System.out.println("截取前4位:" + CutString.substring(s, 4));
            System.out.println("截取前8位:" + CutString.substring(s, 8));
        }
        catch (UnsupportedEncodingException e)
        {
            e.printStackTrace();
        }
    }
}