Background

Recently my company had a requirement: upload a UTF-8 encoded CSV file to an FTP server for an upstream system to consume.
However, that upstream system requires the file on the FTP server to be GBK encoded.

Approach 1

The conventional approach: simply use InputStreamReader and OutputStreamWriter to read the file in one encoding and write it back out in the other
(code borrowed from someone else):

import java.io.*;

// Convert file encoding: turn a GBK-encoded file into a UTF-8-encoded file
public class GBKtoUtf {
    public static void main(String[] args) throws IOException {
        InputStreamReader inputStreamReader =
                new InputStreamReader(new FileInputStream("C:\\test\\GBK文件.txt"), "GBK"); // must be GBK here
        OutputStreamWriter outputStreamWriter =
                new OutputStreamWriter(new FileOutputStream("C:\\test\\UTF-8文件.txt"), "UTF-8");
        int len = 0;
        while ((len = inputStreamReader.read()) != -1) {
            outputStreamWriter.write(len);
        }
        outputStreamWriter.close();
        inputStreamReader.close();
    }
}
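For the task at hand the direction is reversed (UTF-8 in, GBK out). A minimal sketch of the same idea, with hypothetical file paths, a buffered copy loop, and try-with-resources so the streams are always closed:

import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

// Sketch of Approach 1 for this article's case: UTF-8 source file -> GBK target file.
// The file paths are placeholders.
public class Utf8ToGbk {
    public static void main(String[] args) throws IOException {
        Charset gbk = Charset.forName("GBK");
        try (Reader reader = new InputStreamReader(new FileInputStream("C:\\test\\source-utf8.csv"), StandardCharsets.UTF_8);
             Writer writer = new OutputStreamWriter(new FileOutputStream("C:\\test\\target-gbk.csv"), gbk)) {
            char[] buffer = new char[8192];
            int n;
            while ((n = reader.read(buffer)) != -1) {
                writer.write(buffer, 0, n); // characters are re-encoded to GBK on write
            }
        }
    }
}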

Approach 2

For small files, Approach 1 is simple and clear enough.
For large files, Approach 1 has to read from and write a converted copy back to disk before uploading, and that extra disk I/O is slow (disk is often the bottleneck of a system).

Uploading the file already requires reading it once anyway, so is there a way to do the transcoding during that read?

Analyzing the FtpClient API


Reading the FtpClient upload API shows that uploading a file is really nothing more than copying a byte stream.
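For example (a sketch assuming Apache Commons Net's FTPClient is the client in use; names are placeholders), the upload call takes a plain InputStream and pumps its bytes to the remote file byte for byte:

import org.apache.commons.net.ftp.FTP;
import org.apache.commons.net.ftp.FTPClient;

import java.io.IOException;
import java.io.InputStream;

public class FtpUploadSketch {
    // storeFile simply copies whatever bytes the InputStream yields to the remote file.
    // ftpClient is assumed to be already connected and logged in.
    static void upload(FTPClient ftpClient, String remoteName, InputStream localBytes) throws IOException {
        ftpClient.setFileType(FTP.BINARY_FILE_TYPE); // binary mode: bytes pass through untouched
        if (!ftpClient.storeFile(remoteName, localBytes)) {
            throw new IOException("upload failed: " + ftpClient.getReplyString());
        }
    }
}

In other words, whatever bytes the InputStream hands out are exactly the bytes that land in the remote file.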

Analyzing OutputStreamWriter

Reading the OutputStreamWriter source shows that it delegates the encoding conversion to a class called StreamEncoder,
and StreamEncoder in turn uses a CharsetEncoder to do the actual encoding.

CharsetEncoder exposes two encoding APIs:

public final CoderResult encode(CharBuffer in, ByteBuffer out, boolean endOfInput) {
    ......
}

public final ByteBuffer encode(CharBuffer in) throws CharacterCodingException {
    ......
}
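As a quick illustration of the one-shot variant (a minimal sketch, not from the original article), encoding a handful of characters into GBK bytes looks like this; the three-argument streaming form is the one the custom InputStream below relies on:

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;

public class CharsetEncoderDemo {
    public static void main(String[] args) throws CharacterCodingException {
        CharsetEncoder gbkEncoder = Charset.forName("GBK").newEncoder();
        // One-shot: encode the whole CharBuffer into a freshly allocated ByteBuffer
        ByteBuffer gbkBytes = gbkEncoder.encode(CharBuffer.wrap("编码测试"));
        System.out.println("GBK byte count: " + gbkBytes.remaining()); // 2 bytes per Chinese character
    }
}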

Doing the transcoding during the file read

Since FtpClient's upload is just a byte-stream copy, it is enough to do the transcoding at the moment it reads the byte stream,
and CharsetEncoder gives us the API to turn a CharBuffer into a ByteBuffer.

So all that is needed is a custom InputStream:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;

public class FtpInputStream extends InputStream {

    private static final Logger logger = LoggerFactory.getLogger(FtpInputStream.class);

    private final InputStreamReader inputStreamReader;
    private final Charset originCharset;
    private final Charset destCharset;
    private final CharsetEncoder encoder;
    private final CharBuffer charBuffer;
    private final ByteBuffer byteBuffer;
    private CoderResult coderResult = CoderResult.UNDERFLOW;
    private boolean end = false;

    public FtpInputStream(InputStream inputStream, Charset originCharset, Charset destCharset) {
        this.inputStreamReader = new InputStreamReader(inputStream, originCharset);
        this.originCharset = originCharset;
        this.destCharset = destCharset;
        this.encoder = destCharset.newEncoder();
        this.charBuffer = CharBuffer.allocate(1000);
        this.byteBuffer = ByteBuffer.allocate((int) (1000 * encoder.averageBytesPerChar()));
        this.byteBuffer.flip(); // start out empty so the first read() triggers decoding and encoding
    }

    @Override
    public void close() throws IOException {
        inputStreamReader.close();
    }

    @Override
    public int read() throws IOException {
        if (byteBuffer.hasRemaining()) {
            // byteBuffer still has data: just hand out the next byte (masked to 0..255)
            return byteBuffer.get() & 0xFF;
        } else {
            // byteBuffer has been fully consumed: clear it for reuse
            byteBuffer.clear();
        }
        if (end && !charBuffer.hasRemaining()) {
            // input exhausted and every char has been encoded
            return -1;
        }
        // byteBuffer is empty; charBuffer is in one of two states: fully consumed, or not yet fully consumed.
        // Fully consumed: refill it from inputStreamReader.
        // Not fully consumed (the previous encode overflowed byteBuffer): skip the read and keep encoding.
        if (coderResult.isUnderflow()) {
            charBuffer.clear();
            int r = inputStreamReader.read(charBuffer);
            if (r == -1) {
                // end of input
                end = true;
                charBuffer.flip();
            } else if (r == 0) {
                // should not happen
                throw new IOException("read 0 chars");
            } else {
                // flip so the encoder can read what was just decoded
                charBuffer.flip();
            }
        }
        // encode chars into bytes of the destination charset
        coderResult = encoder.encode(charBuffer, byteBuffer, end);
        if (coderResult.isError()) {
            throw new IOException(coderResult.toString());
        }
        if (end && coderResult.isUnderflow()) {
            encoder.flush(byteBuffer);
        }
        // flip, ready to be read
        byteBuffer.flip();
        if (byteBuffer.hasRemaining()) {
            return byteBuffer.get() & 0xFF;
        } else if (end) {
            // nothing left after the final flush
            return -1;
        } else {
            // should not happen: the encoder produced no output
            throw new IOException("encoder produced no bytes");
        }
    }
}
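A usage sketch (again assuming Apache Commons Net's FTPClient; host, credentials and paths are placeholders): wrap the original UTF-8 stream in FtpInputStream so that the bytes FtpClient copies are already GBK.

import org.apache.commons.net.ftp.FTP;
import org.apache.commons.net.ftp.FTPClient;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

public class FtpUploadWithTranscoding {
    public static void main(String[] args) throws IOException {
        FTPClient ftpClient = new FTPClient();
        ftpClient.connect("ftp.example.com");        // placeholder host
        ftpClient.login("user", "password");         // placeholder credentials
        ftpClient.setFileType(FTP.BINARY_FILE_TYPE); // binary mode: bytes pass through untouched
        // Bytes coming out of ftpIn are already GBK, so the remote file ends up GBK encoded
        try (InputStream utf8In = new FileInputStream("C:\\test\\source-utf8.csv");
             InputStream ftpIn = new FtpInputStream(utf8In, StandardCharsets.UTF_8, Charset.forName("GBK"))) {
            ftpClient.storeFile("target-gbk.csv", ftpIn);
        } finally {
            ftpClient.logout();
            ftpClient.disconnect();
        }
    }
}

No temporary GBK file ever touches the disk: the transcoding happens on the fly as storeFile pulls bytes from the stream. If throughput matters, overriding read(byte[], int, int) in FtpInputStream to copy whole chunks out of byteBuffer would cut the per-byte call overhead.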