共计 2280 个字符,预计需要花费 6 分钟才能阅读完成。
主动依据文本文件的字符集编码加载文件内容字符串,并反对按原始编码格局再次写入。
using System;
using System.IO;
using System.Linq;
using System.Text;
/// <summary>
/// 获取文件的编码格局
/// </summary>
public class TextEncode
{public Encoding Encoding { get; private set;}
public byte[] BOM { get; private set;}
public string ReadText(string fileName, out Encoding enc)
{var r = ReadText(fileName);
enc = Encoding;
return r;
}
public string ReadText(string fileName)
{
Encoding encoding = null;
byte[] bytes = File.ReadAllBytes(fileName);
int bomLen = 0;
if (bytes.Length > 1)
{if (bytes[0] == 0xFE && bytes[1] == 0xFF) //UTF-16(大端序){encoding = new UnicodeEncoding(true, true);
bomLen = 2;
}
else if (bytes[0] == 0xFF && bytes[1] == 0xFE) //UTF-16(小端序){encoding = new UnicodeEncoding(false, true);
bomLen = 2;
}
}
if (encoding == null && bytes.Length > 2)
{if ((bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF)) //UTF-8
{encoding = new UTF8Encoding(true);
bomLen = 3;
}
}
if (encoding == null && bytes.Length > 3)
{if (bytes[0] == 0x00 && bytes[1] == 0x00 && bytes[2] == 0xFE && bytes[3] == 0xFF) //UTF-32(大端序){encoding = new UTF32Encoding(true, true);
bomLen = 4;
}
else if (bytes[0] == 0xFF && bytes[1] == 0xFE && bytes[2] == 0x00 && bytes[3] == 0x00) //UTF-32(小端序){encoding = new UTF32Encoding(false, true);
bomLen = 4;
}
}
if (encoding == null && IsUTF8Bytes(bytes))
{encoding = Encoding.UTF8; //UTF8 无 BOM}
if (encoding == null) encoding = Encoding.Default;
Encoding = encoding;
BOM = new byte[bomLen];
Array.Copy(bytes, BOM, bomLen);
return encoding.GetString(bytes, bomLen, bytes.Length - bomLen);
}
/// <summary>
/// 判断是否是不带 BOM 的 UTF8 格局
/// </summary>
/// <param name="data"></param>
/// <returns></returns>
private bool IsUTF8Bytes(byte[] data)
{
int charByteCounter = 1; // 计算以后正剖析的字符应还有的字节数
byte curByte; // 以后剖析的字节.
for (int i = 0; i < data.Length; i++)
{curByte = data[i];
if (curByte == 0) throw new FormatException("非预期的 byte 格局");
if (charByteCounter == 1)
{if (curByte >= 0x80)
{
// 判断以后
while (((curByte <<= 1) & 0x80) != 0)
{charByteCounter++;}
// 标记位首位若为非 0 则至多以 2 个 1 开始 如:110XXXXX...........1111110X
if (charByteCounter == 1 || charByteCounter > 6)
{return false;}
}
}
else
{
// 若是 UTF-8 此时第一位必须为 1
if ((curByte & 0xC0) != 0x80)
{return false;}
charByteCounter--;
}
}
if (charByteCounter > 1)
{throw new FormatException("非预期的 byte 格局");
}
return true;
}
/// <summary>
/// 已雷同的 Encoding 和 BOM 再次写入
/// </summary>
public void WriteBySameEncoding(string fileName, string content)
{if (BOM == null)
{throw new Exception("须要先调用 ReadText 办法");
}
using (var file = File.Create(fileName))
{file.Write(BOM, 0, BOM.Length);
var bytes = Encoding.GetBytes(content);
file.Write(bytes, 0, bytes.Length);
}
}
}
正文完