背景
最近好几个我的项目在运行过程中客户都提出文件上传大小的限度是否设置的大一些,用户常常须要上传好几个G的材料文件,如图纸,视频等,并且须要在上传大文件过程中进行优化实时展示进度条,进行技术评估后针对框架文件上传进行扩大降级,扩大接口反对大文件分片上传解决,缩小服务器刹时的内存压力,同一个文件上传失败后能够从胜利上传分片地位进行断点续传,文件上传胜利后再次上传无需期待达到秒传的成果,优化用户交互体验,具体的实现流程如下图所示
文件MD5计算
对于文件md5的计算咱们应用spark-md5第三方库,大文件咱们能够分片别离计算再合并节省时间,然而经测试1G文件计算MD5须要20s左右的工夫,所以通过优化咱们抽取文件局部特色信息(文件第一片+文件最初一片+文件批改工夫),来保障文件的绝对唯一性,只须要2s左右,大大提高前端计算效率,对于前端文件内容块的读取咱们须要应用html5的api中fileReader.readAsArrayBuffer办法,因为是异步触发,封装的办法提供一个回调函数进行应用
createSimpleFileMD5(file, chunkSize, finishCaculate) {
var fileReader = new FileReader();
var blobSlice = File.prototype.mozSlice || File.prototype.webkitSlice || File.prototype.slice;
var chunks = Math.ceil(file.size / chunkSize);
var currentChunk = 0;
var spark = new SparkMD5.ArrayBuffer();
var startTime = new Date().getTime();
loadNext();
fileReader.onload = function() {
spark.append(this.result);
if (currentChunk == 0) {
currentChunk = chunks - 1;
loadNext();
} else {
var fileMD5 = hpMD5(spark.end() + file.lastModifiedDate);
finishCaculate(fileMD5)
}
};
function loadNext() {
var start = currentChunk * chunkSize;
var end = start + chunkSize >= file.size ? file.size : start + chunkSize;
fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
}
}
文件分片切割
咱们通过定义好文件分片大小,应用blob对象反对的file.slice办法切割文件,分片上传申请须要同步按程序申请,因为应用了同步申请,前端ui会阻塞无奈点击,须要开启worker线程进行操作,实现后通过postMessage办法传递音讯给主页面告诉ui进度条的更新,须要留神的是,worker线程办法不反对window对象,所以尽量不要应用第三方库,应用原生的XMLHttpRequest对象发动申请,须要的参数通过onmessage办法传递获取
页面upload申请办法如下
upload() {
var file = document.getElementById("file").files[0];
if (!file) {
alert("请抉择须要上传的文件");
return;
}
if (file.size < pageData.chunkSize) {
alert("抉择的文件请大于" + pageData.chunkSize / 1024 / 1024 + "M")
}
var filesize = file.size;
var filename = file.name;
pageData.chunkCount = Math.ceil(filesize / pageData.chunkSize);
this.createSimpleFileMD5(file, pageData.chunkSize, function(fileMD5) {
console.log("计算文件MD:" + fileMD5);
pageData.showProgress = true;
var worker = new Worker('worker.js');
var param = {
token: GetTokenID(),
uploadUrl: uploadUrl,
filename: filename,
filesize: filesize,
fileMD5: fileMD5,
groupguid: pageData.groupguid1,
grouptype: pageData.grouptype1,
chunkCount: pageData.chunkCount,
chunkSize: pageData.chunkSize,
file: file
}
worker.onmessage = function(event) {
var workresult = event.data;
if (workresult.code == 0) {
pageData.percent = workresult.percent;
if (workresult.percent == 100) {
pageData.showProgress = false;
worker.terminate();
}
} else {
pageData.showProgress = false;
worker.terminate();
}
}
worker.postMessage(param);
})
}
worker.js执行办法如下
function FormAjax_Sync(token, data, url, success) {
var xmlHttp = new XMLHttpRequest();
xmlHttp.open("post", url, false);
xmlHttp.setRequestHeader("token", token);
xmlHttp.onreadystatechange = function() {
if (xmlHttp.status == 200) {
var result = JSON.parse(this.responseText);
var status = this.status
success(result, status);
}
};
xmlHttp.send(data);
}
onmessage = function(evt) {
var data = evt.data;
console.log(data)
//传递的参数
var token = data.token
var uploadUrl = data.uploadUrl
var filename = data.filename
var fileMD5 = data.fileMD5
var groupguid = data.groupguid
var grouptype = data.grouptype
var chunkCount = data.chunkCount
var chunkSize = data.chunkSize
var filesize = data.filesize
var filename = data.filename
var file = data.file
var start = 0;
var end;
var index = 0;
var startTime = new Date().getTime();
while (start < filesize) {
end = start + chunkSize;
if (end > filesize) {
end = filesize;
}
var chunk = file.slice(start, end); //切割文件
var formData = new FormData();
formData.append("file", chunk, filename);
formData.append("fileMD5", fileMD5);
formData.append("chunkCount", chunkCount)
formData.append("chunkIndex", index);
formData.append("chunkSize", end - start);
formData.append("groupguid", groupguid);
formData.append("grouptype", grouptype);
//上传文件
FormAjax_Sync(token, formData, uploadUrl, function(result, status) {
var code = 0;
var percent = 0;
if (result.code == 0) {
console.log("分片共" + chunkCount + "个" + ",已胜利上传第" + index + "个")
percent = parseInt((parseInt(formData.get("chunkIndex")) + 1) * 100 / chunkCount);
} else {
filesize = -1;
code = -1
console.log("分片第" + index + "个上传失败")
}
self.postMessage({ code: code, percent: percent });
})
start = end;
index++;
}
console.log("上传分片总工夫:" + (new Date().getTime() - startTime));
console.log("分片实现");
}
文件分片接管
前端文件分片处理完毕后,接下来咱们具体介绍下后端文件承受解决的计划,分片解决须要反对用户随时中断上传与文件反复上传,咱们新建表f_attachchunk来记录文件分片的详细信息,表结构设计如下
CREATE TABLE `f_attachchunk` (
`ID` int(11) NOT NULL AUTO_INCREMENT,
`ChunkGuid` varchar(50) NOT NULL,
`FileMD5` varchar(100) DEFAULT NULL,
`FileName` varchar(200) DEFAULT NULL,
`ChunkSize` int(11) DEFAULT NULL,
`ChunkCount` int(11) DEFAULT NULL,
`ChunkIndex` int(11) DEFAULT NULL,
`ChunkFilePath` varchar(500) DEFAULT NULL,
`UploadUserGuid` varchar(50) DEFAULT NULL,
`UploadUserName` varchar(100) DEFAULT NULL,
`UploadDate` datetime DEFAULT NULL,
`UploadOSSID` varchar(200) DEFAULT NULL,
`UploadOSSChunkInfo` varchar(1000) DEFAULT NULL,
`ChunkType` varchar(50) DEFAULT NULL,
`MergeStatus` int(11) DEFAULT NULL,
PRIMARY KEY (`ID`)
) ENGINE=InnoDB AUTO_INCREMENT=237 DEFAULT CHARSET=utf8mb4;
- FileMD5:文件MD5惟一标识文件
- FileName:文件名称
- ChunkSize:分片大小
- ChunkCount:分片总数量
- ChunkIndex:分片对应序号
- ChunkFilePath:分片存储门路(本地存储文件计划应用)
- UploadUserGuid:上传人主键
- UploadUserName:上传人姓名
- UploadDate:上传人日期
- UploadOSSID:分片上传批次ID(云存储计划应用)
- UploadOSSChunkInfo:分片上传单片信息(云存储计划应用)
- ChunkType:分片存储形式(本地存储,阿里云,华为云,Minio标识)
- MergeStatus:分片合并状态(未合并,已合并)
文件分片存储后端一共分为三步,查看分片=》保留分片=》合并分片,咱们这里先以本地文件存储为例解说,云存储思路统一,后续会提供对应应用的api办法
查看分片
查看分片以数据库文件分片记录的FIleMD5与ChunkIndex组合来确定分片的唯一性,因为本地分片temp文件是作为临时文件存储,可能会呈现手动革除施放磁盘空间的问题,所以数据库存在记录咱们还须要对应的查看理论文件状况
boolean existChunk = false;
AttachChunkDO dbChunk = attachChunkService.checkExistChunk(fileMD5, chunkIndex, "Local");
if (dbChunk != null) {
File chunkFile = new File(dbChunk.getChunkFilePath());
if (chunkFile.exists()) {
if (chunkFile.length() == chunkSize) {
existChunk = true;
} else {
//删除数据库记录
attachChunkService.delete(dbChunk.getChunkGuid());
}
} else {
//删除数据库记录
attachChunkService.delete(dbChunk.getChunkGuid());
}
}
保留分片
保留分片分为两块,文件存储到本地,胜利后数据库插入对应分片信息
//获取配置中附件上传文件夹
String filePath = frameConfig.getAttachChunkPath() + "/" + fileMD5 + "/";
//依据附件guid创立文件夹
File targetFile = new File(filePath);
if (!targetFile.exists()) {
targetFile.mkdirs();
}
if (!existChunk) {
//保留文件到文件夹
String chunkFileName = fileMD5 + "-" + chunkIndex + ".temp";
FileUtil.uploadFile(FileUtil.convertStreamToByte(fileContent), filePath, chunkFileName);
//插入chunk表
AttachChunkDO attachChunkDO = new AttachChunkDO(fileMD5, fileName, chunkSize, chunkCount, chunkIndex, filePath + chunkFileName, "Local");
attachChunkService.insert(attachChunkDO);
}
合并分片
在上传分片办法中,如果以后分片是最初一片,上传完毕后进行文件合并工作,同时进行数据库合并状态的更新,下一次同一个文件上传时咱们能够间接拷贝之前合并过的文件作为新附件,缩小合并这一步骤的I/O操作,合并文件咱们采纳BufferedOutputStream与BufferedInputStream两个对象,固定缓冲区大小
if (chunkIndex == chunkCount - 1) {
//合并文件
String merageFileFolder = frameConfig.getAttachPath() + groupType + "/" + attachGuid;
File attachFolder = new File(merageFileFolder);
if (!attachFolder.exists()) {
attachFolder.mkdirs();
}
String merageFilePath = merageFileFolder + "/" + fileName;
merageFile(fileMD5, merageFilePath);
attachChunkService.updateMergeStatusToFinish(fileMD5);
//插入到附件库
//设置附件惟一guid
attachGuid = CommonUtil.getNewGuid();
attachmentDO.setAttguid(attachGuid);
attachmentService.insert(attachmentDO);
}
public void merageFile(String fileMD5, String targetFilePath) throws Exception {
String merageFilePath = frameConfig.getAttachChunkPath()+"/"+fileMD5+"/"+fileMD5+".temp";
File merageFile = new File(merageFilePath);
if(!merageFile.exists()){
BufferedOutputStream destOutputStream = new BufferedOutputStream(new FileOutputStream(merageFilePath));
List<AttachChunkDO> attachChunkDOList = attachChunkService.selectListByFileMD5(fileMD5, "Local");
for (AttachChunkDO attachChunkDO : attachChunkDOList) {
File file = new File(attachChunkDO.getChunkFilePath());
byte[] fileBuffer = new byte[1024 * 1024 * 5];//文件读写缓存
int readBytesLength = 0; //每次读取字节数
BufferedInputStream sourceInputStream = new BufferedInputStream(new FileInputStream(file));
while ((readBytesLength = sourceInputStream.read(fileBuffer)) != -1) {
destOutputStream.write(fileBuffer, 0, readBytesLength);
}
sourceInputStream.close();
}
destOutputStream.flush();
destOutputStream.close();
}
FileUtil.copyFile(merageFilePath,targetFilePath);
}
云文件分片上传
云文件上传与本地文件上传的区别就是,分片文件间接上传到云端,再调用云存储api进行文件合并与文件拷贝,数据库相干记录与查看差别不大
阿里云OSS
上传分片前须要生成该文件的分片上传组标识uploadid
public String getUplaodOSSID(String key){
key = "chunk/" + key + "/" + key;
TenantParams.attach appConfig = getAttach();
OSSClient ossClient = InitOSS(appConfig);
String bucketName = appConfig.getBucketname_auth();
InitiateMultipartUploadRequest request = new InitiateMultipartUploadRequest(bucketName, key);
InitiateMultipartUploadResult upresult = ossClient.initiateMultipartUpload(request);
String uploadId = upresult.getUploadId();
ossClient.shutdown();
return uploadId;
}
上传分片时须要指定uploadid,同时咱们要将返回的分片信息PartETag序列化保留数据库,用于后续的文件合并
public String uploadChunk(InputStream stream,String key, int chunkIndex, int chunkSize, String uploadId){
key = "chunk/" + key + "/" + key;
String result = "";
try{
TenantParams.attach appConfig = getAttach();
OSSClient ossClient = InitOSS(appConfig);
String bucketName = appConfig.getBucketname_auth();
UploadPartRequest uploadPartRequest = new UploadPartRequest();
uploadPartRequest.setBucketName(bucketName);
uploadPartRequest.setKey(key);
uploadPartRequest.setUploadId(uploadId);
uploadPartRequest.setInputStream(stream);
// 设置分片大小。除了最初一个分片没有大小限度,其余的分片最小为100 KB。
uploadPartRequest.setPartSize(chunkSize);
// 设置分片号。每一个上传的分片都有一个分片号,取值范畴是1~10000,如果超出此范畴,OSS将返回InvalidArgument错误码。
uploadPartRequest.setPartNumber(chunkIndex+1);
// 每个分片不须要按程序上传,甚至能够在不同客户端上传,OSS会依照分片号排序组成残缺的文件。
UploadPartResult uploadPartResult = ossClient.uploadPart(uploadPartRequest);
PartETag partETag = uploadPartResult.getPartETag();
result = JSON.toJSONString(partETag);
ossClient.shutdown();
}catch (Exception e){
logger.error("OSS上传文件Chunk失败:" + e.getMessage());
}
return result;
}
合并分片时通过传递保留分片的PartETag对象数组进行操作,为了附件独立唯一性咱们不间接应用合并后的文件,通过api进行文件拷贝正本应用
public boolean merageFile(String uploadId, List<PartETag> chunkInfoList,String key,AttachmentDO attachmentDO,boolean checkMerge){
key = "chunk/" + key + "/" + key;
boolean result = true;
try{
TenantParams.attach appConfig = getAttach();
OSSClient ossClient = InitOSS(appConfig);
String bucketName = appConfig.getBucketname_auth();
if(!checkMerge){
CompleteMultipartUploadRequest completeMultipartUploadRequest = new CompleteMultipartUploadRequest(bucketName, key, uploadId, chunkInfoList);
CompleteMultipartUploadResult completeMultipartUploadResult = ossClient.completeMultipartUpload(completeMultipartUploadRequest);
}
String attachKey = getKey(attachmentDO);
ossClient.copyObject(bucketName,key,bucketName,attachKey);
ossClient.shutdown();
}catch (Exception e){
e.printStackTrace();
logger.error("OSS合并文件失败:" + e.getMessage());
result = false;
}
return result;
}
华为云OBS
华为云api与阿里云api大致相同,只有个别参数名称不同,间接上代码
public String getUplaodOSSID(String key) throws Exception {
key = "chunk/" + key + "/" + key;
TenantParams.attach appConfig = getAttach();
ObsClient obsClient = InitOBS(appConfig);
String bucketName = appConfig.getBucketname_auth();
InitiateMultipartUploadRequest request = new InitiateMultipartUploadRequest(bucketName, key);
InitiateMultipartUploadResult result = obsClient.initiateMultipartUpload(request);
String uploadId = result.getUploadId();
obsClient.close();
return uploadId;
}
public String uploadChunk(InputStream stream, String key, int chunkIndex, int chunkSize, String uploadId) {
key = "chunk/" + key + "/" + key;
String result = "";
try {
TenantParams.attach appConfig = getAttach();
ObsClient obsClient = InitOBS(appConfig);
String bucketName = appConfig.getBucketname_auth();
UploadPartRequest uploadPartRequest = new UploadPartRequest();
uploadPartRequest.setBucketName(bucketName);
uploadPartRequest.setUploadId(uploadId);
uploadPartRequest.setObjectKey(key);
uploadPartRequest.setInput(stream);
uploadPartRequest.setOffset(chunkIndex * chunkSize);
// 设置分片大小。除了最初一个分片没有大小限度,其余的分片最小为100 KB。
uploadPartRequest.setPartSize((long) chunkSize);
// 设置分片号。每一个上传的分片都有一个分片号,取值范畴是1~10000,如果超出此范畴,OSS将返回InvalidArgument错误码。
uploadPartRequest.setPartNumber(chunkIndex + 1);
// 每个分片不须要按程序上传,甚至能够在不同客户端上传,OSS会依照分片号排序组成残缺的文件。
UploadPartResult uploadPartResult = obsClient.uploadPart(uploadPartRequest);
PartEtag partETag = new PartEtag(uploadPartResult.getEtag(), uploadPartResult.getPartNumber());
result = JSON.toJSONString(partETag);
obsClient.close();
} catch (Exception e) {
e.printStackTrace();
logger.error("OBS上传文件Chunk:" + e.getMessage());
}
return result;
}
public boolean merageFile(String uploadId, List<PartEtag> chunkInfoList, String key, AttachmentDO attachmentDO, boolean checkMerge) {
key = "chunk/" + key + "/" + key;
boolean result = true;
try {
TenantParams.attach appConfig = getAttach();
ObsClient obsClient = InitOBS(appConfig);
String bucketName = appConfig.getBucketname_auth();
if (!checkMerge) {
CompleteMultipartUploadRequest request = new CompleteMultipartUploadRequest(bucketName, key, uploadId, chunkInfoList);
obsClient.completeMultipartUpload(request);
}
String attachKey = getKey(attachmentDO);
obsClient.copyObject(bucketName, key, bucketName, attachKey);
obsClient.close();
} catch (Exception e) {
e.printStackTrace();
logger.error("OBS合并文件失败:" + e.getMessage());
result = false;
}
return result;
}
Minio
文件存储Minio利用比拟宽泛,框架也同时反对了本人独立部署的Minio文件存储系统,Minio没有对应的分片上传api反对,咱们能够在上传完分片文件后,应用composeObject办法进行文件的合并
public boolean uploadChunk(InputStream stream, String key, int chunkIndex) {
boolean result = true;
try {
MinioClient minioClient = InitMinio();
String bucketName = frameConfig.getMinio_bucknetname();
PutObjectOptions option = new PutObjectOptions(stream.available(), -1);
key = "chunk/" + key + "/" + key;
minioClient.putObject(bucketName, key + "-" + chunkIndex, stream, option);
} catch (Exception e) {
logger.error("Minio上传Chunk文件失败:" + e.getMessage());
result = false;
}
return result;
}
public boolean merageFile(String key, int chunkCount, AttachmentDO attachmentDO, boolean checkMerge) {
boolean result = true;
try {
MinioClient minioClient = InitMinio();
String bucketName = frameConfig.getMinio_bucknetname();
key = "chunk/" + key + "/" + key;
if (!checkMerge) {
List<ComposeSource> sourceObjectList = new ArrayList<ComposeSource>();
for (int i = 0; i < chunkCount; i++) {
ComposeSource composeSource = ComposeSource.builder().bucket(bucketName).object(key + "-" + i).build();
sourceObjectList.add(composeSource);
}
minioClient.composeObject(ComposeObjectArgs.builder().bucket(bucketName).object(key).sources(sourceObjectList).build());
}
String attachKey = getKey(attachmentDO);
minioClient.copyObject(
CopyObjectArgs.builder()
.bucket(bucketName)
.object(attachKey)
.source(
CopySource.builder()
.bucket(bucketName)
.object(key)
.build())
.build());
} catch (Exception e) {
logger.error("Minio合并文件失败:" + e.getMessage());
result = false;
}
return result;
}
发表回复