关于python:格式工厂合并-mp4-和-srt并利用-python-按照字幕剪辑视频将其分割为若干小段

格局工厂合并 mp4 和 srt，并利用 python 依照字幕剪辑视频，将其宰割为若干小段

一、视频合并

1.抉择转换为mp4，将视频导入格局工厂

2.调整字幕款式

二、python

1.可能用到的命令：

pip install moviepy

2.main.py

import cut_srtimport cut_videoif __name__ == '__main__':    my_video_path = r"D:\Videos\Star Wars\Star Wars 9 The.Rise.of.Skywalker.2019.mp4"    # 将指标文件夹里所有的srt文件都进行格式化txt操作    my_srt_path = r"D:\Documents"    cut_srt.srt_to_format_txt(my_srt_path)    cut_video.cut_video_by_srt(my_video_path, my_srt_path)

3.cut_srt.py

import reimport osdef check_contain_chinese(check_str):    for ch in check_str:        if u'\u4e00' <= ch <= u'\u9fff':            return True    return False# 输出原题目，返回格式化后的文件名def validate_title(str_):    pattern = r"[\/\\\:\*\?\"\<\>\|\.\,\!\'\-\♪\？\！\…\“\”\，]"    new_title = re.sub(pattern, "", str_)    # strip() 办法用于移除字符串头尾指定的字符（默认为空格或换行符）或字符序列。留神：该办法只能删除结尾或是结尾的字符，不能删除两头局部的字符    # re.sub(' +', ' ', str) 将 str 中的多个空格转化为一个空格    return re.sub(' +', ' ', new_title.lower().strip())# 输出字幕文件，失去由字幕组成的二维列表def get_format_sequences(srt_path_):    with open(srt_path_, 'r', encoding='utf-8-sig') as f:        content_ = f.read()    sequences_ = content_.split('\n\n')    sequences_ = [se.split('\n') for se in sequences_]    # 去除每一句空值    sequences_ = [list(filter(None, sequence)) for sequence in sequences_]    new_sequences = []    for se in sequences_:        if len(se) == 4:            new_sequences.append(se)    i = 0    for se in new_sequences:        # 序号，时间段，字幕1，字幕2        new_sequences[i] = [se[0], se[1], se[2], se[3]]        i += 1    return new_sequences# 输出工夫 1:20:12, 输入对应工夫的秒数（1*3600+20*60+12）def str2sec(x):    h, m, s = x.split(':')    return int(h) * 3600 + int(m) * 60 + int(s)# 输出 02:09:53,440 --> 02:09:55,740，返回一个起始工夫对应的秒数和完结工夫对应的秒数def get_start_end_time(str_):    start_time_, end_time_ = str_.strip().split("-->")    start_time_ = start_time_.split(",")[0]    end_time_ = end_time_.split(",")[0]    return str2sec(start_time_), str2sec(end_time_)def srt_to_format_txt(srt_path):    for fileName in os.listdir(srt_path):        if fileName.endswith(".srt"):            print(fileName)            file_path = srt_path + "\\" + fileName            count = 1            sequences = get_format_sequences(file_path)            # 判断字幕第一行是否蕴含中文            if check_contain_chinese(sequences[0][2]):                en_position = 3                ch_position = 2            else:                en_position = 2                ch_position = 3            # r: 以只读形式关上文件。文件的指针将会放在文件的结尾            # rb: 以二进制格局关上一个文件用于只读。文件指针将会放在文件的结尾            # w: 关上一个文件只用于写入。如果该文件已存在则将其笼罩。如果该文件不存在，创立新文件            # wb: 以二进制格局关上一个文件只用于写入。如果该文件已存在则将其笼罩。如果该文件不存在，创立新文件            # a: 关上一个文件用于追加。如果该文件已存在，文件指针将会放在文件的结尾。也就是说，新的内容将会被写入到已有内容之后。如果该文件不存在，创立新文件进行写入            # ab: 以二进制格局关上一个文件用于追加。如果该文件已存在，文件指针将会放在文件的结尾。也就是说，新的内容将会被写入到已有内容之后。如果该文件不存在，创立新文件进行写入            with open(file_path.replace(".srt", ".csv"), "w", encoding='utf-8-sig') as f:                for i in sequences:                    en = i[en_position]                    ch = i[ch_position]                    movie_name = file_path.split("\\")[-1].replace(".srt", "")                    count_format = "{:05d}".format(count)                    sentence_id = i[0]                    en_format = re.sub(' +', ' ', en.replace("- ", " ").replace(",", " ").strip())                    ch_format = validate_title(ch)                    line = movie_name + "," + count_format + "," + sentence_id + "," + en_format + "," + ch_format                    print(line)                    f.write(line + "\n")                    count = count + 1

4.cut_video.py

import osimport timefrom moviepy.video.io.VideoFileClip import VideoFileClipfrom cut_srt import get_format_sequences, get_start_end_timedef cut_video_by_start_end(video_path_, save_file_path_, my_start, my_end, save_name):    video = VideoFileClip(video_path_)    video = video.subclip(my_start, my_end)    video.write_videofile(save_file_path_ + save_name, fps=24, logger=None)    video.close()def cut_video_by_srt(video_path, srt_path):    # 将裁剪后的视频片段寄存在 save_file_path    save_file_path = video_path.replace(".mp4", "\\")    if not os.path.exists(save_file_path):        os.makedirs(save_file_path)    count = len(os.listdir(save_file_path))    if count == 0:        count = 1        start_index = 0    else:        count = count        start_index = count - 1    sequences = get_format_sequences(srt_path)    my_time = time.time()    for i in sequences[start_index:]:        file_name = "{:05d}".format(count) + ".mp4"        print(srt_path.split("\\")[-1].replace(".srt", "") +              "共" + str(len(sequences)) + "个，以后：" + file_name +              ", 以后进度：" + str("{:<.2f}".format((count / len(sequences)) * 100)) + "%" + "," +              " 耗时：" + str("{:<.2f}".format(time.time() - my_time)) + "s")        start_time, end_time = get_start_end_time(i[1])        cut_video_by_start_end(video_path, save_file_path, start_time - 2, end_time + 2, file_name)        count = count + 1