diff --git a/.gitignore b/.gitignore
index 3966cdf..217d87f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,6 @@ old/
 *.exe
 build/
 dist/
-*.spec
\ No newline at end of file
+*.spec
+whisper_models/
+release_*/
diff --git a/README.md b/README.md
index baeca2d..8a88171 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 ## Usage: Download a Specific Course
 
-[Download and unzip it](https://github.com/AuYang261/BIT_yanhe_download/releases/latest/download/yanhe.zip).
+[Download and unzip it](https://github.com/AuYang261/BIT_yanhe_download/releases/latest/download/release_downloader.zip).
 
 In [延河课堂 (yanhekt.cn)](https://www.yanhekt.cn/recordCourse), find the course you want to download. Taking a course whose link is https://www.yanhekt.cn/course/40524 as an example, copy the five-digit ID 40524 at the end of the address bar. Note that it must be the course-list link (starting with `yanhekt.cn/course/<five-digit ID>`), not the video-page link (starting with `yanhekt.cn/session/<six-digit ID>`).
 
@@ -16,41 +16,97 @@
 Double-click `main.exe` (the one from the Release) or the `run.bat` file, and enter the ID of the course you want to download (40524). The list of course videos is printed:
 
-![image-20230926124749421](md/README/image-20230926124749421.png)
+![image-20240409103306945](md/README/image-20240409103306945.png)
 
-Enter the numbers of the videos you want to download, separated by English commas (,), and press Enter. Then choose whether to download the video recording (the camera at the back of the classroom) or the screen signal (the classroom computer's screen); the default is the video recording. Press Enter to start downloading:
+Enter the numbers of the videos you want to download, separated by English commas (,), and press Enter. Then enter a number to choose whether to download the video recording (the camera at the back of the classroom) or the screen signal (the classroom computer's screen); the default is the video recording. Press Enter to start downloading:
 
-![image-20230926124841432](md/README/image-20230926124841432.png)
+![image-20240409103338980](md/README/image-20240409103338980.png)
 
 The downloaded files are placed in the `output/` directory, in folders named in the `<course name>-video/screen` format.
 
 ![image-20230926124922726](md/README/image-20230926124922726.png)
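+
+*Note: judging from `main.py`, the course ID can also be passed as the first command-line argument instead of being typed at the prompt (the video selection that follows is still interactive). A sketch, assuming you run from source with the Python environment described under [Dependencies](#dependencies):*
+
+```bash
+# download course 40524 without the interactive course-ID prompt
+python main.py 40524
+```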
 
+## Auto-Generated Subtitles
+
+This project can generate subtitles automatically: it uses OpenAI's [whisper](https://github.com/openai/whisper) project and its models to run speech-to-text locally and produce subtitles.
+
+Running on a GPU is strongly recommended, otherwise it is slow; see the [Dependencies](#dependencies) section below.
+
+Download the [subtitle generator gen_caption](https://github.com/AuYang261/BIT_yanhe_download/releases/latest). Because the program is large, it is published as a split archive. Download all the parts and extract them to get a single executable, `gen_caption.exe`. Put it in the directory where the `release_downloader.zip` above was extracted, at the same level as the `output/` directory that holds the videos, as shown below:
+
+![image-20240409105228362](md/README/image-20240409105228362.png)
+
+After the videos have been downloaded, double-click `gen_caption.exe` (the file is large, so it takes a while to start), enter a number to select a video, and press Enter. Then enter a number to choose the model size; the further down the list, the better the result but the longer it takes. The base model is used by default. On first use the model (a few hundred MB) is downloaded automatically, so please be patient. As shown below:
+
+![image-20240409131033038](md/README/image-20240409131033038.png)
+
+Wait for the program to finish. The generated subtitle file is in `.srt` format and sits next to the video file. Open the video in a player with subtitle support (such as PotPlayer) to watch it with subtitles.
+
+*Tip: speech-to-text takes a while, so you can watch the video first and reopen it once the subtitles are ready. With a GPU it takes roughly a few minutes; without one it takes much longer.*
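+
+*For reference, `.srt` is a plain-text format: each entry is a sequence number, a start/end timestamp line, and the recognized text, separated by blank lines. A made-up sample of what the generated file roughly looks like:*
+
+```
+1
+00:00:01,500 --> 00:00:04,200
+大家好,今天我们开始讲第三章的内容。
+
+2
+00:00:04,900 --> 00:00:08,100
+首先回顾一下上节课的要点。
+```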
 
 ## Dependencies
 
-* ffmpeg, provided in the Release.
+* ffmpeg, provided in the Release. If you run under Linux, install ffmpeg manually:
+
+```bash
+sudo apt update
+sudo apt install ffmpeg
+```
+
+* **If you want to run the automatic subtitle generation on a GPU, install CUDA first; see [this CUDA installation guide](https://blog.csdn.net/chen565884393/article/details/127905428).**
 
-*If you want to run it in a Python environment, install these dependencies*
+*If you want to run it in a Python environment, install the following dependencies*
 
 * Python: [download](https://www.python.org/ftp/python/3.9.4/python-3.9.4-amd64.exe) and install it
 * The third-party Python library requests. Open a command prompt (press Win+R, type cmd in the window that opens, and press Enter), then run the following command to install it:
 
-  ```bash
-  pip install -r requirements.txt
-  ```
+```bash
+pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
+```
+
+* Install the speech-to-text dependencies (they depend on PyTorch; if PyTorch is not installed it will be installed automatically, but only the CPU build. See the [PyTorch website](https://pytorch.org/get-started/locally/) for how to install the CUDA build):
+
+```bash
+pip install -r requirements-whisper.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
+```
 
 ## Notes
 
 * Turn off any proxy on this machine, otherwise you will get errors like `check_hostname requires server_hostname`.
 * Courses you have no permission for can also be downloaded, as long as you know the course link (i.e. the course ID in it).
 
-## Packaging
+## Packaging (developers only)
 
 If you want to run without depending on a Python environment, you can package the Python program into an executable. The Release already ships packaged builds.
 
 Package with the following commands:
 
 ```bash
-Pyinstaller -F main.py
+# if pyinstaller is not installed, install it first
+pip install pyinstaller
+# package
+pyinstaller -F main.py
+pyinstaller -F gen_caption.py
 ```
+
+Packaging `gen_caption.py` may fail with a "recursion depth exceeded" error:
+
+![image-20240409095211597](md/README/image-20240409095211597.png)
+
+The fix is described [here](https://zhuanlan.zhihu.com/p/661325305): edit the `gen_caption.spec` file in the project root and add the following code at the beginning of the file:
+
+```python
+import sys; sys.setrecursionlimit(sys.getrecursionlimit() * 5)
+```
+
+Then package again with:
+
+```bash
+pyinstaller --clean .\gen_caption.spec
+```
+
+If the packaged program fails at run time because files under the Temp directory cannot be found:
+
+![image-20240409095831766](md/README/image-20240409095831766.png)
+
+The fix is described [here](https://blog.csdn.net/qq_42324086/article/details/118280341): copy the `hook-whisper.py` and `hook-zhconv.py` files from the project's `hooks` directory into PyInstaller's hooks directory (usually `<Python root>\Lib\site-packages\PyInstaller\hooks`).
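+
+*One way to do the copy without hunting for the path by hand is the following one-liner; it is only a sketch, assuming PyInstaller is installed in the active Python environment and that you run it from the project root:*
+
+```bash
+python -c "import PyInstaller, os, shutil; d = os.path.join(os.path.dirname(PyInstaller.__file__), 'hooks'); shutil.copy('hooks/hook-whisper.py', d); shutil.copy('hooks/hook-zhconv.py', d)"
+```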
diff --git a/gen_caption.py b/gen_caption.py
new file mode 100644
index 0000000..31fd5c2
--- /dev/null
+++ b/gen_caption.py
@@ -0,0 +1,96 @@
+import whisper
+import time
+from zhconv import convert  # Simplified/Traditional Chinese conversion
+import sys
+import os
+
+
+def seconds_to_hmsm(seconds):
+    """
+    Take a number of seconds and return an H:M:S,ms timestamp string (SRT style).
+    @params:
+        seconds - Required : seconds (float)
+    """
+    hours = str(int(seconds // 3600))
+    minutes = str(int((seconds % 3600) // 60))
+    seconds = seconds % 60
+    milliseconds = str(int(int((seconds - int(seconds)) * 1000)))  # keep three digits for milliseconds
+    seconds = str(int(seconds))
+    # zero-pad each field
+    if len(hours) < 2:
+        hours = "0" + hours
+    if len(minutes) < 2:
+        minutes = "0" + minutes
+    if len(seconds) < 2:
+        seconds = "0" + seconds
+    if len(milliseconds) < 3:
+        milliseconds = "0" * (3 - len(milliseconds)) + milliseconds
+    return f"{hours}:{minutes}:{seconds},{milliseconds}"
+
+
+def main():
+    # video file paths
+    video_paths = []
+    if len(sys.argv) >= 2:
+        video_paths.append(sys.argv[1])
+    else:
+        files = []
+        for dirpath, dirnames, filenames in os.walk("."):
+            for filename in filenames:
+                if filename.endswith(".mp4"):
+                    files.append(os.path.join(dirpath, filename).replace("\\", "/"))
+        for i, f in enumerate(files):
+            print(f"[{i}]: ", f)
+        input_list = eval(
+            "[" + input("select a video file by input a num(split with ','): ") + "]"
+        )
+        for i in input_list:
+            video_paths.append(files[i])
+        print("selected video files:", video_paths)
+    models = []
+    for model in whisper.available_models():
+        if ".en" in model:
+            continue
+        print(f"[{len(models)}]: ", model)
+        models.append(model)
+    model_index = input("select a model by input a num(default 'base'): ")
+    try:
+        model_name = models[eval(model_index)]
+    except:
+        model_name = "base"
+    print("selected model:", model_name)
+
+    for video_path in video_paths:
+        audio_path = video_path.replace("mp4", "m4a")
+        cmd = f'ffmpeg -i "{video_path}" -vn -ar {whisper.audio.SAMPLE_RATE} "{audio_path}"'
+        os.system(cmd)
+
+        model = whisper.load_model(model_name, download_root="whisper_models/")
+
+        start = time.time()
+        result = model.transcribe(audio_path, verbose=False, language="zh")
+        print("Time cost: ", time.time() - start)
+
+        # write the subtitle file
+        with open(video_path.replace("mp4", "srt"), "w", encoding="utf-8") as f:
+            i = 1
+            for r in result["segments"]:
+                f.write(str(i) + "\n")
+                f.write(
+                    seconds_to_hmsm(float(r["start"]))
+                    + " --> "
+                    + seconds_to_hmsm(float(r["end"]))
+                    + "\n"
+                )
+                i += 1
+                f.write(
+                    convert(r["text"], "zh-cn") + "\n"
+                )  # the result may be Traditional Chinese; convert to Simplified (zh-cn)
+                f.write("\n")
+
+        # delete the temporary audio file
+        os.remove(audio_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hooks/hook-whisper.py b/hooks/hook-whisper.py
new file mode 100644
index 0000000..66bb552
--- /dev/null
+++ b/hooks/hook-whisper.py
@@ -0,0 +1,3 @@
+from PyInstaller.utils.hooks import collect_data_files
+
+datas = collect_data_files("whisper")
diff --git a/hooks/hook-zhconv.py b/hooks/hook-zhconv.py
new file mode 100644
index 0000000..be7e3a9
--- /dev/null
+++ b/hooks/hook-zhconv.py
@@ -0,0 +1,3 @@
+from PyInstaller.utils.hooks import collect_data_files
+
+datas = collect_data_files("zhconv")
diff --git a/main.py b/main.py
index 012ab4d..38fad20 100644
--- a/main.py
+++ b/main.py
@@ -6,55 +6,82 @@
 import json
 import os
 import cProfile
 
-headers={
-    'Origin': 'https://www.yanhekt.cn',
+
+headers = {
+    "Origin": "https://www.yanhekt.cn",
     "xdomain-client": "web_user",
-    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.26'
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.26",
 }
 
 # courseID = 31425
 
 
 def main():
     if len(sys.argv) == 1:
-        courseID = eval(input('Please input course ID: '))
+        courseID = eval(input("Please input course ID: "))
     else:
         courseID = sys.argv[1]
-    course = requests.get(f'https://cbiz.yanhekt.cn/v1/course?id={courseID}&with_professor_badges=true', headers=headers)
-    req = requests.get(f'https://cbiz.yanhekt.cn/v2/course/session/list?course_id={courseID}', headers=headers)
-    if course.json()['code'] != '0' and course.json()['code'] != 0:
-        print(course.json()['code'])
-        print(course.json()['message'])
-        raise Exception("Please Check your course ID, note that it should be started with yanhekt.cn/course/***, not yanhekt.cn/session/***")
-    print(course.json()['data']['name_zh'])
-    videoList = req.json()['data']
+    course = requests.get(
+        f"https://cbiz.yanhekt.cn/v1/course?id={courseID}&with_professor_badges=true",
+        headers=headers,
+    )
+    req = requests.get(
+        f"https://cbiz.yanhekt.cn/v2/course/session/list?course_id={courseID}",
+        headers=headers,
+    )
+    if course.json()["code"] != "0" and course.json()["code"] != 0:
+        print(course.json()["code"])
+        print(course.json()["message"])
+        raise Exception(
+            "Please Check your course ID, note that it should be started with yanhekt.cn/course/***, not yanhekt.cn/session/***"
+        )
+    print(course.json()["data"]["name_zh"])
+    videoList = req.json()["data"]
     # print(json.dumps(videoList, indent=2))
     for i, c in enumerate(videoList):
-        print(i, ":", c['title'])
+        print(f"[{i}]: ", c["title"])
 
-    index = eval('[' + input('select(split by \',\', such as: 0,2,4):') + ']')
-    vga = input('video(1) or screen(2)?(input 1 or 2, default video):')
-    if not os.path.exists('output/'):
-        os.mkdir('output/')
+    index = eval("[" + input("select(split by ',', such as: 0,2,4): ") + "]")
+    vga = input("video(1) or screen(2)?(input 1 or 2, default video):")
+    if not os.path.exists("output/"):
+        os.mkdir("output/")
 
     for i in index:
         c = videoList[i]
-        name = course.json()['data']['name_zh'].strip() + '-' + course.json()['data']['professors'][0]['name'] + '-' + c['title']
+        name = (
+            course.json()["data"]["name_zh"].strip()
+            + "-"
+            + course.json()["data"]["professors"][0]["name"]
+            + "-"
+            + c["title"]
+        )
         print(name)
         if vga == "2":
             print("Downloading screen...")
-            m3u8dl.M3u8Download(c['videos'][0]['vga'], 'output/' + course.json()['data']['name_zh'].strip() + '-screen', name)
+            m3u8dl.M3u8Download(
+                c["videos"][0]["vga"],
+                "output/" + course.json()["data"]["name_zh"].strip() + "-screen",
+                name,
+            )
         else:
             print("Downloading video...")
-            m3u8dl.M3u8Download(c['videos'][0]['main'], 'output/'+ course.json()['data']['name_zh'].strip() + '-video', name)
+            m3u8dl.M3u8Download(
+                c["videos"][0]["main"],
+                "output/" + course.json()["data"]["name_zh"].strip() + "-video",
+                name,
+            )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     try:
         main()
         # cProfile.run('main()', 'output/profile.txt')
     except Exception as e:
         print(e)
-        print("If the problem is still not solved, you can report an issue in https://github.com/AuYang261/BIT_yanhe_download/issues.")
+        print(
+            "If the problem is still not solved, you can report an issue in https://github.com/AuYang261/BIT_yanhe_download/issues."
+        )
         print("Or contact with the author xu_jyang@163.com. Thanks for your report!")
-        print("如果问题仍未解决,您可以在https://github.com/AuYang261/BIT_yanhe_download/issues 中报告问题。")
+        print(
+            "如果问题仍未解决,您可以在https://github.com/AuYang261/BIT_yanhe_download/issues 中报告问题。"
+        )
         print("或者联系作者xu_jyang@163.com。感谢您的报告!")
diff --git a/md/README/image-20230926124749421.png b/md/README/image-20230926124749421.png
deleted file mode 100644
index 06bdf63..0000000
Binary files a/md/README/image-20230926124749421.png and /dev/null differ
diff --git a/md/README/image-20230926124841432.png b/md/README/image-20230926124841432.png
deleted file mode 100644
index 61baebf..0000000
Binary files a/md/README/image-20230926124841432.png and /dev/null differ
diff --git a/md/README/image-20240409095211597.png b/md/README/image-20240409095211597.png
new file mode 100644
index 0000000..4d89803
Binary files /dev/null and b/md/README/image-20240409095211597.png differ
diff --git a/md/README/image-20240409095831766.png b/md/README/image-20240409095831766.png
new file mode 100644
index 0000000..ae1e688
Binary files /dev/null and b/md/README/image-20240409095831766.png differ
diff --git a/md/README/image-20240409103306945.png b/md/README/image-20240409103306945.png
new file mode 100644
index 0000000..f097d11
Binary files /dev/null and b/md/README/image-20240409103306945.png differ
diff --git a/md/README/image-20240409103338980.png b/md/README/image-20240409103338980.png
new file mode 100644
index 0000000..9851bf6
Binary files /dev/null and b/md/README/image-20240409103338980.png differ
diff --git a/md/README/image-20240409105228362.png b/md/README/image-20240409105228362.png
new file mode 100644
index 0000000..210e35d
Binary files /dev/null and b/md/README/image-20240409105228362.png differ
diff --git a/md/README/image-20240409131033038.png b/md/README/image-20240409131033038.png
new file mode 100644
index 0000000..7fa470d
Binary files /dev/null and b/md/README/image-20240409131033038.png differ
diff --git a/requirements-whisper.txt b/requirements-whisper.txt
new file mode 100644
index 0000000..640ffc4
--- /dev/null
+++ b/requirements-whisper.txt
@@ -0,0 +1,2 @@
+openai-whisper
+zhconv