一直在“爱下电子书”上下载爽文看,不知道从什么时候起它屏蔽了大陆的访问,而且下载到的也只有繁体版,于是有了下面的代码。这段代码来自网络,无法确定出处;贴上来主要是把运行环境交代清楚,如果你用得到,可以参考。
Python 版本:3.9
Opencc库:pip install opencc-python-reimplemented==0.1.4
# Copyright 2022 ljmold.cn All rights reserved
#
# Authors: Carry Jin
# 将电子书中的繁体中文转换为简体中文
import opencc
import os
import zipfile
import shutil
def test_convert():
    """Print one sample conversion in each direction as a smoke test.

    Configurations exercised:
        s2t.json  Simplified Chinese -> Traditional Chinese
        t2s.json  Traditional Chinese -> Simplified Chinese
    """
    samples = (
        ('s2t.json', '汉字acb'),  # Simplified input; expected to print 漢字acb
        ('t2s.json', '漢字123'),  # Traditional input; expected to print 汉字123
    )
    for config_name, text in samples:
        # A fresh converter is built for each direction, as before.
        converter = opencc.OpenCC(config_name)
        print(converter.convert(text))
def _is_inside_dir(root, target):
    """Return True when *target* is *root* itself or a path underneath it."""
    try:
        return os.path.commonpath([root, target]) == root
    except ValueError:
        # commonpath() refuses to compare e.g. paths on different drives;
        # such a target is certainly not inside root.
        return False


def unzip_dir(zipfilename, unzipdirname):
    """Extract every entry of a zip archive into a directory.

    Args:
        zipfilename: Path of the zip (or epub) archive to read.
        unzipdirname: Directory to extract into. It is created, including
            missing parents, when it does not exist. If a regular file
            already occupies that path the user is asked on stdin whether
            to delete it.

    Returns:
        None. The function returns early, without extracting, when the
        archive does not exist or the user answers "N" to the prompt.
    """
    fullzipfilename = os.path.abspath(zipfilename)
    fullunzipdirname = os.path.abspath(unzipdirname)
    print("Start to unzip file %s to folder %s ..." % (zipfilename, unzipdirname))

    # Validate the input archive.
    if not os.path.exists(fullzipfilename):
        print("Dir/File %s is not exist, Press any key to quit..." % fullzipfilename)
        input()  # pause so an interactive user can read the message
        return

    # A regular file sitting where the output directory should go must be
    # removed first; keep asking until the answer is Y or N.
    if os.path.isfile(fullunzipdirname):
        print("File %s is exist, are you sure to delet it first ? [Y/N]" % fullunzipdirname)
        while True:
            answer = input()
            if answer in ("N", "n"):
                return
            if answer in ("Y", "y"):
                os.remove(fullunzipdirname)
                print("Continue to unzip files ...")
                break
    os.makedirs(fullunzipdirname, exist_ok=True)

    # Extract the entries one by one.
    with zipfile.ZipFile(fullzipfilename, "r") as srcZip:
        for eachfile in srcZip.namelist():
            eachfilename = os.path.normpath(os.path.join(fullunzipdirname, eachfile))
            # Entry names come from the archive and may contain ".." or be
            # absolute; never write outside the requested directory.
            if not _is_inside_dir(fullunzipdirname, eachfilename):
                print("Skip unsafe entry %s ..." % eachfile)
                continue
            if eachfile.endswith('/'):
                # Directory entry: nothing to write, just make sure it exists.
                print('Unzip directory %s ...' % eachfile)
                os.makedirs(eachfilename, exist_ok=True)
                continue
            print("Unzip file %s ..." % eachfile)
            # Archives are not required to list a directory before its files.
            os.makedirs(os.path.dirname(eachfilename), exist_ok=True)
            with open(eachfilename, "wb") as fd:
                fd.write(srcZip.read(eachfile))
    print("Unzip file succeed!")
def zip_dir(dirname, zipfilename):
    """Pack a directory tree (or a single file) into a zip archive.

    Archive member names are relative to ``dirname``; sub-directories get
    their own directory entries. A single file is stored under its base
    name. Members are deflate-compressed, except that a top-level file
    called ``mimetype`` is written first and uncompressed, which is what
    the EPUB container format requires of that file.

    Args:
        dirname: Directory whose contents are archived, or one file.
        zipfilename: Path of the archive to create (overwritten if present).
    """
    zip_abspath = os.path.abspath(zipfilename)
    entries = []  # (path on disk, member name inside the archive)
    if os.path.isfile(dirname):
        entries.append((dirname, os.path.basename(dirname)))
    else:
        for root, dirs, files in os.walk(dirname):
            dirs.sort()  # makes the traversal order deterministic
            for name in dirs + sorted(files):
                path = os.path.join(root, name)
                if os.path.abspath(path) == zip_abspath:
                    # The output archive lives inside the tree being packed;
                    # do not try to add it to itself.
                    continue
                entries.append((path, os.path.relpath(path, dirname)))

    def is_mimetype(entry):
        path, arcname = entry
        return arcname == "mimetype" and os.path.isfile(path)

    # Stable sort: the mimetype file moves to the front, every other
    # entry keeps its relative order.
    entries.sort(key=lambda entry: not is_mimetype(entry))

    with zipfile.ZipFile(zipfilename, "w", zipfile.ZIP_DEFLATED) as zf:
        for entry in entries:
            path, arcname = entry
            if is_mimetype(entry):
                zf.write(path, arcname, compress_type=zipfile.ZIP_STORED)
            else:
                zf.write(path, arcname)
def convert_file_to_chinese(file_path):
    """Rewrite a UTF-8 text file in place, Traditional -> Simplified Chinese.

    Every line is passed through an OpenCC ``t2s`` converter on its own; the
    converted lines are collected in memory and then written back over the
    same path.
    """
    t2s = opencc.OpenCC('t2s')
    with open(file_path, mode='r', encoding='utf-8') as source:
        simplified_lines = [t2s.convert(raw_line) for raw_line in source]
    # The file is reopened for writing only after it has been read fully.
    with open(file_path, mode='w', encoding='utf-8') as target:
        target.writelines(simplified_lines)
def convert_epub_simplified(file_path):
    """Convert a Traditional-Chinese epub into a Simplified-Chinese copy.

    The archive is unpacked into a scratch directory next to the book, each
    content file found by ``find_content_files`` is converted in place, and
    the tree is re-packed as ``<original name>-简体.epub`` in the same
    directory as the input. The scratch directory is removed afterwards,
    also when a step fails.

    Args:
        file_path: Path of the epub file to convert.

    Returns:
        Path of the newly written epub.

    Raises:
        FileNotFoundError: ``file_path`` is not an existing regular file.
    """
    import tempfile

    if not os.path.isfile(file_path):
        raise FileNotFoundError("请检查文件路径:{}".format(file_path))

    dir_name, file_name = os.path.split(file_path)
    # splitext also copes with names that have no extension at all.
    base_name = os.path.splitext(file_name)[0]
    new_epub_file_path = os.path.join(dir_name, base_name + "-简体.epub")

    # dir_name is "" for a bare file name; fall back to the current
    # directory rather than building a path at the filesystem root.
    with tempfile.TemporaryDirectory(prefix="unzip", dir=dir_name or os.curdir) as unzip_dir_path:
        unzip_dir(file_path, unzip_dir_path)
        for file in find_content_files(unzip_dir_path):
            convert_file_to_chinese(file)
        zip_dir(unzip_dir_path, new_epub_file_path)
    return new_epub_file_path
def find_content_files(folder_path, suffixes=(".xhtml",)):
    """Collect the paths of the content files under a folder.

    The folder is walked recursively. A file is selected when its name ends
    with one of ``suffixes``, compared case-insensitively.

    Args:
        folder_path: Root folder to search.
        suffixes: File-name suffix, or iterable of suffixes, to select.
            Defaults to ``.xhtml`` only.

    Returns:
        A list of matching file paths, each joined onto the directory in
        which the file was found.
    """
    if isinstance(suffixes, str):
        suffixes = (suffixes,)
    # str.endswith accepts a tuple, so one call tests every suffix.
    wanted = tuple(suffix.lower() for suffix in suffixes)

    result_files = []
    for root, _dirs, files in os.walk(folder_path):
        result_files.extend(
            os.path.join(root, name)
            for name in files
            if name.lower().endswith(wanted)
        )
    return result_files
if __name__ == '__main__':
    import sys

    # To try the Traditional/Simplified conversion on sample strings
    # instead, call test_convert() here.

    # Books to convert are taken from the command line; with no arguments
    # the sample path below is used.
    epub_paths = sys.argv[1:] or ["/Users/jinzhao/Downloads/天字第一当.epub"]
    for epub_file_path in epub_paths:
        convert_epub_simplified(epub_file_path)