Compressing a folder with multiple threads in Python
""" python 多线程压缩指定文件夹 """ # -*- coding:utf-8 -*- import shutil import time from concurrent.futures import ThreadPoolExecutor def compress_folder(folder_path, output_path): max_workers = 30 # 设置线程池的大小为4 with ThreadPoolExecutor(max_workers=max_workers) as executor: executor.submit(shutil.make_archive, output_path, 'zip', folder_path) if __name__ == '__main__': stime = time.time() # 调用示例: folder_path = r"E:\w\desktop" output_path = r'D:\desktop.zip' compress_folder(folder_path, output_path) etime = time.time() print(f'耗时{etime-stime}')
This code is suitable when the folder is not very large: the entire folder is packed by a single make_archive task, which reads the files in and compresses them in one pass. With large files or a large number of files, memory consumption and run time can become a problem.
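It is also worth noting that the pool above receives exactly one task, so the extra threads contribute nothing here; the following direct call (a minimal equivalent sketch, not from the original) produces the same archive:

```python
import shutil

# Single-call equivalent: base name without extension; '.zip' is appended automatically
shutil.make_archive(r'D:\desktop', 'zip', r'E:\w\desktop')
```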
For larger folders, the difference from the code above is that you can choose how many files go into each archive and compress several archives in parallel, using the following code:
""" python 多线程压缩指定文件夹 """ import os import zipfile from concurrent.futures import ThreadPoolExecutor import time def compress_files(file_list, zip_file_path): with zipfile.ZipFile(zip_file_path, "w", zipfile.ZIP_DEFLATED) as zip_file: for file in file_list: zip_file.write(file) print(f"Files compressed to: {zip_file_path}") def split_and_compress_folder(folder_path, files_per_zip, num_threads): # 获取文件夹中的所有文件 file_list = [] for root, dirs, files in os.walk(folder_path): for file in files: file_path = os.path.join(root, file) file_list.append(file_path) # 分割文件列表为多个子列表 split_files = [file_list[i:i + files_per_zip] for i in range(0, len(file_list), files_per_zip)] with ThreadPoolExecutor(max_workers=num_threads) as executor: futures = [] for i, files in enumerate(split_files): folder_name = os.path.basename(folder_path) zip_file_name = f"{folder_name}_part{i + 1}.zip" zip_file_path = os.path.join(folder_path, zip_file_name) future = executor.submit(compress_files, files, zip_file_path) futures.append(future) # 等待所有压缩任务完成 for future in futures: future.result() if __name__ == '__main__': stime = time.time() folder_path = r"E:\w\upload" # 替换为实际的文件夹路径 files_per_zip = 10000 # 每个分卷文件的文件数量 num_threads = 60 # 并发线程数 split_and_compress_folder(folder_path, files_per_zip, num_threads) etime = time.time() print(f'耗时{etime - stime}S , {(etime - stime) / 3600}')
Below is an improved example that compresses each file as it is read, to reduce memory usage:
```python
import os
import time
import zipfile
from concurrent.futures import ThreadPoolExecutor

# Original version: stores each file under its full collected path
def compress_files(file_list, zip_file_path):
    with zipfile.ZipFile(zip_file_path, "w", zipfile.ZIP_DEFLATED) as zip_file:
        for file in file_list:
            zip_file.write(file)
    print(f"Files compressed to: {zip_file_path}")

# Improved version: each file is written into the archive as soon as it is
# read, instead of loading everything into memory first; the file list has
# already been sliced by files_per_zip, and each slice becomes one archive.
def compress_files1(file_list, zip_file_path):
    with zipfile.ZipFile(zip_file_path, "w", zipfile.ZIP_DEFLATED) as zip_file:
        for file_path in file_list:
            file_name = os.path.basename(file_path)
            # arcname stores only the base name, so the archive is flat
            zip_file.write(file_path, arcname=file_name)
    print(f"Files compressed to: {zip_file_path}")

def split_and_compress_folder(folder_path, files_per_zip, num_threads):
    # Collect every file in the folder (recursively)
    file_list = []
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            file_path = os.path.join(root, file)
            file_list.append(file_path)

    # Split the file list into chunks of files_per_zip entries
    split_files = [file_list[i:i + files_per_zip]
                   for i in range(0, len(file_list), files_per_zip)]

    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = []
        for i, files in enumerate(split_files):
            folder_name = os.path.basename(folder_path)
            zip_file_name = f"{folder_name}_part{i + 1}.zip"
            zip_file_path = os.path.join(folder_path, zip_file_name)
            # future = executor.submit(compress_files, files, zip_file_path)
            future = executor.submit(compress_files1, files, zip_file_path)
            futures.append(future)

        # Wait for all compression tasks to finish
        for future in futures:
            future.result()

if __name__ == '__main__':
    stime = time.time()
    folder_path = r"E:\w\res"  # replace with the actual folder path
    files_per_zip = 10000      # number of files per split archive
    num_threads = 60           # number of worker threads
    split_and_compress_folder(folder_path, files_per_zip, num_threads)
    etime = time.time()
    print(f'Elapsed: {etime - stime}s, {(etime - stime) / 3600}h')
```
In the improved compress_files1 function, each file is written into the archive as soon as it is read, rather than reading all files into memory at once. Using the total file count and the number of files each archive should contain, the file list is sliced and one split archive is generated per slice.
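Keep in mind that arcname=file_name stores only the base name, so the archives are flat and two files with identical names in different subfolders would collide. If the directory layout matters, a hedged variant (compress_files_relative is an illustrative name, not part of the original) can store paths relative to the source folder instead:

```python
import os
import zipfile

def compress_files_relative(file_list, zip_file_path, base_dir):
    """Illustrative variant: keep paths relative to base_dir inside the archive."""
    with zipfile.ZipFile(zip_file_path, "w", zipfile.ZIP_DEFLATED) as zip_file:
        for file_path in file_list:
            # e.g. E:\w\res\a\b.txt is stored as a/b.txt inside the archive
            zip_file.write(file_path, arcname=os.path.relpath(file_path, base_dir))
    print(f"Files compressed to: {zip_file_path}")
```

split_and_compress_folder would then pass folder_path as base_dir when submitting each task.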
Note that the code above only illustrates the concept; for real use you should add appropriate error and exception handling. Also, depending on the size of the archives and the available system resources, you may need to adjust files_per_zip to balance compression speed against memory consumption.
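As a starting point, here is a hedged sketch of what that error handling might look like (compress_files_safe is an illustrative name, and the choice of which exceptions to catch is an assumption):

```python
import os
import zipfile

def compress_files_safe(file_list, zip_file_path):
    """Illustrative sketch: compress_files1 with per-file error handling."""
    with zipfile.ZipFile(zip_file_path, "w", zipfile.ZIP_DEFLATED) as zip_file:
        for file_path in file_list:
            try:
                zip_file.write(file_path, arcname=os.path.basename(file_path))
            except OSError as exc:
                # e.g. the file was deleted or locked between os.walk and now
                print(f"Skipped {file_path}: {exc}")
    print(f"Files compressed to: {zip_file_path}")
```

In the same spirit, wrapping each future.result() call in try/except would surface exceptions raised inside worker threads without letting the first failure abort the wait loop.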