# Original listing metadata: 113 lines, 5.0 KiB, Python
import os
import sys
import time

import easyocr

# --- Configuration ---
# Base directory whose immediate subfolders contain the images to process.
BASE_IMAGE_DIR = '/Users/zpc01/workspace/zzlh/compliance/assets/images/'
# Languages to recognize.
LANGUAGES = ['ch_sim', 'en']
# Whether to try to use the GPU (if available).
USE_GPU = True
# 0: output only the list of texts; 1: output details (coordinates, text, confidence).
OUTPUT_DETAIL = 0

# --- Initialize the EasyOCR Reader ---
# The reader is created once at start-up from LANGUAGES and USE_GPU.
print("正在加载 EasyOCR 模型... 这可能需要一些时间。")
try:
    reader = easyocr.Reader(LANGUAGES, gpu=USE_GPU)
except Exception as e:
    # Top-level boundary: without a reader nothing else can run, so report and stop.
    print(f"加载 EasyOCR 模型时出错: {e}")
    print("请确保已正确安装 EasyOCR 及其依赖项,并且模型文件可下载或已存在于 ~/.EasyOCR/model 目录下。")
    # sys.exit() rather than the site-provided exit(); a non-zero status signals failure.
    sys.exit(1)
else:
    print("EasyOCR 模型加载成功。")

# --- Discover all subfolders of the base directory ---
print(f"开始处理目录: {BASE_IMAGE_DIR}")

# Fail early with a clear message when the base directory is missing.
if not os.path.isdir(BASE_IMAGE_DIR):
    print(f"错误:基础目录 '{BASE_IMAGE_DIR}' 不存在或不是一个有效的目录。")
    sys.exit(1)

try:
    # os.listdir() returns entries in arbitrary order; sort so that folders are
    # processed in a deterministic order from run to run.
    subfolders = sorted(
        f for f in os.listdir(BASE_IMAGE_DIR)
        if os.path.isdir(os.path.join(BASE_IMAGE_DIR, f))
    )
except OSError as e:
    print(f"错误:无法访问目录 '{BASE_IMAGE_DIR}'。请检查权限。 {e}")
    sys.exit(1)

if not subfolders:
    # Nothing to do is not a failure, so exit with status 0.
    print(f"在 '{BASE_IMAGE_DIR}' 下没有找到任何子文件夹。")
    sys.exit(0)

print(f"找到 {len(subfolders)} 个子文件夹,将逐一处理...")

# --- Process the subfolders one by one ---
# For each subfolder: run OCR on its PNG files in lexicographic filename order
# and write all recognized text, one entry per line, to
# "<folder_name>_easy.txt" inside that same subfolder.
for folder_name in subfolders:
    subdir_path = os.path.join(BASE_IMAGE_DIR, folder_name)
    print(f"\n--- 正在处理文件夹: {folder_name} ---")

    # --- Collect the PNG files of this subfolder, sorted lexicographically ---
    try:
        png_filenames_sorted = sorted(
            f for f in os.listdir(subdir_path)
            if f.lower().endswith('.png')
            and os.path.isfile(os.path.join(subdir_path, f))
        )
    except OSError as e:
        print(f" 错误:无法访问子文件夹 '{subdir_path}' 或读取其内容。跳过此文件夹。 {e}")
        continue  # move on to the next folder

    if not png_filenames_sorted:
        print(f" 在文件夹 '{folder_name}' 中未找到 PNG 图片。")
        continue  # move on to the next folder

    total_images = len(png_filenames_sorted)
    print(f" 找到 {total_images} 个 PNG 文件,将按字典序处理...")

    # Recognized text of every image in this folder, in processing order.
    all_texts_in_folder = []

    # --- OCR the PNG files in sorted order ---
    # enumerate() supplies the 1-based position directly instead of searching
    # the list for each filename.
    for position, filename in enumerate(png_filenames_sorted, start=1):
        image_path = os.path.join(subdir_path, filename)
        print(f" 正在识别图片 (顺序: {position}/{total_images}): {filename} ...")

        try:
            # perf_counter() is monotonic, so the elapsed time cannot be
            # distorted by system clock adjustments.
            start_time = time.perf_counter()
            result = reader.readtext(image_path, detail=OUTPUT_DETAIL)
            elapsed = time.perf_counter() - start_time

            if not result:
                print(" 未识别到文本。")
            elif OUTPUT_DETAIL == 0:
                # detail=0: the result is used directly as a list of text strings.
                all_texts_in_folder.extend(result)
                print(f" 识别到 {len(result)} 段文本,耗时: {elapsed:.2f} 秒")
            else:
                # Detailed mode: each entry is unpacked as a 3-tuple and only
                # the middle element (the text) is kept.
                texts_only = [text for _, text, _ in result]
                all_texts_in_folder.extend(texts_only)
                print(f" 识别到 {len(texts_only)} 段文本 (详细模式),耗时: {elapsed:.2f} 秒")
        except Exception as e:
            # Best effort: one unreadable image must not abort the whole folder.
            print(f" 处理图片 '{filename}' 时发生错误: {e}")
            continue  # carry on with the next file

    # --- Write everything recognized in this folder to a text file ---
    if not all_texts_in_folder:
        # Images were found but none produced text, so no file is created.
        print(f" 文件夹 '{folder_name}' 中的 PNG 图片均未识别到文本,不创建 .txt 文件。")
        continue

    output_txt_filename = f"{folder_name}_easy.txt"
    output_txt_path = os.path.join(subdir_path, output_txt_filename)

    try:
        with open(output_txt_path, 'w', encoding='utf-8') as f:
            f.writelines(f"{text}\n" for text in all_texts_in_folder)
    except OSError as e:
        print(f" 错误:无法写入输出文件 '{output_txt_path}'. {e}")
    else:
        print(f" 已将所有识别结果写入文件: {output_txt_path}")

print("\n--- 所有文件夹处理完毕 ---")