当前位置: 首页 > article >正文

OCR实现微信截图改名

pip install paddlepaddle -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install shapely -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install paddleocr -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install easyocr 
import easyocr
import os
import cv2
import time
from paddleocr import PaddleOCR

def get_photo_paths(photos_folder_path):
    """Return the paths of the ``.PNG`` entries directly inside a folder.

    Args:
        photos_folder_path: Directory to scan. Sub-directories are not
            searched.

    Returns:
        A list of paths (folder joined with entry name), in ``os.listdir``
        order, for every entry whose name ends with ``.PNG``. The match is
        case-sensitive.
    """
    # Match on the suffix rather than on a substring, so that names such as
    # "a.PNG.bak" are not mistaken for screenshots.
    return [
        os.path.join(photos_folder_path, photo_name)
        for photo_name in os.listdir(photos_folder_path)
        if photo_name.endswith(".PNG")
    ]


def preprocess_image(image):
    """Turn a BGR image into a denoised black-and-white image for OCR.

    Args:
        image: Image in BGR channel order, as accepted by ``cv2.cvtColor``
            with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The binarized image after a 3x3 median blur.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # NOTE(review): THRESH_OTSU is combined with a fixed threshold of 150;
    # OpenCV presumably picks the threshold itself in that mode, which would
    # make the 150 irrelevant -- confirm against the OpenCV documentation.
    threshold_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    binarized = cv2.threshold(grayscale, 150, 255, threshold_mode)[1]
    # Median blur removes isolated speckles left by the binarization.
    return cv2.medianBlur(binarized, 3)

def image_cutting(image_path
                  ,up_ratio=0.03
                  ,down_ratio=0.13):
    """Load an image, keep a horizontal band of it and preprocess that band.

    The band spans the full width and runs from ``up_ratio`` to
    ``down_ratio`` of the image height (both measured from the top).

    Args:
        image_path: Path of the image file to read.
        up_ratio: Top edge of the band as a fraction of the image height.
        down_ratio: Bottom edge of the band as a fraction of the image
            height.

    Returns:
        The result of ``preprocess_image`` applied to the cropped band.

    Raises:
        ValueError: If the image cannot be read, or if the two ratios
            describe an empty band.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque AttributeError on ``.shape``.
    if image is None:
        raise ValueError(f"无法读取图片:{image_path}")

    height = image.shape[0]
    up = int(height * up_ratio)
    down = int(height * down_ratio)
    if up >= down:
        raise ValueError(
            f"裁剪范围无效:up_ratio={up_ratio},down_ratio={down_ratio}"
        )

    # Keep every column; only the rows are restricted.
    cropped_image = image[up:down, :]
    return preprocess_image(cropped_image)


def get_key_fields_from_easyorc(image, keyword=''):
    """Run the module-level EasyOCR reader on an image and pick a text.

    Relies on the global ``Ereader`` having been created beforehand.
    Each item returned by ``Ereader.readtext`` is indexed as
    ``item[1]`` for the recognized text.

    Args:
        image: Image passed straight to ``Ereader.readtext``.
        keyword: Text to look for. An empty string disables the search.

    Returns:
        * keyword given and found: ``{True: text}`` for the first text that
          contains the keyword.
        * keyword given but not found: ``{False: texts}`` with every
          recognized text.
        * empty keyword: the first recognized text as a plain ``str``
          (note: not wrapped in a dict, unlike the other two cases).
    """
    detections = Ereader.readtext(image)

    # Without a keyword the first recognized text is returned as-is.
    if keyword == '':
        return detections[0][1]

    rejected = []
    for detection in detections:
        content = detection[1]
        if keyword in content:
            return {True: content}
        rejected.append(content)
    return {False: rejected}
    

def get_key_fields_from_PaddleOCR(image,keyword=''):
    """Run the module-level PaddleOCR reader on an image and pick a text.

    Relies on the global ``Preader`` having been created beforehand. Only
    the first element of the ``Preader.ocr`` result is used; each of its
    items is indexed as ``item[1][0]`` for the recognized text.

    Args:
        image: Image passed straight to ``Preader.ocr``.
        keyword: Text to look for. An empty string selects the first
            recognized text instead.

    Returns:
        ``{True: text}`` when a text was selected (the first one containing
        ``keyword``, or simply the first one when ``keyword`` is empty);
        ``{False: texts}`` with every recognized text when nothing matched.
        ``texts`` is empty when nothing was recognized at all.
    """
    pages = Preader.ocr(image, cls=True)
    res = pages[0] if pages else None
    # PaddleOCR reports "no text detected" as a None/empty page; treat that
    # as "not found" instead of crashing on iteration or indexing.
    if not res:
        return {False: []}

    if keyword == '':
        return {True: res[0][1][0]}

    texts = []
    for fields in res:
        text = fields[1][0]  # recognized text
        if keyword in text:
            return {True: text}
        texts.append(text)
    return {False: texts}

def time_counter(begin_time, end_time):
    """Format the time elapsed between two timestamps.

    Args:
        begin_time: Start timestamp in seconds.
        end_time: End timestamp in seconds.

    Returns:
        A string giving the elapsed time, rounded to whole seconds with
        ``round`` and split into hours, minutes and seconds.
    """
    total_seconds = round(end_time - begin_time)
    hours, leftover = divmod(total_seconds, 3600)
    minutes, seconds = divmod(leftover, 60)
    return f'用时:{hours}小时{minutes}分钟{seconds}秒'

def change_wechat_photo_name_logic(photos_folder_path
                                   ,keyword = '22级实习-'
                                   ,up_ratio=0.05
                                   ,down_ratio=0.13
                                   ,model_name = "paddleorc"):
    """Rename each screenshot in a folder after the text OCR'd from it.

    For every path returned by ``get_photo_paths`` the image is cropped to
    the band between ``up_ratio`` and ``down_ratio``, run through the chosen
    OCR engine, and renamed to ``<friend_name>.PNG`` inside the same folder.
    A failure on one file is printed and does not stop the others.

    Args:
        photos_folder_path: Folder containing the screenshots.
        keyword: Text the wanted OCR line must contain. When non-empty, the
            friend name is whatever follows the last ``-`` of the matching
            line; when empty, the selected text is used unchanged.
        up_ratio: Top of the crop band as a fraction of the image height.
        down_ratio: Bottom of the crop band as a fraction of the image
            height.
        model_name: ``"paddleorc"`` or ``"easyorc"`` (spelled as in this
            file).

    Raises:
        ValueError: If ``model_name`` is not one of the two supported
            values.
    """
    global Preader, Ereader

    # Fail up front on an unknown engine instead of once per file.
    if model_name == "paddleorc":
        Preader = PaddleOCR(use_angle_cls=True,det=False, lang="ch")
    elif model_name == "easyorc":
        # Creating the reader once loads the model for all later calls.
        Ereader = easyocr.Reader(['ch_sim','en'])
    else:
        raise ValueError(f"不支持的模型:{model_name}")

    photo_paths = get_photo_paths(photos_folder_path)
    m = len(photo_paths)
    t = 0
    start_time = time.time()
    for photo_path in photo_paths:
        start_time_of_each_step = time.time()
        try:
            preprocessed_image = image_cutting(photo_path,up_ratio,down_ratio)

            if model_name == "easyorc":
                ocr_result = get_key_fields_from_easyorc(preprocessed_image,keyword)
            else:
                ocr_result = get_key_fields_from_PaddleOCR(preprocessed_image,keyword)

            if isinstance(ocr_result, dict):
                # {False: texts} means the wanted text was not found; report
                # what was seen instead of failing with "KeyError: True".
                if True not in ocr_result:
                    raise LookupError(
                        f"未识别到目标文字,OCR结果:{ocr_result.get(False)}"
                    )
                matched_text = ocr_result[True]
            else:
                # The EasyOCR helper returns a bare string when no keyword is
                # given; indexing it with True would take its 2nd character.
                matched_text = ocr_result

            if keyword != "":
                friend_name = matched_text.split('-')[-1]
            else:
                friend_name = matched_text

            new_photo_path = os.path.join(photos_folder_path,f"{friend_name}.PNG")
            # os.rename may silently replace an existing file; refuse to
            # destroy another screenshot that already carries this name.
            if os.path.exists(new_photo_path) and not os.path.samefile(photo_path, new_photo_path):
                raise FileExistsError(f"目标文件已存在:{new_photo_path}")
            os.rename(photo_path,new_photo_path)
            t += 1
            end_time_of_each_step = time.time()
            time_for_this_time = time_counter(start_time_of_each_step,end_time_of_each_step)
            print(f"当前好友名为{friend_name},是第{t}个,完成{t/m*100}%,{time_for_this_time}")
        except Exception as e:
            print(f"\033[31m当前文件为:【{photo_path}】\n,错误:{e}\033[0m")
    # Measure the total against "now": the per-step end time is unset when
    # no file succeeded and stale when the last file failed.
    print(f"总计{time_counter(start_time,time.time())},完成{t}个")

if __name__ == '__main__':
    # Folder holding the WeChat screenshots to rename.
    photos_folder_path = "/Users/magu/Downloads/淮职课程准备/1就业材料/就业实习留存材料/回访截图/张晶实习回访(11月1日-11月30)"
    change_wechat_photo_name_logic(
        photos_folder_path,
        keyword='22级实习-',
        up_ratio=0.05,
        down_ratio=0.101,
    )
    # Alternative run without a keyword filter:
    # keyword='', up_ratio=0.05, down_ratio=0.10


    




http://www.kler.cn/a/418852.html

相关文章:

  • 深度学习基础2
  • DataX实战|使用Python 构建简易的DataX数据血缘工具(一)
  • Mybatis 支持延迟加载的详细内容
  • 牛客--最长回文子串
  • Vue 3 中实现页面特定功能控制
  • switch、if...else、while、do....while、for如何实现多分支、双分支、单分支的循环语句
  • 新版 Navicat Premium 17 安装教程 (亲测可用)
  • spring-事务管理
  • JUC并发编程详解
  • 联表查询,外键
  • Spark SQL数据加载、存储概述
  • git 上传代码时报错
  • Python Web 框架
  • Proxy详解
  • Oracle12.2 RAC集群管理修改IP地址(DNS解析)
  • 洛谷 P1558 色板游戏(线段树)
  • 管理表空间和数据文件(二)
  • 江协科技最新OLED保姆级移植hal库
  • 阅文集团大数据面试题及参考答案
  • qt 的udp发送和接收
  • Vue SSR基础介绍与实践
  • Pycharm使用Jupyterlab报错:Jupyter command `jupyter-notebook` not found
  • 计算机毕业设计Python深度学习游戏推荐系统 Django PySpark游戏可视化 游戏数据分析 游戏爬虫 Scrapy 机器学习 人工智能 大数据毕设
  • AI 编译器学习笔记之十三 -- Pytorch 特性实现
  • [golang][MAC]Go环境搭建+VsCode配置
  • 设计模式学习[10]---迪米特法则+外观模式