关于一些整理图像及视频数据的代码块
关于一些整理图像及视频数据的代码块,包括统计文件夹里每个视频的时长输出表格,将多个数据文件夹的图片整理成表格展示,整理大于多少张的文件夹数据,统计多个文件夹的图片数据,对不规则的图像进行padding填充灰边
1.统计文件夹里每个视频的时长输出表格
需求:统计某个文件夹里所有视频的时长,并找出视频数量最多的时长区间(按每60秒一段分段统计)
#-*- coding:utf-8 -*-
import os
import datetime
import sys
import argparse
from moviepy.editor import VideoFileClip
import pandas as pd
from itertools import groupby
def collect_video_files(root_path, suffix):
    """Return the paths of all files below *root_path* that end with *suffix*.

    The directory tree is walked recursively; the suffix comparison is
    case-sensitive and is done on the joined path.
    """
    matches = []
    for dirpath, _dirnames, filenames in os.walk(root_path):
        for name in filenames:
            candidate = os.path.join(dirpath, name)
            if candidate.endswith(suffix):
                matches.append(candidate)
    return matches


def read_duration(video_path):
    """Return the duration (in seconds) reported by moviepy for one video.

    The clip is always closed again so that scanning a large folder does not
    accumulate open readers.
    """
    clip = VideoFileClip(video_path)
    try:
        return clip.duration
    finally:
        clip.close()


def bucket_durations(durations, bucket_seconds=60):
    """Count how many durations fall into each ``bucket_seconds``-wide bucket.

    Args:
        durations: iterable of durations in seconds.
        bucket_seconds: width of one bucket in seconds.

    Returns:
        A list of ``(start, end, count)`` tuples in ascending order; buckets
        without any duration are omitted.
    """
    buckets = []
    # groupby only merges neighbouring items, hence the sort beforehand.
    for index, group in groupby(sorted(durations), key=lambda d: d // bucket_seconds):
        buckets.append(
            (index * bucket_seconds, (index + 1) * bucket_seconds, sum(1 for _ in group))
        )
    return buckets


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Compute Total Time of a Series of Videos')
    parser.add_argument("--path", metavar="PATH", default="/data/sku_data/algorithm_dataset/30sku_test_20210811_same_sku/",
                        help="the root path of the videos (default: %(default)s)")
    parser.add_argument("--type", metavar="TYPE", default=".mp4",
                        help="the type of the videos (default: %(default)s)")
    parser.add_argument("--csv", metavar="CSV", default=None,
                        help="optional CSV file that receives one row per video "
                             "(video_name, seconds_count); not written by default")
    args = parser.parse_args()

    # One (file name, duration in seconds) record per video.
    records = [
        (os.path.basename(video_path), read_duration(video_path))
        for video_path in collect_video_files(args.path, args.type)
    ]

    # Histogram of the durations in 60-second buckets.
    for start, end, amount in bucket_durations([seconds for _, seconds in records]):
        print('{}-{}: {}'.format(start, end, amount))

    # Optional table with the exact duration of every file.
    if args.csv:
        table = pd.DataFrame(records, columns=['video_name', 'seconds_count'])
        table.to_csv(args.csv, index=None)
2.将多个数据文件夹的图片整理成表格展示
需求:有个excel表格有label及label_number列,根据label将底库数据抽取一张放在对应的行展示
import os
import xlsxwriter
from operator import itemgetter
import pandas as pd
# Spreadsheet with one row per SKU; only its `label` and `label_number`
# columns of sheet "Sheet1" are used.
path1 = "/data2/v6-data/new_gallery_11.xlsx"
df1 = pd.read_excel(path1, sheet_name="Sheet1")
# Look-up from label to label_number (a repeated label keeps its last row).
dict1 = {
    label: label_number
    for label, label_number in zip(df1["label"], df1["label_number"])
}
# Root directory of the gallery; the names of its sub-folders are looked up
# as labels further down.
folder = '/data2/all_padding/'
def find_img_path(sku_folder):
    """Return the path of the largest ``.jpg`` file directly inside *sku_folder*.

    "Largest" refers to the file size on disk. When several files share the
    largest size, the one that ``os.listdir`` reports first is returned.
    Sub-directories are not searched.

    Args:
        sku_folder: directory holding the images of one SKU.

    Returns:
        ``sku_folder`` joined with the name of the selected image.

    Raises:
        IndexError: if the folder contains no ``.jpg`` file. The exception
            type is the one the earlier ``images[0]`` lookup produced, so
            callers are unaffected; only the message is now descriptive.
    """
    jpg_paths = [
        os.path.join(sku_folder, name)
        for name in os.listdir(sku_folder)
        if name.endswith('.jpg')
    ]
    if not jpg_paths:
        raise IndexError(f"no .jpg image found in {sku_folder}")
    # max() yields the same element as a descending stable sort followed by [0].
    return max(jpg_paths, key=os.path.getsize)
# Pick one representative image for every SKU folder below `folder`.
img_path_all = []
for filename in os.listdir(folder):
    sku_folder = os.path.join(folder, filename)
    # facebank.pth / names.npy are stored next to the SKU folders; any other
    # entry that is not a directory is skipped for the same reason.
    if filename in ("facebank.pth", "names.npy") or not os.path.isdir(sku_folder):
        continue
    try:
        img_path_all.append(find_img_path(sku_folder))
    except IndexError:
        # find_img_path found no .jpg in this folder.
        print(f"{sku_folder} contains no .jpg image, skipped")

# NOTE(review): xlsxwriter writes the xlsx format; the ".xls" file name is
# kept unchanged here, but a ".xlsx" extension would match the content.
wb = xlsxwriter.Workbook('/data2/all_new_bj_clear/new_skuid_pic_.xls')
pictureSheet = wb.add_worksheet("Sheet1")
pictureSheet.set_column('C:C', 16)
for i, img_path in enumerate(img_path_all):
    # The name of the folder containing the image is the SKU label.
    label = os.path.basename(os.path.dirname(img_path))
    print(label)
    label_number = dict1[int(label)]
    print(label, label_number)
    # Zero-based (row, column) addressing keeps the cells on the same row as
    # set_row(). A1-style strings start at row 1, so the former 'A%d' % i
    # pointed at the non-existent "A0" for the first entry.
    pictureSheet.set_row(i, 95)
    pictureSheet.write(i, 0, label)
    pictureSheet.write(i, 1, label_number)
    # Column C holds the picture; x_scale / y_scale shrink it to half size.
    pictureSheet.insert_image(i, 2, img_path, {'x_offset': 5, 'y_offset': 1, 'x_scale': 0.5, 'y_scale': 0.5})
wb.close()
3.整理大于多少张的文件夹数据
需求:将文件数量大于指定阈值(下面的代码中阈值为500)的文件夹移动汇总到同一目录
import os
import shutil
# Destination root that receives the selected folders.
path2 = '/data2/clear/'
# Source root whose immediate sub-folders are inspected.
path = '/data2/crop/more500/'
# A sub-folder is selected when it holds more than this many entries.
MIN_ENTRIES = 500

# Collect the names of all sub-folders that exceed the threshold.
count = []
for filename in os.listdir(path):
    file_path = os.path.join(path, filename)
    if not os.path.isdir(file_path):
        print(f"{file_path} is not a directory")
        continue
    if len(os.listdir(file_path)) > MIN_ENTRIES:
        count.append(filename)
print(len(count))

# Move the selected folders below path2.
os.makedirs(path2, exist_ok=True)
for i in count:
    older_path = os.path.join(path, i)
    new_path = os.path.join(path2, i)
    if os.path.exists(new_path):
        # shutil.move() would place the source *inside* an existing
        # destination directory instead of replacing it, so skip it.
        print(f"{new_path} already exists, skipping {older_path}")
        continue
    print(i)
    print(older_path, new_path)
    shutil.move(older_path, new_path)
4.统计多个文件夹的图片数据
需求:整理的检索数据,统计每个样本量
import os
from openpyxl import Workbook
# Root directory whose immediate sub-folders are counted, one spreadsheet row per sub-folder.
folder_path = '/data2/v4-data/baojian_all_gallery/'
def count_images_in_folder(folder_path, image_extensions=('.jpg', '.jpeg', '.png', '.gif')):
    """Count the image files located directly inside *folder_path*.

    An entry counts as an image when its file extension, compared
    case-insensitively, is one of *image_extensions*. Sub-directories are not
    descended into.

    Args:
        folder_path: directory to inspect.
        image_extensions: extensions (including the leading dot) that are
            treated as images; defaults to the formats that were previously
            hard-coded.

    Returns:
        The number of matching entries.
    """
    wanted = {extension.lower() for extension in image_extensions}
    return sum(
        1
        for file_name in os.listdir(folder_path)
        if os.path.splitext(file_name)[1].lower() in wanted
    )
def get_subfolder_paths(folder_path):
    """Return the paths of the directories located directly in *folder_path*.

    Every returned path is *folder_path* joined with the entry name; plain
    files are left out and nested levels are not visited. The order follows
    ``os.listdir``.
    """
    candidates = (
        os.path.join(folder_path, entry) for entry in os.listdir(folder_path)
    )
    return [candidate for candidate in candidates if os.path.isdir(candidate)]
# Pair every sub-folder with its image count, largest folders first.
subfolder_paths = get_subfolder_paths(folder_path)
image_counts = sorted(
    ((subfolder, count_images_in_folder(subfolder)) for subfolder in subfolder_paths),
    key=lambda pair: pair[1],
    reverse=True,
)

# Write the result to a workbook: a header row followed by one row per folder.
wb = Workbook()
ws = wb.active
for column, title in enumerate(('文件夹名字', '图片数量'), start=1):
    ws.cell(row=1, column=column, value=title)
for row, (subfolder, total) in enumerate(image_counts, start=2):
    ws.cell(row=row, column=1, value=os.path.basename(subfolder))
    ws.cell(row=row, column=2, value=total)
wb.save('/data2/gallery.xlsx')
5.对不规则的图像进行padding填充灰边
需求:图像归一化224*224
import os
import cv2
# Color of the padding border: a light gray, given as three channel values
# in the order OpenCV uses for color images (B, G, R).
PAD_COLOR = [158, 160, 161]
INPUT_DIR = "/data2/all_new_bj_clear/all/"
OUTPUT_DIR = "/data2/all_new_bj_clear/all_padding/"
# Edge length (in pixels) of the square output images.
TARGET_SIZE = 224

os.makedirs(OUTPUT_DIR, exist_ok=True)
for root, dirs, files in os.walk(INPUT_DIR):
    # Mirror the sub-directory layout of INPUT_DIR below OUTPUT_DIR.
    output_root = os.path.normpath(
        os.path.join(OUTPUT_DIR, os.path.relpath(root, INPUT_DIR))
    )
    for filename in files:
        # Best effort per file: a failure is reported and the run continues.
        try:
            filename_ori = os.path.join(root, filename)
            print(filename_ori)
            image = cv2.imread(filename_ori)
            if image is None:
                print(f"Failed to read image: {filename_ori}")
                continue

            # Scale the longer side to TARGET_SIZE and keep the aspect ratio.
            height, width = image.shape[:2]
            r0 = height / TARGET_SIZE
            r1 = width / TARGET_SIZE
            if r0 > r1:
                # max(1, ...) keeps very elongated images from collapsing to 0.
                width = max(1, int(width / r0))
                height = TARGET_SIZE
            else:
                width = TARGET_SIZE
                height = max(1, int(height / r1))
            res = cv2.resize(image, (width, height), interpolation=cv2.INTER_CUBIC)

            # Split the missing pixels between both sides; when the amount is
            # odd the extra pixel goes to the bottom / right so the result is
            # exactly TARGET_SIZE x TARGET_SIZE.
            pad_top = (TARGET_SIZE - height) // 2
            pad_bottom = TARGET_SIZE - height - pad_top
            pad_left = (TARGET_SIZE - width) // 2
            pad_right = TARGET_SIZE - width - pad_left
            constant = cv2.copyMakeBorder(
                res, pad_top, pad_bottom, pad_left, pad_right,
                cv2.BORDER_CONSTANT, value=PAD_COLOR,
            )

            os.makedirs(output_root, exist_ok=True)
            out_filename = os.path.join(output_root, filename)
            # imwrite reports failure through its return value.
            if cv2.imwrite(out_filename, constant):
                print(f"Saved: {out_filename}")
            else:
                print(f"Failed to write image: {out_filename}")
        except Exception as e:
            print(f"Error processing {filename}: {e}")