Spaces:

CantonMonkey
/

VGG_based_multi_input_ASL_recognition

No application file

VGG_based_multi_input_ASL_recognition / data preprocessing.py

CantonMonkey

add app files

4db3489 8 months ago

4.46 kB

	import cv2
	import mediapipe as mp
	import numpy as np
	import os
	import random
	from tqdm import tqdm
	import shutil


	class HandProcessor:
	    """Crop the first detected hand out of images and collect its landmarks.

	    Each image is run through MediaPipe Hands; the first detected hand is
	    cropped (with a small pixel margin) and its landmark coordinates are
	    gathered so that a whole class-per-folder dataset can be exported as
	    cropped images plus one compressed ``.npz`` landmark archive.
	    """

	    def __init__(self, min_detection_confidence=0.8, min_tracking_confidence=0.5,
	                 static_image_mode=True):
	        """Create the MediaPipe Hands detector and set processing limits.

	        Args:
	            min_detection_confidence: Forwarded to ``mp.solutions.hands.Hands``.
	            min_tracking_confidence: Forwarded to ``mp.solutions.hands.Hands``
	                (per the MediaPipe docs it only matters in video mode).
	            static_image_mode: Forwarded to ``mp.solutions.hands.Hands``.
	                Defaults to True because this class feeds the detector
	                independent still images; in video mode MediaPipe may reuse
	                the hand location tracked in the previous, unrelated image.
	        """
	        self.mpHands = mp.solutions.hands
	        self.hands = self.mpHands.Hands(
	            static_image_mode=static_image_mode,
	            min_detection_confidence=min_detection_confidence,
	            min_tracking_confidence=min_tracking_confidence,
	        )
	        # Margin, in pixels, added on every side of the landmark bounding box.
	        self.offset = 10
	        # Upper bound on successfully processed images per class folder.
	        self.max_images_per_class = 1400

	    def close(self):
	        """Release the resources held by the MediaPipe Hands detector."""
	        self.hands.close()

	    def get_bbox_coordinates(self, results, image_shape):
	        """Return the pixel bounding box of the first detected hand.

	        Args:
	            results: Detection result exposing ``multi_hand_landmarks``.
	            image_shape: ``(height, width, ...)`` of the processed image.

	        Returns:
	            ``(min_x, min_y, max_x, max_y)`` in pixels (not clamped to the
	            image), or None when no hand / no landmark is available.
	        """
	        if not results.multi_hand_landmarks:
	            return None

	        height, width = image_shape[:2]
	        points = results.multi_hand_landmarks[0].landmark
	        # Landmark coordinates are fractions of the image size; scale to pixels.
	        xs = [int(point.x * width) for point in points]
	        ys = [int(point.y * height) for point in points]
	        if not xs:
	            return None
	        return min(xs), min(ys), max(xs), max(ys)

	    def extract_landmarks(self, results):
	        """Return the first hand's landmarks as a flat ``[x0, y0, x1, y1, ...]`` list.

	        The ``x``/``y`` values are passed through exactly as reported by the
	        detector. Returns None when no hand was detected.
	        """
	        if not results.multi_hand_landmarks:
	            return None

	        landmarks = []
	        for point in results.multi_hand_landmarks[0].landmark:
	            landmarks.extend((point.x, point.y))
	        return landmarks

	    @staticmethod
	    def _padded_crop_bounds(bbox, image_shape, offset):
	        """Pad ``bbox`` by ``offset`` pixels and clamp it to the image.

	        Returns ``(left, top, right, bottom)`` slice bounds, or None when the
	        padded box does not overlap the image at all.
	        """
	        min_x, min_y, max_x, max_y = bbox
	        height, width = image_shape[:2]

	        left = min(width, max(0, min_x - offset))
	        top = min(height, max(0, min_y - offset))
	        # The upper bounds must also be clamped at 0: a negative slice end
	        # would wrap around and silently select the wrong region.
	        right = max(0, min(width, max_x + offset))
	        bottom = max(0, min(height, max_y + offset))

	        if right <= left or bottom <= top:
	            return None
	        return left, top, right, bottom

	    def process_image(self, img_path):
	        """Load one image and return ``(cropped_image, landmarks)``.

	        Returns ``(None, None)`` when the file cannot be read, no hand is
	        detected, or the padded bounding box lies outside the image.
	        """
	        img = cv2.imread(img_path)
	        if img is None:
	            return None, None

	        # OpenCV loads BGR; the detector is fed the RGB conversion.
	        results = self.hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

	        landmarks = self.extract_landmarks(results)
	        if landmarks is None:
	            return None, None

	        bbox = self.get_bbox_coordinates(results, img.shape)
	        if bbox is None:
	            return None, None

	        bounds = self._padded_crop_bounds(bbox, img.shape, self.offset)
	        if bounds is None:
	            return None, None

	        left, top, right, bottom = bounds
	        return img[top:bottom, left:right], landmarks

	    def process_dataset(self, input_path, output_img_path, output_landmarks_path):
	        """Process a class-per-folder dataset and save crops and landmarks.

	        Args:
	            input_path: Directory whose sub-directories are the classes.
	            output_img_path: Directory receiving ``<class>/<image>`` crops.
	            output_landmarks_path: Directory receiving ``hand_landmarks.npz``,
	                keyed by ``os.path.join(class_name, img_name)``.

	        At most ``self.max_images_per_class`` images are kept per class; only
	        images that were detected, cropped and written successfully count.
	        """
	        os.makedirs(output_img_path, exist_ok=True)
	        os.makedirs(output_landmarks_path, exist_ok=True)

	        landmarks_dict = {}

	        # os.listdir order is arbitrary; sorting makes the per-class cap pick
	        # the same images on every run.
	        for class_name in tqdm(sorted(os.listdir(input_path)), desc="Processing classes"):
	            input_class_path = os.path.join(input_path, class_name)
	            if not os.path.isdir(input_class_path):
	                continue

	            output_class_path = os.path.join(output_img_path, class_name)
	            os.makedirs(output_class_path, exist_ok=True)

	            processed_count = 0
	            for img_name in sorted(os.listdir(input_class_path)):
	                if processed_count >= self.max_images_per_class:
	                    break

	                input_img_path = os.path.join(input_class_path, img_name)
	                cropped_img, landmarks = self.process_image(input_img_path)
	                if cropped_img is None or landmarks is None:
	                    continue

	                # Only record landmarks for crops that really reached disk.
	                output_img_path_full = os.path.join(output_class_path, img_name)
	                if not cv2.imwrite(output_img_path_full, cropped_img):
	                    continue

	                relative_path = os.path.join(class_name, img_name)
	                landmarks_dict[relative_path] = landmarks
	                processed_count += 1

	        # One array per image, all stored in a single compressed archive.
	        np_path = os.path.join(output_landmarks_path, 'hand_landmarks.npz')
	        np.savez_compressed(np_path, **landmarks_dict)

	        print(f"Processed {len(landmarks_dict)} images")
	        print(f"Cropped images saved to: {output_img_path}")
	        print(f"Landmarks data saved to: {output_landmarks_path}")


	if __name__ == "__main__":
	    # Script entry point: crop the ASL alphabet training set and export the
	    # hand landmarks, reading from and writing to fixed local directories.
	    HandProcessor().process_dataset(
	        input_path="E:/ML_ASL_try/asl_alphabet_train/asl_alphabet_train/",
	        output_img_path="E:/ML_ASL_try/new_cropped_images/",
	        output_landmarks_path="E:/ML_ASL_try/new_landmarks_data/",
	    )