Spaces:
No application file
No application file
| import cv2 | |
| import mediapipe as mp | |
| import numpy as np | |
| import os | |
| import random | |
| from tqdm import tqdm | |
| import shutil | |
class HandProcessor:
    """Detect a hand in still images, crop around it and collect its landmarks.

    The class wraps a MediaPipe ``Hands`` solution. Only the first entry of
    ``multi_hand_landmarks`` is used for each image.
    """

    def __init__(
        self,
        min_detection_confidence=0.8,
        min_tracking_confidence=0.5,
        static_image_mode=True,
        offset=10,
        max_images_per_class=1400,
    ):
        """Create the MediaPipe hand detector and store cropping settings.

        Args:
            min_detection_confidence: Forwarded to ``mp.solutions.hands.Hands``.
            min_tracking_confidence: Forwarded to ``mp.solutions.hands.Hands``.
            static_image_mode: Forwarded to ``mp.solutions.hands.Hands``.
                Defaults to ``True`` because this class is fed independent
                image files, not consecutive video frames.
            offset: Padding in pixels added on every side of the landmark
                bounding box before cropping.
            max_images_per_class: Upper bound on the number of successfully
                processed images per class directory.
        """
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(
            static_image_mode=static_image_mode,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
        )
        self.offset = offset
        self.max_images_per_class = max_images_per_class

    def get_bbox_coordinates(self, results, image_shape):
        """Return the pixel bounding box of the first detected hand.

        Args:
            results: Detection result exposing ``multi_hand_landmarks``.
            image_shape: Image shape as ``(height, width, ...)``.

        Returns:
            ``(min_x, min_y, max_x, max_y)`` in pixels, or ``None`` when no
            hand (or no landmark) is present. Values are not clamped to the
            image bounds.
        """
        if not results.multi_hand_landmarks:
            return None

        height, width = image_shape[0], image_shape[1]
        all_x, all_y = [], []
        for point in results.multi_hand_landmarks[0].landmark:
            # Landmark coordinates are relative to the image size; scale them
            # to pixels and truncate to int.
            all_x.append(int(point.x * width))
            all_y.append(int(point.y * height))

        if not all_x or not all_y:
            return None
        return min(all_x), min(all_y), max(all_x), max(all_y)

    def extract_landmarks(self, results):
        """Return the first hand's landmarks as a flat ``[x0, y0, x1, y1, ...]`` list.

        Args:
            results: Detection result exposing ``multi_hand_landmarks``.

        Returns:
            A list of the raw ``x``/``y`` landmark values (not scaled to
            pixels), or ``None`` when no hand was detected.
        """
        if not results.multi_hand_landmarks:
            return None

        landmarks = []
        for point in results.multi_hand_landmarks[0].landmark:
            landmarks.extend([point.x, point.y])
        return landmarks

    def process_image(self, img_path):
        """Load one image, detect a hand and crop the image around it.

        Args:
            img_path: Path of the image file to read.

        Returns:
            ``(cropped_image, landmarks)`` on success, or ``(None, None)``
            when the file cannot be read, no hand is found, or the crop
            would be empty.
        """
        img = cv2.imread(img_path)
        if img is None:
            return None, None

        # The image is read as BGR and converted to RGB before detection.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = self.hands.process(img_rgb)

        landmarks = self.extract_landmarks(results)
        if landmarks is None:
            return None, None

        bbox = self.get_bbox_coordinates(results, img.shape)
        if bbox is None:
            return None, None

        min_x, min_y, max_x, max_y = bbox
        height, width = img.shape[:2]
        # Pad the box by ``offset`` and clamp it to the image so the slice
        # below never uses negative or out-of-range indices.
        min_x = max(0, min_x - self.offset)
        min_y = max(0, min_y - self.offset)
        max_x = min(width, max_x + self.offset)
        max_y = min(height, max_y + self.offset)

        img_crop = img[min_y:max_y, min_x:max_x]
        if img_crop.size == 0:
            return None, None
        return img_crop, landmarks

    def process_dataset(self, input_path, output_img_path, output_landmarks_path):
        """Process every class directory and save cropped images and landmarks.

        Each sub-directory of ``input_path`` is treated as one class. Up to
        ``self.max_images_per_class`` successfully processed images per class
        are cropped and written under ``output_img_path/<class>/``. All
        landmarks are stored in ``hand_landmarks.npz`` inside
        ``output_landmarks_path``, keyed by ``os.path.join(class, image)``.

        Args:
            input_path: Root directory containing one folder per class.
            output_img_path: Root directory for the cropped images.
            output_landmarks_path: Directory for the landmarks archive.
        """
        os.makedirs(output_img_path, exist_ok=True)
        os.makedirs(output_landmarks_path, exist_ok=True)

        landmarks_dict = {}
        # Sorted listings make the set of images selected under the
        # per-class cap reproducible between runs.
        class_names = sorted(os.listdir(input_path))
        for class_name in tqdm(class_names, desc="Processing classes"):
            input_class_path = os.path.join(input_path, class_name)
            if not os.path.isdir(input_class_path):
                continue

            output_class_path = os.path.join(output_img_path, class_name)
            os.makedirs(output_class_path, exist_ok=True)

            processed_count = 0
            for img_name in sorted(os.listdir(input_class_path)):
                if processed_count >= self.max_images_per_class:
                    break

                input_img_path = os.path.join(input_class_path, img_name)
                cropped_img, landmarks = self.process_image(input_img_path)
                if cropped_img is None or landmarks is None:
                    continue

                output_img_path_full = os.path.join(output_class_path, img_name)
                # cv2.imwrite reports failure through its return value; do
                # not record landmarks for a crop that was never written.
                if not cv2.imwrite(output_img_path_full, cropped_img):
                    continue

                relative_path = os.path.join(class_name, img_name)
                landmarks_dict[relative_path] = landmarks
                processed_count += 1

        np_path = os.path.join(output_landmarks_path, 'hand_landmarks.npz')
        np.savez_compressed(np_path, **landmarks_dict)

        print(f"Processed {len(landmarks_dict)} images")
        print(f"Cropped images saved to: {output_img_path}")
        print(f"Landmarks data saved to: {output_landmarks_path}")
def main():
    """Run the dataset processing with the hard-coded paths below."""
    input_path = "E:/ML_ASL_try/asl_alphabet_train/asl_alphabet_train/"
    output_img_path = "E:/ML_ASL_try/new_cropped_images/"
    output_landmarks_path = "E:/ML_ASL_try/new_landmarks_data/"

    processor = HandProcessor()
    processor.process_dataset(input_path, output_img_path, output_landmarks_path)


if __name__ == "__main__":
    main()