Spaces:
Build error
Build error
| import numpy as np | |
| from tensorflow.keras.applications import ResNet50 | |
| from tensorflow.keras.preprocessing import image | |
| from tensorflow.keras.applications.resnet50 import preprocess_input | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import os | |
# Pre-trained ResNet50 used purely as a feature extractor: ImageNet weights,
# no classification head (include_top=False), average pooling on the output.
model = ResNet50(
    pooling='avg',
    include_top=False,
    weights='imagenet',
)
def extract_features(img_path, model):
    """Return a flat feature vector for the image stored at ``img_path``.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis, passed through ``preprocess_input`` and then fed to
    ``model.predict``. The prediction is flattened before it is returned.

    Args:
        img_path: Path of the image file to load.
        model: Object exposing ``predict``; it is called on the one-image
            batch built from the file.

    Returns:
        The flattened output of ``model.predict`` for this image.
    """
    loaded = image.load_img(img_path, target_size=(224, 224))
    # Add a leading axis so the model receives a batch holding one image.
    batch = np.expand_dims(image.img_to_array(loaded), axis=0)
    prediction = model.predict(preprocess_input(batch))
    return prediction.flatten()
# Directory containing images
image_dir = './forward_facing'

# Only files whose name ends with one of these suffixes are treated as
# images; stray entries (sub-directories, .DS_Store, Thumbs.db, ...) would
# otherwise make the image loader raise and abort the whole run.
image_extensions = (
    '.jpg', '.jpeg', '.png', '.bmp', '.gif', '.webp', '.tif', '.tiff',
)

# Extract features for all images.
# os.listdir() returns names in arbitrary order, so sort them: the order of
# this dictionary fixes the row order of everything derived from it, and a
# stable order makes repeated runs produce the same result.
image_features = {}
for img_file in sorted(os.listdir(image_dir)):
    img_path = os.path.join(image_dir, img_file)
    if not os.path.isfile(img_path):
        continue  # skip sub-directories and other non-file entries
    if not img_file.lower().endswith(image_extensions):
        continue  # skip files that are not images
    image_features[img_file] = extract_features(img_path, model)
# Convert feature dictionary to parallel lists: file_list[k] names the image
# whose feature vector is feature_list[k].
feature_list = list(image_features.values())
file_list = list(image_features.keys())

# Calculate similarities.
# similarity_matrix[i][j] holds the cosine similarity between image i and
# image j; the diagonal is kept at 0 so an image never matches itself.
num_images = len(file_list)
if num_images:
    # One vectorized call on the stacked (num_images, n_features) matrix
    # replaces a separate cosine_similarity() call for every pair.
    similarity_matrix = cosine_similarity(np.vstack(feature_list))
    np.fill_diagonal(similarity_matrix, 0.0)
else:
    # No images: cosine_similarity() rejects empty input, so build the empty
    # matrix directly.
    similarity_matrix = np.zeros((0, 0))
# Identify duplicates.
# Walk the images in order; each image that is still kept flags every later
# image whose similarity to it exceeds the threshold. Images already flagged
# are not used as references, so a file is only reported as a duplicate when
# it resembles an image that is actually being kept (avoids chains such as
# A~B, B~C flagging C although C does not resemble A).
threshold = 0.9  # Similarity threshold for duplicates
duplicates = set()
for i in range(num_images):
    if file_list[i] in duplicates:
        continue
    for j in range(i + 1, num_images):
        if similarity_matrix[i][j] > threshold:
            duplicates.add(file_list[j])

# Remove duplicates (left disabled; the script currently only reports the
# number of duplicates found).
# for duplicate in duplicates:
#     os.remove(os.path.join(image_dir, duplicate))
print("Duplicate Images No => ", len(duplicates))
# print(f"Removed {len(duplicates)} duplicate images.")