Spaces:

orpatashnik
/

NestedAttentionEncoder

Sleeping

App Files Files Community

NestedAttentionEncoder / utils.py

orpatashnik

remove dlib dependency

399e621 10 months ago

raw

history blame contribute delete

4.73 kB

	import numpy as np
	from PIL import Image
	import scipy.ndimage
	import insightface
	import torch
	import scipy

	# Module-level InsightFace analyzer, built once at import time and reused by
	# get_landmark_pil_insight() for every call. It loads the 'buffalo_l' model
	# pack and restricts ONNX Runtime to the CPU execution provider.
	face_analyzer = insightface.app.FaceAnalysis(name='buffalo_l', providers=['CPUExecutionProvider'])
	# NOTE(review): ctx_id=0 presumably names device 0; with only the CPU provider
	# listed above, inference is expected to stay on CPU - confirm against InsightFace.
	face_analyzer.prepare(ctx_id=0)


	def image_grid(imgs, rows, cols):
	    """Tile equally sized PIL images into one ``rows`` x ``cols`` RGB grid.

	    Images are placed in row-major order: image ``i`` goes to row
	    ``i // cols`` and column ``i % cols``. The cell size is taken from the
	    first image.

	    Args:
	        imgs: Sequence of PIL images; its length must equal ``rows * cols``.
	        rows: Number of rows in the grid.
	        cols: Number of columns in the grid.

	    Returns:
	        A new RGB ``PIL.Image`` of size ``(cols * w, rows * h)`` where
	        ``(w, h)`` is the size of ``imgs[0]``.

	    Raises:
	        AssertionError: If ``len(imgs) != rows * cols``.
	        ValueError: If ``imgs`` is empty, so no cell size can be derived.
	    """
	    assert len(imgs) == rows * cols
	    if len(imgs) == 0:
	        raise ValueError("image_grid requires at least one image")

	    w, h = imgs[0].size
	    grid = Image.new('RGB', size=(cols * w, rows * h))

	    for i, img in enumerate(imgs):
	        # Column index scales by the cell width, row index by the cell height.
	        grid.paste(img, box=(i % cols * w, i // cols * h))
	    return grid


	def get_generator(seed, device):
	    """Create seeded ``torch.Generator`` object(s) on ``device``.

	    Args:
	        seed: ``None``, a single seed, or a ``list`` of seeds.
	        device: Device passed to ``torch.Generator``.

	    Returns:
	        ``None`` when ``seed`` is ``None``; a list with one generator per
	        entry when ``seed`` is a list; otherwise a single generator.
	    """
	    # No seed requested: the caller gets no generator at all.
	    if seed is None:
	        return None

	    # One independent generator per seed, in the same order as the input.
	    if isinstance(seed, list):
	        return [torch.Generator(device).manual_seed(single) for single in seed]

	    return torch.Generator(device).manual_seed(seed)

	def get_landmark_pil_insight(pil_image):
	    """Return the keypoints of the first face InsightFace detects.

	    The image is converted to RGB, run through the module-level
	    ``face_analyzer``, and the ``kps`` array of the first returned face is
	    handed back as a fresh NumPy array.

	    Args:
	        pil_image: Input ``PIL.Image``.

	    Returns:
	        A copy of ``faces[0].kps`` as an ``np.ndarray``, or ``None`` when no
	        face is detected or the detection carries no keypoints. Per the
	        original in-code notes this is typically 5 points ordered
	        ``[left_eye, right_eye, nose, left_mouth, right_mouth]`` - this
	        function does NOT synthesize 68 landmarks.
	    """
	    # NOTE(review): InsightFace examples feed BGR arrays (cv2 convention) while
	    # this passes RGB; left unchanged to preserve existing detections - confirm.
	    img_np = np.array(pil_image.convert("RGB"))
	    faces = face_analyzer.get(img_np)
	    if not faces:
	        return None

	    landmarks = faces[0].kps
	    if landmarks is None:
	        return None

	    # Return whatever the model produced without unpacking into exactly five
	    # names: callers check the row count themselves, and unpacking raised
	    # ValueError for any count other than 5 that was below 68.
	    return np.array(landmarks)

	def _face_crop_quad(lm):
	    """Derive the oriented crop quad and its side length from 5-point landmarks.

	    Rows 0/1 of ``lm`` are used as the eyes and rows 3/4 as the mouth corners.
	    Returns ``(quad, qsize)`` where ``quad`` is a ``(4, 2)`` float array of
	    corner coordinates, or ``None`` when the geometry is degenerate
	    (zero-length or non-finite axis), so no meaningful crop exists.
	    """
	    lm = np.asarray(lm, dtype=np.float64)
	    eye_left, eye_right = lm[0], lm[1]
	    eye_avg = (eye_left + eye_right) * 0.5
	    eye_to_eye = eye_right - eye_left
	    mouth_left, mouth_right = lm[3], lm[4]
	    mouth_avg = (mouth_left + mouth_right) * 0.5
	    eye_to_mouth = mouth_avg - eye_avg

	    # Horizontal axis of the crop: the eye line blended with the
	    # perpendicular of the eye-to-mouth direction.
	    x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
	    norm = np.hypot(*x)
	    if not np.isfinite(norm) or norm == 0:
	        return None
	    x /= norm
	    # Scale the unit axis by the larger of the two face-size estimates.
	    x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
	    y = np.flipud(x) * [-1, 1]
	    c = eye_avg + eye_to_mouth * 0.1
	    quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
	    qsize = np.hypot(*x) * 2
	    return quad, qsize


	def _pad_and_feather(img, pad, qsize):
	    """Reflect-pad ``img`` and fade the padded border into blur / median color.

	    ``pad`` is ``(left, top, right, bottom)``; each side is enlarged to at
	    least ``rint(qsize * 0.3)``. Returns the padded ``PIL.Image`` and the
	    pad widths actually applied.
	    """
	    pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
	    arr = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
	    h, w, _ = arr.shape
	    yy, xx, _ = np.ogrid[:h, :w, :1]
	    # mask is 1 at the outer edge and falls to <= 0 once a pixel is at least
	    # one pad-width away from every edge.
	    mask = np.maximum(
	        1.0 - np.minimum(np.float32(xx) / pad[0], np.float32(w - 1 - xx) / pad[2]),
	        1.0 - np.minimum(np.float32(yy) / pad[1], np.float32(h - 1 - yy) / pad[3]),
	    )
	    blur = qsize * 0.02
	    arr += (scipy.ndimage.gaussian_filter(arr, [blur, blur, 0]) - arr) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
	    arr += (np.median(arr, axis=(0, 1)) - arr) * np.clip(mask, 0.0, 1.0)
	    padded = Image.fromarray(np.uint8(np.clip(np.rint(arr), 0, 255)), 'RGB')
	    return padded, pad


	def align_face(pil_image, *, output_size=512, transform_size=512, enable_padding=True):
	    """Crop and rotate a face so that it is upright and centered.

	    Landmarks come from ``get_landmark_pil_insight``. An oriented square
	    around the eyes and mouth is computed, the image is optionally shrunk,
	    cropped and reflect-padded, and the square is then warped to the output.

	    Args:
	        pil_image: Input ``PIL.Image``.
	        output_size: Side length of the returned image (default 512).
	        transform_size: Side length used for the quad warp before the final
	            downscale to ``output_size`` (default 512).
	        enable_padding: Whether to reflect-pad and feather the image when the
	            crop square extends past the image border (default True).

	    Returns:
	        An RGB ``PIL.Image``: ``output_size`` square when that is smaller than
	        ``transform_size``, otherwise ``transform_size`` square. If no face
	        is found, fewer than 5 landmarks are available, or the landmark
	        geometry is degenerate, ``pil_image`` is returned unchanged (original
	        size and mode).
	    """
	    lm = get_landmark_pil_insight(pil_image)
	    if lm is None or lm.shape[0] < 5:
	        return pil_image

	    geometry = _face_crop_quad(lm)
	    if geometry is None:
	        # Coincident or NaN landmarks: fall back like the no-face case
	        # instead of propagating NaN into the crop arithmetic.
	        return pil_image
	    quad, qsize = geometry

	    img = pil_image.convert("RGB")

	    # Shrink: downscale very large inputs first so later steps stay cheap.
	    shrink = int(np.floor(qsize / output_size * 0.5))
	    if shrink > 1:
	        rsize = (int(np.rint(img.size[0] / shrink)), int(np.rint(img.size[1] / shrink)))
	        img = img.resize(rsize, Image.Resampling.LANCZOS)
	        quad /= shrink
	        qsize /= shrink

	    # Crop: keep only the quad's bounding box plus a border, clamped to the image.
	    border = max(int(np.rint(qsize * 0.1)), 3)
	    crop = (
	        int(np.floor(min(quad[:, 0]))),
	        int(np.floor(min(quad[:, 1]))),
	        int(np.ceil(max(quad[:, 0]))),
	        int(np.ceil(max(quad[:, 1]))),
	    )
	    crop = (
	        max(crop[0] - border, 0),
	        max(crop[1] - border, 0),
	        min(crop[2] + border, img.size[0]),
	        min(crop[3] + border, img.size[1]),
	    )
	    if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
	        img = img.crop(crop)
	        quad -= crop[:2]

	    # Pad: how far the quad (plus border) sticks out past each image edge.
	    pad = (
	        int(np.floor(min(quad[:, 0]))),
	        int(np.floor(min(quad[:, 1]))),
	        int(np.ceil(max(quad[:, 0]))),
	        int(np.ceil(max(quad[:, 1]))),
	    )
	    pad = (
	        max(-pad[0] + border, 0),
	        max(-pad[1] + border, 0),
	        max(pad[2] - img.size[0] + border, 0),
	        max(pad[3] - img.size[1] + border, 0),
	    )
	    if enable_padding and max(pad) > border - 4:
	        img, pad = _pad_and_feather(img, pad, qsize)
	        # Padding moved the origin; shift the quad by the left/top pad.
	        quad += pad[:2]

	    # Transform: warp the quad to an axis-aligned square.
	    img = img.transform(
	        (transform_size, transform_size),
	        Image.Transform.QUAD,
	        (quad + 0.5).flatten(),
	        Image.Resampling.BILINEAR,
	    )
	    if output_size < transform_size:
	        img = img.resize((output_size, output_size), Image.Resampling.LANCZOS)

	    return img