Commit 345bc657 authored by longtng's avatar longtng

add files

parent bf89ddf6
File added
./DS_Store
data/aligned_face
data/cluster
data/output_video
data/raw
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from scipy import misc
import sys
import os
import tensorflow as tf
import numpy as np
import utils.facenet as facenet
import align.detect_face as detect_face
import random
from time import sleep
import argparse
def main():
    """Detect and align faces in an image dataset with MTCNN and save PNG crops.

    Walks the class subdirectories of ``args.input_dir``, runs the MTCNN
    detector on each image, selects one face per image, crops/resizes it to
    182x182 and writes it under ``args.output_path`` together with a
    bounding-box log file.
    """
    args = parse_args()
    datadir = args.input_dir
    output_dir_path = args.output_path
    #minsize = 96 # minimum size of face
    minsize = args.minsize  # minimum face size (pixels) accepted by MTCNN
    threshold = [0.6, 0.7, 0.7]  # per-stage (P-Net, R-Net, O-Net) score thresholds
    factor = 0.709  # scale factor between image-pyramid levels
    margin = 44  # NOTE(review): unused in this function
    image_size = 182  # side length of the saved, aligned face crop
    output_dir = os.path.expanduser(output_dir_path)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    dataset = facenet.get_dataset(datadir)
    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        project_dir = os.path.dirname(os.path.abspath(__file__))
        with sess.as_default():
            # MTCNN weights are loaded from the project's "align" directory.
            pnet, rnet, onet = detect_face.create_mtcnn(sess, os.path.join(project_dir, "align"))
    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)
    print('Goodluck')
    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir, filename + '.png')
                print(image_path)
                # Skip images that were already aligned in a previous run.
                if not os.path.exists(output_filename):
                    try:
                        # flatten=True loads the image as grayscale (2-D array).
                        img = misc.imread(image_path, flatten=True)
                        print('read data dimension: ', img.ndim)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        if img.ndim < 2:
                            # Not a usable image; log it without coordinates.
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:
                            # Replicate the grayscale channel to 3 channels for MTCNN.
                            img = facenet.to_rgb(img)
                            print('to_rgb data dimension: ', img.ndim)
                        img = img[:, :, 0:3]
                        print('after data dimension: ', img.ndim)
                        bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
                        nrof_faces = bounding_boxes.shape[0]
                        print('detected_face: %d' % nrof_faces)
                        if nrof_faces > 0:
                            det = bounding_boxes[:, 0:4]
                            img_size = np.asarray(img.shape)[0:2]
                            if nrof_faces > 1:
                                # Several detections: keep the face that is large
                                # and close to the image center.
                                bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                                img_center = img_size / 2
                                offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                                     (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
                                offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                                index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)  # some extra weight on the centering
                                det = det[index, :]
                            det = np.squeeze(det)
                            bb_temp = np.zeros(4, dtype=np.int32)
                            bb_temp[0] = det[0]
                            bb_temp[1] = det[1]
                            bb_temp[2] = det[2]
                            bb_temp[3] = det[3]
                            # Crop the selected face and resize to the output size.
                            cropped_temp = img[bb_temp[1]:bb_temp[3], bb_temp[0]:bb_temp[2], :]
                            scaled_temp = misc.imresize(cropped_temp, (image_size, image_size), interp='bilinear')
                            nrof_successfully_aligned += 1
                            misc.imsave(output_filename, scaled_temp)
                            text_file.write('%s %d %d %d %d\n' % (output_filename, bb_temp[0], bb_temp[1], bb_temp[2], bb_temp[3]))
                        else:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
def parse_args(argv=None):
    """Parse command-line arguments for the alignment script.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse falls back to ``sys.argv[1:]``, so existing callers
            are unaffected; passing a list makes the function testable.

    Returns:
        argparse.Namespace with ``input_dir``, ``output_path`` and ``minsize``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_dir", type=str,
                        help='Path to the data directory containing images.',
                        default="data/raw/lfw/")
    parser.add_argument('--output_path', type=str,
                        help='Path to save aligned face',
                        default="data/aligned_face/clean_data_lfw")
    parser.add_argument('--minsize', type=int,
                        help='minimum size of face ', default=96)
    args = parser.parse_args(argv)
    return args
# Script entry point: run the alignment pipeline only when executed
# directly, not when this module is imported.
if __name__ == '__main__':
    main()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import argparse
import utils.facenet as facenet
import align.detect_face as detect_face
import os
import sys
import math
import pickle
import csv
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
def main():
    """Embed aligned faces with a pretrained FaceNet model and train a 1-NN classifier.

    Loads the aligned-face dataset from ``args.input_dir``, computes one
    embedding per image with the frozen model at ``args.model_path``, stores
    the embeddings and labels as CSV under ``data/embedding/`` and pickles a
    fitted 1-nearest-neighbor classifier together with the class names under
    ``classifier/``.
    """
    args = parse_args()
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # Random key makes the classifier filename unique across runs.
            random_key = np.random.randint(0, high=99999)
            datadir = args.input_dir
            embeddingdir = "data/embedding/"
            modeldir = args.model_path
            dataset = facenet.get_dataset(datadir)
            paths, labels = facenet.get_image_paths_and_labels(dataset)
            # Create a list of class names (directory names, '_' -> ' ').
            class_names = [cls.name.replace('_', ' ') for cls in dataset]
            label_name = [class_names[i] for i in labels]
            print('Number of classes: {}'.format(len(dataset)))
            print('Number of images: {}'.format(len(paths)))
            print('Loading feature extraction model')
            facenet.load_model(modeldir)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            print(embedding_size)
            # Run forward pass to calculate embeddings, batched to bound memory use.
            print('Calculating features for images')
            batch_size = 1000
            image_size = 160
            nrof_images = len(paths)
            nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in range(nrof_batches_per_epoch):
                print('{}/{}'.format(i, nrof_batches_per_epoch))
                start_index = i * batch_size
                end_index = min((i + 1) * batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = facenet.load_data(paths_batch, False, False, image_size)
                feed_dict = {images_placeholder: images, phase_train_placeholder: False}
                emb_array[start_index:end_index, :] = sess.run(embeddings, feed_dict=feed_dict)
            # Store embeddings and labels.
            # Fix: create the output directory first; np.savetxt/open raise
            # IOError when it does not already exist.
            os.makedirs(embeddingdir, exist_ok=True)
            np.savetxt(embeddingdir + 'embedding.csv', emb_array, delimiter=",")
            with open(embeddingdir + 'label.csv', 'w') as f:
                writer = csv.writer(f)
                writer.writerows(zip(labels, label_name))
            classifier_filename = 'classifier/classifier_1NN_grayscale{}.pkl'.format(random_key)
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            print(emb_array.shape)
            # Train classifier
            print('Start training classifier')
            model = KNeighborsClassifier(n_neighbors=1)
            model.fit(emb_array, labels)
            print('End training classifier')
            # Saving classifier model.
            # Fix: likewise ensure the classifier directory exists.
            os.makedirs(os.path.dirname(classifier_filename_exp), exist_ok=True)
            with open(classifier_filename_exp, 'wb') as outfile:
                pickle.dump((model, class_names), outfile)
            print('Saved classifier model to file "%s"' % classifier_filename_exp)
            print('Goodluck')
def parse_args(argv=None):
    """Parse command-line arguments for the classifier-training script.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse falls back to ``sys.argv[1:]``, so existing callers
            are unaffected; passing a list makes the function testable.

    Returns:
        argparse.Namespace with ``input_dir`` and ``model_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_dir", type=str,
                        help='Path to the data directory containing images.',
                        default="data/aligned_face/clean_data_lfw")
    parser.add_argument('--model_path', type=str,
                        help='Path to embedding model',
                        default="model/20180402-114759.pb")
    args = parser.parse_args(argv)
    return args
# Script entry point: train the classifier only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()
\ No newline at end of file
This diff is collapsed.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from scipy import misc
import cv2
import numpy as np
import argparse
import utils.facenet as facenet
import align.detect_face as detect_face
import os
from os.path import join as pjoin
import sys
import time
import copy
import pickle
from sklearn.svm import SVC
from sklearn.externals import joblib
def main():
    """Run face recognition on a video and log labeled bounding boxes.

    For every frame of ``args.video_path``, MTCNN detects faces, each face is
    embedded with the FaceNet model and classified with the pickled 1-NN
    classifier. Boxes and labels are appended to a bounding-box text file in
    ``args.output_label_path``; frames containing a face from ``HumanNames``
    are additionally saved as PNGs for manual verification.
    """
    args = parse_args()
    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            # Random key makes the output label filename unique across runs.
            random_key = np.random.randint(0, high=999999)
            project_dir = os.path.dirname(os.path.abspath(__file__))
            pnet, rnet, onet = detect_face.create_mtcnn(sess, os.path.join(project_dir, "align"))
            HumanNames = ['BrigitteBardot','CharlesDeGaulle','ElisabethReine','JeanPaulBelmondo'] #train human name
            minsize = args.minsize # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # per-stage MTCNN score thresholds
            factor = 0.709  # scale factor between image-pyramid levels
            margin = 44  # NOTE(review): unused in this function
            frame_interval = 1  # NOTE(review): unused in this function
            batch_size = 1000  # NOTE(review): unused in this function
            image_size = 182  # intermediate crop size before model input
            input_image_size = 160  # FaceNet input resolution
            print('Loading feature extraction model')
            modeldir = args.model_path
            facenet.load_model(modeldir)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            classifier_filename = args.classifer_path
            bounding_boxes_filename = os.path.join(args.output_label_path, 'bounding_boxes_%05d.txt' % random_key)
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            # The pickle holds (fitted KNN model, list of class names).
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)
                print('load classifier file-> %s' % classifier_filename_exp)
            video_path = args.video_path
            video_capture = cv2.VideoCapture(video_path)
            c = 0  # frame counter used in the label file and saved-frame names
            with open(bounding_boxes_filename, "w") as text_file:
                print('Start Recognition!')
                number_face_count = 0
                while video_capture.isOpened():
                    ret, frame = video_capture.read()
                    if not ret:
                        # End of stream: tear down and stop.
                        cv2.destroyAllWindows()
                        video_capture.release()
                        break
                    #frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional)
                    store_frame = False  # set when this frame contains a known identity
                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    print('Detected_FaceNum: %d' % nrof_faces)
                    if nrof_faces > 0:
                        det = bounding_boxes[:, 0:4]
                        img_size = np.asarray(frame.shape)[0:2]
                        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                        for i in range(nrof_faces):
                            number_face_count += 1
                            emb_array = np.zeros((1, embedding_size))
                            bb[i][0] = det[i][0]
                            bb[i][1] = det[i][1]
                            bb[i][2] = det[i][2]
                            bb[i][3] = det[i][3]
                            # inner exception: skip boxes touching the frame border
                            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                                print('face is inner of range!')
                                continue
                            # Crop, resize and prewhiten the face for the embedding network.
                            cropped = frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]
                            cropped = facenet.flip(cropped, False)
                            scaled = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
                            scaled = cv2.resize(scaled, (input_image_size, input_image_size),
                                                interpolation=cv2.INTER_CUBIC)
                            scaled = facenet.prewhiten(scaled)
                            scaled_reshape = (scaled.reshape(-1, input_image_size, input_image_size, 3))
                            feed_dict = {images_placeholder: scaled_reshape, phase_train_placeholder: False}
                            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                            prediction = model.predict(emb_array)
                            nearest, _ = model.kneighbors(emb_array, return_distance=True)
                            if class_names[int(prediction[0])] in HumanNames:
                                # Known identity: box + name label, and log the box.
                                cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (255, 0, 0), 2) #boxing face
                                text_x = bb[i][0]
                                text_y = bb[i][3] + 20
                                cv2.putText(frame, class_names[int(prediction[0])], (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                            1, (255, 0, 0), thickness=1, lineType=2)
                                text_file.write('%d %d %d %d %d %s\n' % (c, bb[i][0], bb[i][1], bb[i][2], bb[i][3], class_names[int(prediction[0])]))
                                store_frame = True
                            else:
                                # Identity not in HumanNames: log it as "Unknown".
                                cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face
                                text_file.write('%d %d %d %d %d %s\n' % (c, bb[i][0], bb[i][1], bb[i][2], bb[i][3], "Unknown"))
                    else:
                        print('Unable to align')
                    if store_frame:
                        # Save annotated frames that contain a known identity.
                        misc.imsave(os.path.join(args.output_label_path, 'frame%d.png' % c), frame)
                    c += 1
            video_capture.release()
            cv2.destroyAllWindows()
def parse_args(argv=None):
    """Parse command-line arguments for the video-recognition script.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse falls back to ``sys.argv[1:]``, so existing callers
            are unaffected; passing a list makes the function testable.

    Returns:
        argparse.Namespace with ``video_path``, ``model_path``, ``minsize``,
        ``classifer_path`` and ``output_label_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--video_path", type=str,
                        help='Path to the directory containing video.',
                        default="video/MGAFE0008218--AS01.mp4")
    parser.add_argument('--model_path', type=str,
                        help='Path to embedding model',
                        default="model/20180402-114759.pb")
    parser.add_argument('--minsize', type=int,
                        help='minimum size of face ', default=50)
    parser.add_argument('--classifer_path', type=str,
                        help='Path to KNN classifier',
                        default="classifier/classifier_1NN_grayscale46891.pkl")
    parser.add_argument('--output_label_path', type=str,
                        help='Path to directory containing output labels and frame',
                        default="data/output_label")
    args = parser.parse_args(argv)
    return args
# Script entry point: run recognition only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from scipy import misc
import cv2
import numpy as np
import argparse
import utils.facenet as facenet
import align.detect_face as detect_face
import os
from os.path import join as pjoin
import sys
import time
import copy
import pickle
from sklearn.svm import SVC
from sklearn.externals import joblib
def main():
    """Run face recognition on a video and display annotated frames live.

    Same detection/embedding/classification pipeline as the logging variant,
    but instead of writing label files it draws boxes and names on each frame
    and shows them in an OpenCV window; press 'q' to quit early.
    """
    args = parse_args()
    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            random_key = np.random.randint(0, high=999999)
            project_dir = os.path.dirname(os.path.abspath(__file__))
            pnet, rnet, onet = detect_face.create_mtcnn(sess, os.path.join(project_dir, "align"))
            HumanNames = ['BrigitteBardot','CharlesDeGaulle','ElisabethReine','JeanPaulBelmondo'] #train human name
            minsize = args.minsize # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # per-stage MTCNN score thresholds
            factor = 0.709  # scale factor between image-pyramid levels
            margin = 44  # NOTE(review): unused in this function
            frame_interval = 1  # NOTE(review): unused in this function
            batch_size = 1000  # NOTE(review): unused in this function
            image_size = 182  # intermediate crop size before model input
            input_image_size = 160  # FaceNet input resolution
            print('Loading feature extraction model')
            modeldir = args.model_path
            facenet.load_model(modeldir)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            classifier_filename = args.classifer_path
            # NOTE(review): computed but never written in this display-only variant.
            bounding_boxes_filename = os.path.join(args.output_label_path, 'bounding_boxes_%05d.txt' % random_key)
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            # The pickle holds (fitted KNN model, list of class names).
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)
                print('load classifier file-> %s' % classifier_filename_exp)
            video_path = args.video_path
            video_capture = cv2.VideoCapture(video_path)
            c = 0  # frame counter
            print('Start Recognition!')
            number_face_count = 0
            while video_capture.isOpened():
                ret, frame = video_capture.read()
                if not ret:
                    # End of stream: tear down and stop.
                    cv2.destroyAllWindows()
                    video_capture.release()
                    print("The end of video")
                    break
                #frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional)
                if frame.ndim == 2:
                    frame = facenet.to_rgb(frame)
                frame = frame[:, :, 0:3]
                bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
                nrof_faces = bounding_boxes.shape[0]
                print('Detected_FaceNum: %d' % nrof_faces)
                if nrof_faces > 0:
                    det = bounding_boxes[:, 0:4]
                    img_size = np.asarray(frame.shape)[0:2]
                    bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                    for i in range(nrof_faces):
                        number_face_count += 1
                        emb_array = np.zeros((1, embedding_size))
                        bb[i][0] = det[i][0]
                        bb[i][1] = det[i][1]
                        bb[i][2] = det[i][2]
                        bb[i][3] = det[i][3]
                        # inner exception: skip boxes touching the frame border
                        if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                            print('face is inner of range!')
                            continue
                        # Crop, resize and prewhiten the face for the embedding network.
                        cropped = frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]
                        cropped = facenet.flip(cropped, False)
                        scaled = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
                        scaled = cv2.resize(scaled, (input_image_size, input_image_size),
                                            interpolation=cv2.INTER_CUBIC)
                        scaled = facenet.prewhiten(scaled)
                        scaled_reshape = (scaled.reshape(-1, input_image_size, input_image_size, 3))
                        feed_dict = {images_placeholder: scaled_reshape, phase_train_placeholder: False}
                        emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                        prediction = model.predict(emb_array)
                        nearest, _ = model.kneighbors(emb_array, return_distance=True)
                        if class_names[prediction[0]] in HumanNames:
                            # Known identity: (255, 0, 0) box (blue in OpenCV's BGR) + name label.
                            cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (255, 0, 0), 2) #boxing face
                            text_x = bb[i][0]
                            text_y = bb[i][3] + 20
                            cv2.putText(frame, class_names[prediction[0]], (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                        1, (255, 0, 0), thickness=1, lineType=2)
                        else:
                            # Identity not in HumanNames: box only, no label.
                            cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face
                else:
                    print('Unable to align')
                cv2.imshow('Video', frame)
                # 'q' quits the playback loop early.
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
                c += 1
            video_capture.release()
            cv2.destroyAllWindows()
def parse_args(argv=None):
    """Parse command-line arguments for the video-display script.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse falls back to ``sys.argv[1:]``, so existing callers
            are unaffected; passing a list makes the function testable.

    Returns:
        argparse.Namespace with ``video_path``, ``model_path``, ``minsize``,
        ``classifer_path`` and ``output_label_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--video_path", type=str,
                        help='Path to the directory containing video.',
                        default="video/MGAFE0008218--AS01.mp4")
    parser.add_argument('--model_path', type=str,
                        help='Path to embedding model',
                        default="model/20180402-114759.pb")
    parser.add_argument('--minsize', type=int,
                        help='minimum size of face ', default=50)
    parser.add_argument('--classifer_path', type=str,
                        help='Path to KNN classifier',
                        default="classifier/classifier_1NN_grayscale46891.pkl")
    parser.add_argument('--output_label_path', type=str,
                        help='Path to directory containing output labels and frame',
                        default="data/output_label")
    args = parser.parse_args(argv)
    return args
# Script entry point: run the live display only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()
# FaceNet Demo
### 1. Create raw image directory.
Create a directory for your raw images so that images from different people are in different subdirectories. The name of each subdirectory is the label for that person.
```sh
$ tree data/raw
person-1
├── image-1.jpg
├── image-2.png
...
└── image-p.png
...
person-m
├── image-1.png
├── image-2.jpg
...
└── image-q.png
```
### 2. Preprocess the raw images
Align and crop the faces in the raw images we prepared, using MTCNN.
```sh
python3 Make_aligndata.py --input_dir data/raw/ --output_path data/aligned_face/clean_data/ --minsize 96
```
Arguments:
- minsize: Minimum size of face, default 96
- input_dir: Path to the data directory containing images
- output_path: Path to save aligned faces
### 3. Generate Representations and train 1NN classifier
In order to embed the aligned faces and train the classifier:
```sh
python3 Make_classifier.py --input_dir data/aligned_face/clean_data --model_path model/20180402-114759.pb
```
Arguments:
- input_dir: Path to the data directory containing images
- model_path: Path to embedding model
The extracted features will be generated in **"data/embedding"** and classifier will be in **"classifier/classifier_1NN_grayscale{}.pkl"**
### 4. Inference
For prediction, please execute the following command. Labels of detected faces will be written to the file **bounding_boxes_{}.txt** located in **output_label_path**; each line has the **<frame, coordinates, name>** format. When the **name** belongs to one of our **interested classes**, the frame is also stored for manual verification. Otherwise, the face is labeled **unknown**.
```sh
python3 Make_prediction.py --video_path video/MGAFE0008218--AS01.mp4 --model_path model/20180402-114759.pb --minsize 50 --classifer_path classifier/classifier_1NN_grayscale46891.pkl --output_label_path data/output_label
```
Arguments:
- video_path: Path to the directory containing video.
- model_path: Path to embedding model
- minsize: Minimum size of face, default 50
- classifer_path: Path to KNN classifier
- output_label_path: path to the label and saved frames
### 5. Apply clustering method
Now we apply **SORT** algorithm to track every face and put them into clusters. Clusters will be generated and stored in **"cluster/{video_name}/clusterid"**. After that, the system will try to guess the label for each cluster using **majority rule**. The predicted labels will be printed on the screen.
The command that helps us to do this is:
```sh
python3 Make_cluster.py --video_dir ./video --output_path data/cluster --model_path model/20180402-114759.pb --classifer_path classifier/classifier_1NN_grayscale46891.pkl --minsize 50 --margin 40 --dominant_ratio 0.5
```
Argument:
- minsize: Minimum size of face
- margin: Margin for face tracking