Commit f8c5ff2f authored by Chiara Galdi's avatar Chiara Galdi
Browse files

Upload New File

parent d380c881
import cv2, random
import numpy as np
import pandas as pd
## Extract non overlapping patches from image ##
def image_to_patch(img, size):
    """Cut ``img`` into non-overlapping ``size`` x ``size`` patches.

    Patches are ordered row by row: left to right, then top to bottom.
    Trailing rows/columns that do not fill a complete patch are dropped.

    Args:
        img: array indexed as ``img[row, col, channel]``. Its values are
            written into a 3-channel ``uint8`` buffer, so the channel axis
            must be assignable to 3 channels.
        size: side length of the square patches, in pixels.

    Returns:
        ``uint8`` array of shape ``(n_rows * n_cols, size, size, 3)`` with
        ``n_rows = img.shape[0] // size`` and ``n_cols = img.shape[1] // size``.
    """
    # shape[0] is the image height (rows), shape[1] is its width (columns)
    n_rows = int(img.shape[0] // size)
    n_cols = int(img.shape[1] // size)
    # Output buffer: fixes the dtype (uint8) and the channel count (3)
    patches = np.ones((n_rows * n_cols, size, size, 3), dtype=np.uint8)
    # Keep only the area covered by complete patches
    cropped = img[:n_rows * size, :n_cols * size, :]
    channels = cropped.shape[2]
    # (rows, cols, ch) -> (n_rows, size, n_cols, size, ch): split both axes
    grid = cropped.reshape(n_rows, size, n_cols, size, channels)
    # Bring the two patch-grid axes together, then flatten them so that
    # patch (i, j) lands at index i * n_cols + j
    patches[...] = grid.swapaxes(1, 2).reshape(
        n_rows * n_cols, size, size, channels)
    return patches
## Creation of dataset for single network
def dataset_patches(dataset, ori_path, fin_path, folder, size,
                    bright_factor=1.2, max_brightness=240):
    """Extract bright patches from every image and build the patch dataset.

    Each image listed in ``dataset`` is read with OpenCV and cut into
    ``size`` x ``size`` patches by ``image_to_patch``. A patch is kept when
    its mean V (HSV) value is above ``bright_factor`` times the mean V of the
    whole image and below ``max_brightness``. Kept patches are written as
    ``fin_path + folder + <image name without extension>_<n>.jpg``.

    Args:
        dataset: table indexable with the columns ``'img'``,
            ``'label_models'``, ``'label_devices'`` and ``'label_brands'``.
        ori_path: prefix concatenated in front of each ``img`` value to load it.
        fin_path: prefix of the location where patches are written.
        folder: sub-path stored in the returned ``img`` column and appended
            to ``fin_path`` when writing.
        size: side length of the square patches, in pixels.
        bright_factor: a patch must be brighter than this multiple of the
            image mean brightness.
        max_brightness: upper (exclusive) bound on the patch mean brightness,
            used to reject saturated patches.

    Returns:
        DataFrame with columns ``img``, ``label_devices``, ``label_models``,
        ``label_brands``, one row per stored patch. The values go through
        ``np.stack`` and therefore share a single NumPy dtype.

    Raises:
        OSError: if OpenCV cannot read one of the images.
    """
    import os  # local import: only needed to strip the file extension

    ## List to store image, brands, models and devices
    image_data = []
    models_data = []
    devices_data = []
    brands_data = []
    ## The column order must match the unpacking order of the loop below
    columns = ['img', 'label_models', 'label_devices', 'label_brands']
    for nb_img, (id_img, model, device, brand) in enumerate(dataset[columns].values):
        ## Print the number of images processed
        if nb_img % 100 == 0:
            print(nb_img)
        ## Load the image from the path folder (ori_path)
        img = cv2.imread(ori_path + id_img)
        if img is None:
            # imread reports failure by returning None instead of raising
            raise OSError("Could not read image: " + ori_path + id_img)
        ## Mean brightness (V channel of HSV) of the whole image
        bright = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)[:, :, 2].mean()
        ## Image name without its extension, whatever the extension length
        stem = os.path.splitext(id_img)[0]
        # Number of the patch
        nb = 0
        for patch in image_to_patch(img, size):
            ## Mean brightness of the current patch
            patch_bright = cv2.cvtColor(patch, cv2.COLOR_BGR2HSV)[:, :, 2].mean()
            ## Keep patches brighter than the image but not saturated
            if bright * bright_factor < patch_bright < max_brightness:
                patch_name = folder + stem + "_" + str(nb) + ".jpg"
                image_data.append(patch_name)
                models_data.append(model)
                devices_data.append(device)
                brands_data.append(brand)
                # Store the patch in the folder
                cv2.imwrite(fin_path + patch_name, patch)
                # NOTE(review): the source indentation was lost; the counter
                # is assumed to advance only for stored patches - confirm.
                nb += 1
    print("Done")
    ## Create the dataframe and return it
    return pd.DataFrame(
        np.transpose(np.stack([image_data, devices_data, models_data, brands_data])),
        columns=['img', 'label_devices', 'label_models', 'label_brands'])
## Creation of dataset of difficult pairs for two-stream network
def difficult_patch_pairs_device(dataset, path, nb):
    """Build pairs of patches drawn from the same model and save them as CSV.

    For every value of ``label_model``, ``nb`` pairs are drawn once per
    distinct device of that model. Both members of a pair come from the
    images of that model; when the model has a single device, they come from
    all images of the model's brand instead. A pair is labelled 1 when both
    images have the same ``label_device`` and 0 otherwise.

    Args:
        dataset: DataFrame with the columns ``img``, ``label_model``,
            ``label_device`` and ``label_brand``.
            NOTE(review): ``dataset_patches`` emits plural column names
            (``label_models`` ...) - confirm which schema callers pass here.
        path: destination passed to ``DataFrame.to_csv``.
        nb: number of pairs drawn per (model, device) combination.

    Returns:
        None. The pairs (columns ``img1``, ``img2``, ``label``) are written
        to ``path`` and the positive/negative counts are printed.
    """
    ## List to store image1, image2 and label (similar or dissimilar)
    image_data1 = []
    image_data2 = []
    label_dev = []
    ## Loop on the models
    for model in dataset.label_model.unique():
        ## Subset with images only labeled with the actual model
        pool = dataset[dataset.label_model == model]
        n_devices = len(pool.label_device.unique())
        ## With a single device, widen the pool to the brand of the model
        if n_devices == 1:
            pool = dataset[dataset.label_brand == pool.label_brand.values[0]]
        ## Nothing to draw from (e.g. NaN labels); replicating an empty
        ## subset would never reach the requested size
        if pool.empty:
            continue
        ## Replicate the subset until reach enough images as wanted
        while len(pool) < nb * 2:
            pool = pd.concat([pool, pool], ignore_index=True)
        ## One draw of nb pairs per device of the model
        for _ in range(n_devices):
            drawn = pool.sample(nb * 2)
            ## Split the draw in two halves with positional slicing
            ## (np.split on a DataFrame relies on deprecated swapaxes)
            first = drawn.iloc[:nb]
            second = drawn.iloc[nb:]
            ## Create the labels (similar dissimilar) according to the device
            same = first.label_device.values == second.label_device.values
            image_data1.extend(first.img.values)
            image_data2.extend(second.img.values)
            label_dev.extend(same.astype(np.int64))
    ## Create the dataframe
    final_df = pd.DataFrame(
        {'img1': image_data1, 'img2': image_data2, 'label': label_dev})
    ## Print the number of labels similar/dissimilar
    nbp = int((final_df.label == 1).sum())
    nbn = int((final_df.label == 0).sum())
    print("Nbr neg", nbn)
    print("Nbr pos", nbp)
    ## Save the dataframe
    final_df.to_csv(path)
    print("Saved")
## Creation of dataset of classical pairs for two-stream network
def patch_pairs_device(dataset, path, nb):
    """Build device-level pairs of patches and save them as CSV.

    For every value of ``label_device``, ``nb + int(nb / 5)`` pairs are
    produced. The first image of each pair always belongs to the current
    device. The second image is drawn from the whole dataset for the first
    ``nb`` pairs and from the current device for the remaining
    ``int(nb / 5)`` pairs, which are therefore always positive. A pair is
    labelled 1 when both images have the same ``label_device``, else 0.

    Args:
        dataset: DataFrame with the columns ``img`` and ``label_device``.
        path: destination passed to ``DataFrame.to_csv``.
        nb: number of (device, random image) pairs per device.

    Returns:
        None. The pairs (columns ``img1``, ``img2``, ``label``) are written
        to ``path`` and the positive/negative counts are printed.
    """
    ## List to store image1, image2 and label (similar or dissimilar)
    image_data1 = []
    image_data2 = []
    label_dev = []
    ## Number of extra same-device pairs; does not depend on the device
    nb2 = int(nb / 5)
    ## Loop on the devices
    for device in dataset.label_device.unique():
        ## Random subset of the whole dataset; draw with replacement only
        ## when the dataset holds fewer than nb rows
        others = dataset.sample(nb, replace=len(dataset) < nb)
        ## Subset with images only labeled with the actual device
        same_device = dataset[dataset.label_device == device]
        ## Nothing to draw from (e.g. NaN label); replicating an empty
        ## subset would never reach the requested size
        if same_device.empty:
            continue
        ## Replicate the subset until reach enough images as wanted
        while len(same_device) < nb + nb2:
            same_device = pd.concat([same_device, same_device], ignore_index=True)
        ## Second members: nb random images followed by nb2 same-device images
        second = pd.concat([others, same_device.sample(nb2)], ignore_index=True)
        ## First members: nb + nb2 images of the actual device
        first = same_device.sample(nb + nb2)
        ## Create the labels (similar dissimilar) according to the device
        same = first.label_device.values == second.label_device.values
        image_data1.extend(first.img.values)
        image_data2.extend(second.img.values)
        label_dev.extend(same.astype(np.int64))
    ## Create the dataframe
    final_df = pd.DataFrame(
        {'img1': image_data1, 'img2': image_data2, 'label': label_dev})
    ## Print the number of labels similar/dissimilar
    nbp = int((final_df.label == 1).sum())
    nbn = int((final_df.label == 0).sum())
    print("Nbr neg", nbn)
    print("Nbr pos", nbp)
    ## Save the dataframe
    final_df.to_csv(path)
    print("Saved")
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment