"""Extract pooled per-layer activations for a catalog of images and persist
them as one FAISS L2 index file per layer (``faiss-layer<N>.index``)."""

import os
import json

import faiss
import numpy as np
import torch

from utils import compute_batch, get_data


def save_features(layers, images, datasetdir, step=500):
    """Compute pooled activations for ``images`` and append them to one
    FAISS ``IndexFlatL2`` per entry in ``layers``.

    Images are processed in chunks of ``step`` so only one chunk of feature
    vectors is held in memory at a time; each chunk is flushed to the
    on-disk index files before the next chunk starts, so progress survives
    an interruption.

    Args:
        layers: layer keys to extract (indexed into the ``get_data`` result).
        images: sequence of inputs accepted by ``compute_batch``.
        datasetdir: unused; kept for backward compatibility with callers.
        step: chunk size (optional; the default preserves the original
            hard-coded value of 500).
    """
    # Hoist loop-invariant module construction: Flatten is layer-independent,
    # and each layer's pooling window is fixed, so build them all once instead
    # of once per image per layer.
    flatten = torch.nn.Flatten()
    pools = {}
    for layer in layers:
        # Layer 30 produces a smaller spatial map, hence the 5x5 window;
        # every other layer is pooled with 9x9.
        s = 5 if layer == 30 else 9
        pools[layer] = torch.nn.AvgPool3d((1, s, s), stride=(1, s, s))

    total = len(images)
    for start in range(0, total, step):
        print(f"Saving vectors: {start} / {total}")
        chunk = images[start:start + step]
        per_layer = [[] for _ in layers]
        for image in chunk:
            batch = compute_batch(image)
            data = get_data(batch.unsqueeze(0))
            for j, layer in enumerate(layers):
                pooled = flatten(pools[layer](data[layer]))
                # [0] drops the singleton batch dimension added by unsqueeze.
                per_layer[j].append(pooled.cpu().numpy()[0])

        for j, vectors in enumerate(per_layer):
            vectors = np.array(vectors)
            name = f"faiss-layer{layers[j]}.index"
            if start == 0:
                # First chunk: create a fresh index. (The original wrote an
                # empty index to disk and immediately read it back; creating
                # it directly is equivalent and skips one disk round-trip.)
                index = faiss.IndexFlatL2(vectors.shape[1])
            else:
                index = faiss.read_index(name)
            index.add(vectors)
            faiss.write_index(index, name)


if __name__ == '__main__':
    datasetdir = '../catalog_images_17k-Processed'
    layers = [1, 3, 4, 6, 8, 9, 11, 13, 15, 16, 18, 20, 22, 23, 25, 27, 29, 30]

    # One-off extraction step, kept for reference (the original held this in
    # a triple-quoted string). Uncomment to rebuild the indexes from the
    # labeled document list:
    # with open('../ES-Data/documents-labeled.json') as file:
    #     data = json.load(file)
    # images = [os.path.join(datasetdir, data[d]['file']) for d in data]
    # save_features(layers, images, datasetdir)

    # Sanity check: report how many vectors each per-layer index holds.
    for layer in layers:
        name = f"faiss-layer{layer}.index"
        index = faiss.read_index(name)
        print(index.ntotal)