import os
import json

import numpy as np
import torch
import torch.nn as nn
from torchvision import models
import faiss

from utils import compute_batch
from elasticsearch_utils import ESAPI

datasetdir = '../catalog_images_17k-Processed'
chosen_categories = ['sandal', 'flip-flop', 'boot', 'dress', 'shirt', 'shoe',
                     'backpack', 'trouser', 'underwear', 'handbag']

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Rebuild the fine-tuned VGG16: the classifier head maps image features into a
# 100-dimensional word-embedding space. The pretrained ImageNet weights are
# loaded first and then replaced by the fine-tuned checkpoint.
model_modified = models.vgg16(pretrained=True)
model_modified.classifier = nn.Sequential(
    nn.Linear(25088, 4096, bias=True),
    nn.ReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(4096, 100, bias=True),
)
model_modified.load_state_dict(torch.load('best-word-model.pt', map_location=device))
model_modified.to(device)
model_modified.eval()

es = ESAPI()

# Prepare inputs
all_image_ids = np.arange(len(os.listdir(datasetdir)))          # ids of every image in the dataset (unused below)
no_label_ids = es.get_all_taxonomy_ids('no-label')              # documents still missing a taxonomy label
index = faiss.read_index('../Word-Vectors/word_vectors.index')  # FAISS index of the category word vectors


def get_taxonomy(query):
    """Predict a taxonomy word for the image at `query` by embedding it and
    looking up the nearest category word vector in the FAISS index."""
    input_batch = compute_batch(query)
    input_batch = input_batch.unsqueeze(0).to(device)  # add the batch dimension
    with torch.no_grad():
        output = model_modified(input_batch)
    output = torch.squeeze(output).cpu().numpy()
    # FAISS expects a float32 matrix of query vectors; retrieve the single nearest neighbour.
    distances, indexes = index.search(np.array([output], dtype=np.float32), 1)
    # The index is assumed to store the word vectors in the same order as chosen_categories.
    word = chosen_categories[indexes[0][0]]
    return word


with open('../ES-Data/documents.json', 'r') as file:
    data = json.load(file)

# Label every document that has no taxonomy yet with the predicted word.
for doc_id in no_label_ids:
    filename = data[str(doc_id)]['file']
    filepath = os.path.join(datasetdir, filename)
    word = get_taxonomy(filepath)
    data[str(doc_id)]['taxonomy'] = word

with open('../ES-Data/documents-labeled.json', 'w') as file:
    json.dump(data, file, indent=3)
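
# ---------------------------------------------------------------------------
# Sketch (assumption): compute_batch is imported from the local utils module
# and its implementation is not shown here. The helper below is a minimal
# guess at the preprocessing it performs for a VGG16 backbone: load the image,
# resize/center-crop to 224x224, convert to a tensor, and normalize with
# ImageNet statistics. It is named _compute_batch_sketch to avoid shadowing
# the real implementation; the actual utils.compute_batch may differ.
# ---------------------------------------------------------------------------
def _compute_batch_sketch(image_path):
    from PIL import Image
    from torchvision import transforms

    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    image = Image.open(image_path).convert('RGB')
    # Returns a (3, 224, 224) tensor; get_taxonomy adds the batch dimension
    # with unsqueeze(0) before moving it to the device.
    return preprocess(image)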