import gc
import pickle

import numpy as np
import pandas as pd
import torch
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

import plots

# constants
TURNS_PER_CONV = 8
MEASURES_PER_TURN = 10
PROJ_DIR = "/content/drive/My Drive/faculdade/fct-miei/04_ano4_(year4)/semestre1/ri/infos_projeto"


def project(REL_DOCS_PER_TURN, UPDATE_ELASTICSEARCH_RESULTS, elastic, test_bed,
            topics, relevance_judgments, idx, setName, showPlots=False):
    # NOTE: this parameter was originally named `plots`, which shadowed the
    # `plots` module and broke every plotting call below; renamed to `showPlots`.

    # totals
    _recall = 0
    _p10 = 0
    _ndcg5 = 0

    # counters
    _ntopics = 0
    _nturns = 0
    _ntotalTurns = 0

    # metrics
    _p10s = np.array([])
    _aps = np.array([])
    _ndcg5s = np.array([])
    _precisions = np.array([])
    _recalls = np.array([])

    # conversation and turn numbers and names
    _convNumbers = []
    _convNames = np.array([])

    for topic in topics:
        convID = topic['number']
        if convID not in idx:
            continue

        _convNumbers.append(convID)
        _convNames = np.append(_convNames, str(convID) + " " + topic['title'])

        _turnPrecisions = np.array([])
        _turnRecalls = np.array([])

        for turn in topic['turn'][:TURNS_PER_CONV]:
            turnID = turn['number']
            utterance = turn['raw_utterance']
            topicTurnID = '%d_%d' % (convID, turnID)

            aux = relevance_judgments.loc[relevance_judgments['topic_turn_id'] == topicTurnID]
            num_rel = aux.loc[aux['rel'] != 0]['docid'].count()

            _convNames = np.append(_convNames, topicTurnID + " " + utterance)
            _ntotalTurns += 1

            # turns without relevance judgments are skipped and marked with NaN
            if num_rel == 0:
                _p10s = np.append(_p10s, np.nan)
                _aps = np.append(_aps, np.nan)
                _ndcg5s = np.append(_ndcg5s, np.nan)
                _turnPrecisions = np.append(_turnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
                _turnRecalls = np.append(_turnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)
                continue

            if UPDATE_ELASTICSEARCH_RESULTS:
                result = elastic.search_body(query=utterance, numDocs=REL_DOCS_PER_TURN)
                pickle.dump(result, open(PROJ_DIR + "/pkls/" + setName + "/" + str(REL_DOCS_PER_TURN) + "/" + topicTurnID + ".pkl", "wb"))
            else:
                result = pickle.load(open(PROJ_DIR + "/pkls/" + setName + "/" + str(REL_DOCS_PER_TURN) + "/" + topicTurnID + ".pkl", "rb"))

            # turns with no retrieved documents score zero on every metric
            # (the num_rel == 0 case was already handled above)
            if np.size(result) == 0:
                _p10s = np.append(_p10s, 0.0)
                _aps = np.append(_aps, 0.0)
                _ndcg5s = np.append(_ndcg5s, 0.0)
                _turnPrecisions = np.append(_turnPrecisions, np.zeros(MEASURES_PER_TURN))
                _turnRecalls = np.append(_turnRecalls, np.zeros(MEASURES_PER_TURN))
                if showPlots:
                    print(topicTurnID, utterance, num_rel, "NO RESULTS")
                continue

            if showPlots:
                print(topicTurnID, utterance, num_rel)

            [p10, recall, ap, ndcg5, precisions, recalls] = test_bed.eval(result[['_id', '_score']], topicTurnID)

            # accumulate per-turn precisions and recalls for this conversation
            _turnPrecisions = np.append(_turnPrecisions, precisions)
            _turnRecalls = np.append(_turnRecalls, recalls)

            # legacy per-turn plots, kept for reference (disabled):
            # plots.plotLines(PROJ_DIR, REL_DOCS_PER_TURN, "precision", [range(11)], [_turnPrecisions], ["LMD"])
            # plots.plotLines(PROJ_DIR, REL_DOCS_PER_TURN, "recall", [range(11)], [_turnRecalls], ["LMD"])
            # plots.plotLines(PROJ_DIR, REL_DOCS_PER_TURN, "precision-recall", [_turnRecalls], [_turnPrecisions], ["LMD"])

            if showPlots:
                print('P@10=', p10, ' Recall=', recall, ' AP=', ap, ' NDCG@5=', ndcg5)

            # totals
            _recall = _recall + recall
            _p10 = _p10 + p10
            _ndcg5 = _ndcg5 + ndcg5

            # metrics
            _p10s = np.append(_p10s, p10)
            _aps = np.append(_aps, ap)
            _ndcg5s = np.append(_ndcg5s, ndcg5)

            # counters
            _nturns = _nturns + 1

        # pad the conversation with NaNs up to TURNS_PER_CONV turns
        while _ntotalTurns % TURNS_PER_CONV != 0:
            _ntotalTurns += 1
            _p10s = np.append(_p10s, np.nan)
            _aps = np.append(_aps, np.nan)
            _ndcg5s = np.append(_ndcg5s, np.nan)
            _turnPrecisions = np.append(_turnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
            _turnRecalls = np.append(_turnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)
            _convNames = np.append(_convNames, "NO RESULT")

        # compute conversation means
        _turnPrecisions = np.reshape(_turnPrecisions, (TURNS_PER_CONV, MEASURES_PER_TURN))
        _turnRecalls = np.reshape(_turnRecalls, (TURNS_PER_CONV, MEASURES_PER_TURN))

        # add precisions and recalls to the global matrices
        _precisions = np.append(_precisions, np.nanmean(_turnPrecisions, axis=0))
        _recalls = np.append(_recalls, np.nanmean(_turnRecalls, axis=0))

        # counters
        _ntopics += 1

    # metrics
    _p10s = np.reshape(_p10s, (_ntopics, TURNS_PER_CONV))
    _aps = np.reshape(_aps, (_ntopics, TURNS_PER_CONV))
    _ndcg5s = np.reshape(_ndcg5s, (_ntopics, TURNS_PER_CONV))
    _precisions = np.reshape(_precisions, (_ntopics, MEASURES_PER_TURN))
    _recalls = np.reshape(_recalls, (_ntopics, MEASURES_PER_TURN))

    # conversation and turn names
    _convNames = np.reshape(_convNames, (_ntopics, TURNS_PER_CONV + 1))

    # generate plots
    if showPlots:
        plots.plotMetricAlongConversation(PROJ_DIR, REL_DOCS_PER_TURN, setName, "Average Precision", [_aps], ["LMD"], _convNumbers)
        plots.plotMetricAlongConversation(PROJ_DIR, REL_DOCS_PER_TURN, setName, "normalized Discounted Cumulative Gain", [_ndcg5s], ["LMD"], _convNumbers)
        plots.plotMetricEachConversation(PROJ_DIR, REL_DOCS_PER_TURN, setName, "Average Precision", [_aps], ["LMD"], _convNumbers, _convNames)
        plots.plotMetricEachConversation(PROJ_DIR, REL_DOCS_PER_TURN, setName, "normalized Discounted Cumulative Gain", [_ndcg5s], ["LMD"], _convNumbers, _convNames)
        plots.plotPrecisionRecall(PROJ_DIR, REL_DOCS_PER_TURN, setName, [_recalls], [_precisions], ["LMD"], _convNumbers)

    # overall means
    _p10 = _p10 / _nturns
    _recall = _recall / _nturns
    _ndcg5 = _ndcg5 / _nturns

    if showPlots:
        print()
        print('P@10=', _p10, ' Recall=', _recall, ' NDCG@5=', _ndcg5)

    return _aps, _ndcg5s, _recalls, _precisions, _convNumbers, _convNames
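

# Hedged sketch, not project code: `test_bed.eval` is provided elsewhere.
# This minimal approximation shows how the metrics it is assumed to return
# (P@10, recall, AP, NDCG@5) are computed; `_demo_eval` and its arguments are
# hypothetical names, not part of the project API.
def _demo_eval(ranked_doc_ids, relevant_doc_ids):
    rel = [1 if d in relevant_doc_ids else 0 for d in ranked_doc_ids]
    p10 = sum(rel[:10]) / 10
    recall = sum(rel) / max(len(relevant_doc_ids), 1)
    # average precision: mean of precision at each relevant rank
    hits, ap = 0, 0.0
    for i, r in enumerate(rel, start=1):
        if r:
            hits += 1
            ap += hits / i
    ap /= max(len(relevant_doc_ids), 1)
    # NDCG@5 with binary gains: DCG over the top 5 ranks over the ideal DCG
    dcg5 = sum(r / np.log2(i + 1) for i, r in enumerate(rel[:5], start=1))
    ideal = sum(1 / np.log2(i + 1) for i in range(1, min(len(relevant_doc_ids), 5) + 1))
    ndcg5 = dcg5 / ideal if ideal > 0 else 0.0
    return p10, recall, ap, ndcg5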


def project2Train(REL_DOCS_PER_TURN, UPDATE_BERT_RESULTS, tokenizer, model, device,
                  topics, relevanceJudgments, topicsIDs, setName):
    # variables
    totalDocs = 0
    triplets = np.array([])
    features = np.array([])
    classes = np.array([])

    print("Train")
    print("-- Building triplets")
    for topic in topics:
        convID = topic['number']
        if convID not in topicsIDs:
            continue

        for turn in topic['turn'][:TURNS_PER_CONV]:
            turnID = turn['number']
            topicTurnID = '%d_%d' % (convID, turnID)

            info = relevanceJudgments.loc[relevanceJudgments['topic_turn_id'] == topicTurnID]
            numberRel = info.loc[info['rel'] != 0]['docid'].count()
            if numberRel == 0:
                continue

            pickleFile = pickle.load(open(PROJ_DIR + "/pkls/" + setName + "/" + str(REL_DOCS_PER_TURN) + "/" + topicTurnID + ".pkl", "rb"))
            if np.size(pickleFile) == 0:
                continue

            question = turn['raw_utterance']
            for docID in info['docid']:
                docInfo = info.loc[info['docid'] == docID]
                passageList = pickleFile[pickleFile['_id'] == docID]['_source.body'].values
                if len(passageList) > 0:
                    passage = passageList[0]
                    # binary label: 1 if the judged relevance is positive
                    classes = np.append(classes, (docInfo['rel'].values[0] > 0) * 1)
                    triplets = np.append(triplets, np.array([str(question), str(passage)]))
                    totalDocs += 1

    triplets = np.reshape(triplets, (totalDocs, 2))
    classes = np.reshape(classes, (totalDocs,))

    print("-- Getting BERT embeddings")
    if UPDATE_BERT_RESULTS:
        for triplet in triplets:
            # NOTE: truncation=False assumes every (query, passage) pair fits
            # within BERT's 512-token limit; longer inputs would error out
            bertInput = convert_to_bert_input(sentences=triplet, max_seq_length=512, tokenizer=tokenizer, padding='max_length', truncation=False)
            bertInput['input_ids'] = bertInput['input_ids'].to(device)
            bertInput['attention_mask'] = bertInput['attention_mask'].to(device)
            bertInput['token_type_ids'] = bertInput['token_type_ids'].to(device)
            bertOutput = model(**bertInput)
            # keep the [CLS] token embedding as the feature vector
            featuresLine = bertOutput["last_hidden_state"][0, 0].detach().clone().cpu().numpy()
            features = np.append(features, featuresLine)
        features = np.reshape(features, (totalDocs, 768))
        pickle.dump(features, open(PROJ_DIR + "/bert/" + setName + "/" + str(REL_DOCS_PER_TURN) + "/all.pkl", "wb"))
    else:
        features = pickle.load(open(PROJ_DIR + "/bert/" + setName + "/" + str(REL_DOCS_PER_TURN) + "/all.pkl", "rb"))

    print("-- Training classifier")
    folds = 7
    maxIter = 500
    bestValidError = 1
    bestC = -10
    # grid-search the regularization strength C over 10^-10 .. 10^0 with
    # stratified cross-validation, keeping the C with the lowest mean
    # validation error
    for C in range(-10, 1):
        trainError = validError = 0
        for train, valid in StratifiedKFold(n_splits=folds).split(classes, classes):
            lr = LogisticRegression(C=10**C, random_state=0, max_iter=maxIter)
            lr.fit(features[train, :], classes[train])
            trainError += 1 - lr.score(features[train, :], classes[train])
            validError += 1 - lr.score(features[valid, :], classes[valid])
        if validError / folds < bestValidError:
            bestValidError = validError / folds
            bestC = C

    # refit on the full training set with the selected C
    classifier = LogisticRegression(random_state=0, max_iter=maxIter, C=10**bestC)
    classifier.fit(features, classes)
    print("-- Training done (C: 10^{} ; valid: {})".format(bestC, bestValidError))

    gc.collect()
    return classifier
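

# Design note, hedged sketch rather than project code: the manual grid search
# above is equivalent in spirit to scikit-learn's LogisticRegressionCV, which
# runs the same stratified cross-validated search over Cs = 10^-10 .. 10^0 in
# a single call (its default scoring is accuracy, so minimizing error and
# maximizing score select the same C).
def _demo_logreg_cv(features, classes):
    from sklearn.linear_model import LogisticRegressionCV
    clf = LogisticRegressionCV(Cs=np.logspace(-10, 0, 11), cv=7,
                               random_state=0, max_iter=500)
    return clf.fit(features, classes)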


def project2Test(REL_DOCS_PER_TURN, UPDATE_BERT_RESULTS, classifier, tokenizer, model, device,
                 topics, test_bed, relevanceJudgments, topicsIDs, setName):
    # counters
    _ntopics = 0
    _nturns = 0
    _ntotalTurns = 0

    # metrics
    _p10s = np.array([])
    _aps = np.array([])
    _ndcg5s = np.array([])
    _precisions = np.array([])
    _recalls = np.array([])

    print("Test")
    for topic in topics:
        convID = topic['number']
        if convID not in topicsIDs:
            continue

        _turnPrecisions = np.array([])
        _turnRecalls = np.array([])

        for turn in topic['turn'][:TURNS_PER_CONV]:
            features = np.array([])

            turnID = turn['number']
            topicTurnID = '%d_%d' % (convID, turnID)

            info = relevanceJudgments.loc[relevanceJudgments['topic_turn_id'] == topicTurnID]
            numberRel = info.loc[info['rel'] != 0]['docid'].count()

            _ntotalTurns += 1
            if numberRel == 0:
                _p10s = np.append(_p10s, np.nan)
                _aps = np.append(_aps, np.nan)
                _ndcg5s = np.append(_ndcg5s, np.nan)
                _turnPrecisions = np.append(_turnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
                _turnRecalls = np.append(_turnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)
                continue

            pickleFile = pickle.load(open(PROJ_DIR + "/pkls/" + setName + "/" + str(REL_DOCS_PER_TURN) + "/" + topicTurnID + ".pkl", "rb"))
            if np.size(pickleFile) == 0:
                _p10s = np.append(_p10s, 0.0)
                _aps = np.append(_aps, 0.0)
                _ndcg5s = np.append(_ndcg5s, 0.0)
                _turnPrecisions = np.append(_turnPrecisions, np.zeros(MEASURES_PER_TURN))
                _turnRecalls = np.append(_turnRecalls, np.zeros(MEASURES_PER_TURN))
                continue

            print("-- {}".format(topicTurnID))

            # legacy triplet-building path, kept for reference (disabled):
            # print("-- -- Building triplets")
            # question = turn['raw_utterance']
            # for docID in pickleFile['_id']:
            #     docInfo = info.loc[info['docid'] == docID]
            #     passageList = pickleFile[pickleFile['_id'] == docID]['_source.body'].values
            #     if len(passageList) > 0:
            #         passage = passageList[0]
            #         triplets = np.append(triplets, np.array([str(question), str(passage), str(docID)]))
            #         rel = 0
            #         if len(docInfo['rel'].values) > 0:
            #             rel = docInfo['rel'].values[0]
            #         pred = np.append(pred, rel)
            #         totalDocs += 1
            # triplets = np.reshape(triplets, (totalDocs, 3))
            # pred = np.reshape(pred, (totalDocs,))

            print("-- -- Getting BERT embeddings")
            # fix: `question` was only defined inside the disabled block above
            question = turn['raw_utterance']
            if UPDATE_BERT_RESULTS:
                passages = []
                for docID in pickleFile['_id']:
                    passages.append(pickleFile[pickleFile['_id'] == docID]['_source.body'].values[0])
                for passage in passages:
                    bertInput = convert_to_bert_input(sentences=[question, passage], max_seq_length=512, tokenizer=tokenizer, padding='max_length', truncation=False)
                    bertInput['input_ids'] = bertInput['input_ids'].to(device)
                    bertInput['attention_mask'] = bertInput['attention_mask'].to(device)
                    bertInput['token_type_ids'] = bertInput['token_type_ids'].to(device)
                    bertOutput = model(**bertInput)
                    featuresLine = bertOutput["last_hidden_state"][0, 0].detach().clone().cpu().numpy()
                    features = np.append(features, featuresLine)
                # fix: the reshape used `totalDocs`, which is never incremented
                # on this path; the number of rows is the number of passages
                features = np.reshape(features, (len(passages), 768))
                pickle.dump(features, open(PROJ_DIR + "/bert/" + setName + "/" + str(REL_DOCS_PER_TURN) + "/" + topicTurnID + ".pkl", "wb"))
            else:
                features = pickle.load(open(PROJ_DIR + "/bert/" + setName + "/" + str(REL_DOCS_PER_TURN) + "/" + topicTurnID + ".pkl", "rb"))

            print("-- -- Testing classifier")
            # pred = classifier.predict(features)  # legacy hard predictions (disabled)
            prob = classifier.predict_proba(features)[:, 1]

            print("-- -- Sorting passages")
            # order passages by descending relevance probability
            newOrder = np.lexsort(np.reshape(prob, (1, prob.shape[0])))[::-1]
            triplets = np.column_stack((pickleFile['_id'], prob))
            triplets = triplets[newOrder, :]
            triplets = pd.DataFrame(data=triplets, columns=['_id', '_score'])

            print("-- -- Plotting")
            # legacy evaluation call (disabled):
            # [p10, recall, ap, ndcg5, precisions, recalls] = test_bed.eval2(triplets, pred, prob)
            [p10, recall, ap, ndcg5, precisions, recalls] = test_bed.eval(triplets, topicTurnID)

            # accumulate per-turn precisions and recalls for this conversation
            _turnPrecisions = np.append(_turnPrecisions, precisions)
            _turnRecalls = np.append(_turnRecalls, recalls)

            # metrics
            _p10s = np.append(_p10s, p10)
            _aps = np.append(_aps, ap)
            _ndcg5s = np.append(_ndcg5s, ndcg5)

            # counters
            _nturns += 1

        # pad the conversation with NaNs up to TURNS_PER_CONV turns
        while _ntotalTurns % TURNS_PER_CONV != 0:
            _ntotalTurns += 1
            _p10s = np.append(_p10s, np.nan)
            _aps = np.append(_aps, np.nan)
            _ndcg5s = np.append(_ndcg5s, np.nan)
            _turnPrecisions = np.append(_turnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
            _turnRecalls = np.append(_turnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)

        # compute conversation means
        _turnPrecisions = np.reshape(_turnPrecisions, (TURNS_PER_CONV, MEASURES_PER_TURN))
        _turnRecalls = np.reshape(_turnRecalls, (TURNS_PER_CONV, MEASURES_PER_TURN))

        # add precisions and recalls to the global matrices
        _precisions = np.append(_precisions, np.nanmean(_turnPrecisions, axis=0))
        _recalls = np.append(_recalls, np.nanmean(_turnRecalls, axis=0))

        # counters
        _ntopics += 1
        gc.collect()

    # metrics
    _p10s = np.reshape(_p10s, (_ntopics, TURNS_PER_CONV))
    _aps = np.reshape(_aps, (_ntopics, TURNS_PER_CONV))
    _ndcg5s = np.reshape(_ndcg5s, (_ntopics, TURNS_PER_CONV))
    _precisions = np.reshape(_precisions, (_ntopics, MEASURES_PER_TURN))
    _recalls = np.reshape(_recalls, (_ntopics, MEASURES_PER_TURN))

    return _aps, _ndcg5s, _recalls, _precisions
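

# Hedged sketch, not project code: demonstrates the reranking step above on
# toy data. np.lexsort with a single key row is equivalent to np.argsort, so
# reversing it orders documents by descending classifier probability.
def _demo_probability_rerank():
    doc_ids = np.array(['MARCO_1', 'CAR_2', 'MARCO_3'])
    prob = np.array([0.2, 0.9, 0.5])
    order = np.lexsort(prob.reshape(1, -1))[::-1]  # same as np.argsort(prob)[::-1]
    ranked = pd.DataFrame({'_id': doc_ids[order], '_score': prob[order]})
    return ranked  # CAR_2 (0.9), MARCO_3 (0.5), MARCO_1 (0.2)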


def project3(REL_DOCS_PER_TURN, UPDATE_ELASTICSEARCH_RESULTS, rewriter, tokenizer, model, device,
             nlp, classifier, elastic, testBed, topics, relevanceJudgments, topicsIDs, setName,
             convNumbers, convNames):
    ## METHOD 1
    method1Metrics = project3Method1(REL_DOCS_PER_TURN, UPDATE_ELASTICSEARCH_RESULTS, rewriter, tokenizer, model, device, nlp, classifier, elastic, testBed, topics, relevanceJudgments, topicsIDs, setName, convNumbers, convNames)
    APs = [method1Metrics["aps"]["lmd"], method1Metrics["aps"]["bert"]]
    nDCGs = [method1Metrics["ndcg5s"]["lmd"], method1Metrics["ndcg5s"]["bert"]]
    Recalls = [method1Metrics["recalls"]["lmd"], method1Metrics["recalls"]["bert"]]
    Precisions = [method1Metrics["precisions"]["lmd"], method1Metrics["precisions"]["bert"]]
    doPlots(REL_DOCS_PER_TURN, setName, "Phase 3 Method 1", APs, nDCGs, Recalls, Precisions, ["LMD", "BERT"], convNumbers, convNames)

    ## METHOD 2
    method2Metrics = project3Method2(REL_DOCS_PER_TURN, UPDATE_ELASTICSEARCH_RESULTS, rewriter, tokenizer, model, device, nlp, classifier, elastic, testBed, topics, relevanceJudgments, topicsIDs, setName, convNumbers, convNames)
    APs = [method2Metrics["aps"]["lmd"], method2Metrics["aps"]["bert"]]
    nDCGs = [method2Metrics["ndcg5s"]["lmd"], method2Metrics["ndcg5s"]["bert"]]
    Recalls = [method2Metrics["recalls"]["lmd"], method2Metrics["recalls"]["bert"]]
    Precisions = [method2Metrics["precisions"]["lmd"], method2Metrics["precisions"]["bert"]]
    doPlots(REL_DOCS_PER_TURN, setName, "Phase 3 Method 2", APs, nDCGs, Recalls, Precisions, ["LMD", "BERT"], convNumbers, convNames)

    ## METHOD 3
    method3Metrics = project3Method3(REL_DOCS_PER_TURN, UPDATE_ELASTICSEARCH_RESULTS, rewriter, tokenizer, model, device, nlp, classifier, elastic, testBed, topics, relevanceJudgments, topicsIDs, setName, convNumbers, convNames)
    APs = [method3Metrics["aps"]["lmd"], method3Metrics["aps"]["bert"]]
    nDCGs = [method3Metrics["ndcg5s"]["lmd"], method3Metrics["ndcg5s"]["bert"]]
    Recalls = [method3Metrics["recalls"]["lmd"], method3Metrics["recalls"]["bert"]]
    Precisions = [method3Metrics["precisions"]["lmd"], method3Metrics["precisions"]["bert"]]
    doPlots(REL_DOCS_PER_TURN, setName, "Phase 3 Method 3", APs, nDCGs, Recalls, Precisions, ["LMD", "BERT"], convNumbers, convNames)

    # combined: compare the three methods per ranker
    APs = [method1Metrics["aps"]["lmd"], method2Metrics["aps"]["lmd"], method3Metrics["aps"]["lmd"]]
    nDCGs = [method1Metrics["ndcg5s"]["lmd"], method2Metrics["ndcg5s"]["lmd"], method3Metrics["ndcg5s"]["lmd"]]
    Recalls = [method1Metrics["recalls"]["lmd"], method2Metrics["recalls"]["lmd"], method3Metrics["recalls"]["lmd"]]
    Precisions = [method1Metrics["precisions"]["lmd"], method2Metrics["precisions"]["lmd"], method3Metrics["precisions"]["lmd"]]
    doPlots(REL_DOCS_PER_TURN, setName, "Phase 3 LMD", APs, nDCGs, Recalls, Precisions, ["Method 1", "Method 2", "Method 3"], convNumbers, convNames)

    APs = [method1Metrics["aps"]["bert"], method2Metrics["aps"]["bert"], method3Metrics["aps"]["bert"]]
    nDCGs = [method1Metrics["ndcg5s"]["bert"], method2Metrics["ndcg5s"]["bert"], method3Metrics["ndcg5s"]["bert"]]
    Recalls = [method1Metrics["recalls"]["bert"], method2Metrics["recalls"]["bert"], method3Metrics["recalls"]["bert"]]
    Precisions = [method1Metrics["precisions"]["bert"], method2Metrics["precisions"]["bert"], method3Metrics["precisions"]["bert"]]
    doPlots(REL_DOCS_PER_TURN, setName, "Phase 3 BERT", APs, nDCGs, Recalls, Precisions, ["Method 1", "Method 2", "Method 3"], convNumbers, convNames)


def project3Method1(REL_DOCS_PER_TURN, UPDATE_ELASTICSEARCH_RESULTS, rewriter, tokenizer, model, device,
                    nlp, classifier, elastic, testBed, topics, relevanceJudgments, topicsIDs, setName,
                    convNumbers, convNames):
    ## METHOD 1: expand every turn after the first with the conversation's first utterance
    print("Method 1")

    # counters
    _LMDntopics = 0
    _LMDnturns = 0
    _LMDntotalTurns = 0
    _BERTntopics = 0
    _BERTnturns = 0
    _BERTntotalTurns = 0

    # metrics
    _LMDp10s = np.array([])
    _LMDaps = np.array([])
    _LMDndcg5s = np.array([])
    _LMDprecisions = np.array([])
    _LMDrecalls = np.array([])
    _BERTp10s = np.array([])
    _BERTaps = np.array([])
    _BERTndcg5s = np.array([])
    _BERTprecisions = np.array([])
    _BERTrecalls = np.array([])

    for topic in topics:
        convID = topic['number']
        if convID not in topicsIDs:
            continue

        _LMDturnPrecisions = np.array([])
        _LMDturnRecalls = np.array([])
        _BERTturnPrecisions = np.array([])
        _BERTturnRecalls = np.array([])

        print("-- {}".format(convID))
        firstUtterance = ""
        for turn in topic['turn'][:TURNS_PER_CONV]:
            turnID = turn['number']
            topicTurnID = '%d_%d' % (convID, turnID)

            info = relevanceJudgments.loc[relevanceJudgments['topic_turn_id'] == topicTurnID]
            numberRel = info.loc[info['rel'] != 0]['docid'].count()

            _LMDntotalTurns += 1
            if numberRel == 0:
                _LMDp10s = np.append(_LMDp10s, np.nan)
                _LMDaps = np.append(_LMDaps, np.nan)
                _LMDndcg5s = np.append(_LMDndcg5s, np.nan)
                _LMDturnPrecisions = np.append(_LMDturnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
                _LMDturnRecalls = np.append(_LMDturnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)
                _BERTp10s = np.append(_BERTp10s, np.nan)
                _BERTaps = np.append(_BERTaps, np.nan)
                _BERTndcg5s = np.append(_BERTndcg5s, np.nan)
                _BERTturnPrecisions = np.append(_BERTturnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
                _BERTturnRecalls = np.append(_BERTturnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)
                continue

            utterance = turn['raw_utterance']
            if turnID == 1:
                firstUtterance = utterance
            else:
                utterance += " " + firstUtterance
            print("-- -- {}: {}".format(turnID, utterance))

            # LMD
            [p10, recall, ap, ndcg5, precisions, recalls] = getMetricsNormal(MEASURES_PER_TURN, classifier, tokenizer, model, device, elastic, testBed, utterance, utterance, REL_DOCS_PER_TURN, topicTurnID, relevanceJudgments)
            _LMDp10s = np.append(_LMDp10s, p10)
            _LMDaps = np.append(_LMDaps, ap)
            _LMDndcg5s = np.append(_LMDndcg5s, ndcg5)
            _LMDturnPrecisions = np.append(_LMDturnPrecisions, precisions)
            _LMDturnRecalls = np.append(_LMDturnRecalls, recalls)
            _LMDnturns += 1

            # BERT reranking on top of LMD
            [p10, recall, ap, ndcg5, precisions, recalls] = getMetricsNormal(MEASURES_PER_TURN, classifier, tokenizer, model, device, elastic, testBed, utterance, utterance, REL_DOCS_PER_TURN, topicTurnID, relevanceJudgments, True)
            _BERTp10s = np.append(_BERTp10s, p10)
            _BERTaps = np.append(_BERTaps, ap)
            _BERTndcg5s = np.append(_BERTndcg5s, ndcg5)
            _BERTturnPrecisions = np.append(_BERTturnPrecisions, precisions)
            _BERTturnRecalls = np.append(_BERTturnRecalls, recalls)
            _BERTnturns += 1

        # pad the conversation with NaNs up to TURNS_PER_CONV turns
        while _LMDntotalTurns % TURNS_PER_CONV != 0:
            _LMDntotalTurns += 1
            _LMDp10s = np.append(_LMDp10s, np.nan)
            _LMDaps = np.append(_LMDaps, np.nan)
            _LMDndcg5s = np.append(_LMDndcg5s, np.nan)
            _LMDturnPrecisions = np.append(_LMDturnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
            _LMDturnRecalls = np.append(_LMDturnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)
            _BERTntotalTurns += 1
            _BERTp10s = np.append(_BERTp10s, np.nan)
            _BERTaps = np.append(_BERTaps, np.nan)
            _BERTndcg5s = np.append(_BERTndcg5s, np.nan)
            _BERTturnPrecisions = np.append(_BERTturnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
            _BERTturnRecalls = np.append(_BERTturnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)

        # compute conversation means
        _LMDturnPrecisions = np.reshape(_LMDturnPrecisions, (TURNS_PER_CONV, MEASURES_PER_TURN))
        _LMDturnRecalls = np.reshape(_LMDturnRecalls, (TURNS_PER_CONV, MEASURES_PER_TURN))
        _BERTturnPrecisions = np.reshape(_BERTturnPrecisions, (TURNS_PER_CONV, MEASURES_PER_TURN))
        _BERTturnRecalls = np.reshape(_BERTturnRecalls, (TURNS_PER_CONV, MEASURES_PER_TURN))

        # add precisions and recalls to the global matrices
        _LMDprecisions = np.append(_LMDprecisions, np.nanmean(_LMDturnPrecisions, axis=0))
        _LMDrecalls = np.append(_LMDrecalls, np.nanmean(_LMDturnRecalls, axis=0))
        _BERTprecisions = np.append(_BERTprecisions, np.nanmean(_BERTturnPrecisions, axis=0))
        _BERTrecalls = np.append(_BERTrecalls, np.nanmean(_BERTturnRecalls, axis=0))

        # counters
        _LMDntopics += 1
        _BERTntopics += 1

    # reshape
    _LMDp10s = np.reshape(_LMDp10s, (_LMDntopics, TURNS_PER_CONV))
    _LMDaps = np.reshape(_LMDaps, (_LMDntopics, TURNS_PER_CONV))
    _LMDndcg5s = np.reshape(_LMDndcg5s, (_LMDntopics, TURNS_PER_CONV))
    _LMDprecisions = np.reshape(_LMDprecisions, (_LMDntopics, MEASURES_PER_TURN))
    _LMDrecalls = np.reshape(_LMDrecalls, (_LMDntopics, MEASURES_PER_TURN))
    _BERTp10s = np.reshape(_BERTp10s, (_BERTntopics, TURNS_PER_CONV))
    _BERTaps = np.reshape(_BERTaps, (_BERTntopics, TURNS_PER_CONV))
    _BERTndcg5s = np.reshape(_BERTndcg5s, (_BERTntopics, TURNS_PER_CONV))
    _BERTprecisions = np.reshape(_BERTprecisions, (_BERTntopics, MEASURES_PER_TURN))
    _BERTrecalls = np.reshape(_BERTrecalls, (_BERTntopics, MEASURES_PER_TURN))

    method1Metrics = {
        "aps": {"lmd": _LMDaps, "bert": _BERTaps},
        "ndcg5s": {"lmd": _LMDndcg5s, "bert": _BERTndcg5s},
        "recalls": {"lmd": _LMDrecalls, "bert": _BERTrecalls},
        "precisions": {"lmd": _LMDprecisions, "bert": _BERTprecisions},
    }
    return method1Metrics


def project3Method2(REL_DOCS_PER_TURN, UPDATE_ELASTICSEARCH_RESULTS, rewriter, tokenizer, model, device,
                    nlp, classifier, elastic, testBed, topics, relevanceJudgments, topicsIDs, setName,
                    convNumbers, convNames):
    ## METHOD 2: boost the entities extracted from the conversation's first utterance
    print("Method 2")

    # counters
    _LMDntopics = 0
    _LMDnturns = 0
    _LMDntotalTurns = 0
    _BERTntopics = 0
    _BERTnturns = 0
    _BERTntotalTurns = 0

    # metrics
    _LMDp10s = np.array([])
    _LMDaps = np.array([])
    _LMDndcg5s = np.array([])
    _LMDprecisions = np.array([])
    _LMDrecalls = np.array([])
    _BERTp10s = np.array([])
    _BERTaps = np.array([])
    _BERTndcg5s = np.array([])
    _BERTprecisions = np.array([])
    _BERTrecalls = np.array([])

    for topic in topics:
        convID = topic['number']
        if convID not in topicsIDs:
            continue

        _LMDturnPrecisions = np.array([])
        _LMDturnRecalls = np.array([])
        _BERTturnPrecisions = np.array([])
        _BERTturnRecalls = np.array([])

        print("-- {}".format(convID))
        firstEntities = []
        for turn in topic['turn'][:TURNS_PER_CONV]:
            turnID = turn['number']
            topicTurnID = '%d_%d' % (convID, turnID)

            info = relevanceJudgments.loc[relevanceJudgments['topic_turn_id'] == topicTurnID]
            numberRel = info.loc[info['rel'] != 0]['docid'].count()

            _LMDntotalTurns += 1
            if numberRel == 0:
                _LMDp10s = np.append(_LMDp10s, np.nan)
                _LMDaps = np.append(_LMDaps, np.nan)
                _LMDndcg5s = np.append(_LMDndcg5s, np.nan)
                _LMDturnPrecisions = np.append(_LMDturnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
                _LMDturnRecalls = np.append(_LMDturnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)
                _BERTp10s = np.append(_BERTp10s, np.nan)
                _BERTaps = np.append(_BERTaps, np.nan)
                _BERTndcg5s = np.append(_BERTndcg5s, np.nan)
                _BERTturnPrecisions = np.append(_BERTturnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
                _BERTturnRecalls = np.append(_BERTturnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)
                continue

            utterance = turn['raw_utterance']
            if turnID == 1:
                # legacy string-concatenation version, kept for reference (disabled):
                # for entity in nlp(utterance).ents:
                #     firstEntities += entity.text + " "
                # utterance += " " + firstEntities
                firstEntities = [str(ent) for ent in nlp(utterance).ents]
            print("-- -- {}: {} ; {}".format(turnID, utterance, firstEntities))

            # LMD
            [p10, recall, ap, ndcg5, precisions, recalls] = getMetricsEntities(MEASURES_PER_TURN, classifier, tokenizer, model, device, elastic, testBed, utterance, utterance, firstEntities, REL_DOCS_PER_TURN, topicTurnID, relevanceJudgments)
            _LMDp10s = np.append(_LMDp10s, p10)
            _LMDaps = np.append(_LMDaps, ap)
            _LMDndcg5s = np.append(_LMDndcg5s, ndcg5)
            _LMDturnPrecisions = np.append(_LMDturnPrecisions, precisions)
            _LMDturnRecalls = np.append(_LMDturnRecalls, recalls)
            _LMDnturns += 1

            # BERT reranking on top of LMD
            [p10, recall, ap, ndcg5, precisions, recalls] = getMetricsEntities(MEASURES_PER_TURN, classifier, tokenizer, model, device, elastic, testBed, utterance, utterance, firstEntities, REL_DOCS_PER_TURN, topicTurnID, relevanceJudgments, True)
            _BERTp10s = np.append(_BERTp10s, p10)
            _BERTaps = np.append(_BERTaps, ap)
            _BERTndcg5s = np.append(_BERTndcg5s, ndcg5)
            _BERTturnPrecisions = np.append(_BERTturnPrecisions, precisions)
            _BERTturnRecalls = np.append(_BERTturnRecalls, recalls)
            _BERTnturns += 1

        # pad the conversation with NaNs up to TURNS_PER_CONV turns
        while _LMDntotalTurns % TURNS_PER_CONV != 0:
            _LMDntotalTurns += 1
            _LMDp10s = np.append(_LMDp10s, np.nan)
            _LMDaps = np.append(_LMDaps, np.nan)
            _LMDndcg5s = np.append(_LMDndcg5s, np.nan)
            _LMDturnPrecisions = np.append(_LMDturnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
            _LMDturnRecalls = np.append(_LMDturnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)
            _BERTntotalTurns += 1
            _BERTp10s = np.append(_BERTp10s, np.nan)
            _BERTaps = np.append(_BERTaps, np.nan)
            _BERTndcg5s = np.append(_BERTndcg5s, np.nan)
            _BERTturnPrecisions = np.append(_BERTturnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
            _BERTturnRecalls = np.append(_BERTturnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)

        # compute conversation means
        _LMDturnPrecisions = np.reshape(_LMDturnPrecisions, (TURNS_PER_CONV, MEASURES_PER_TURN))
        _LMDturnRecalls = np.reshape(_LMDturnRecalls, (TURNS_PER_CONV, MEASURES_PER_TURN))
        _BERTturnPrecisions = np.reshape(_BERTturnPrecisions, (TURNS_PER_CONV, MEASURES_PER_TURN))
        _BERTturnRecalls = np.reshape(_BERTturnRecalls, (TURNS_PER_CONV, MEASURES_PER_TURN))

        # add precisions and recalls to the global matrices
        _LMDprecisions = np.append(_LMDprecisions, np.nanmean(_LMDturnPrecisions, axis=0))
        _LMDrecalls = np.append(_LMDrecalls, np.nanmean(_LMDturnRecalls, axis=0))
        _BERTprecisions = np.append(_BERTprecisions, np.nanmean(_BERTturnPrecisions, axis=0))
        _BERTrecalls = np.append(_BERTrecalls, np.nanmean(_BERTturnRecalls, axis=0))

        # counters
        _LMDntopics += 1
        _BERTntopics += 1

    # reshape
    _LMDp10s = np.reshape(_LMDp10s, (_LMDntopics, TURNS_PER_CONV))
    _LMDaps = np.reshape(_LMDaps, (_LMDntopics, TURNS_PER_CONV))
    _LMDndcg5s = np.reshape(_LMDndcg5s, (_LMDntopics, TURNS_PER_CONV))
    _LMDprecisions = np.reshape(_LMDprecisions, (_LMDntopics, MEASURES_PER_TURN))
    _LMDrecalls = np.reshape(_LMDrecalls, (_LMDntopics, MEASURES_PER_TURN))
    _BERTp10s = np.reshape(_BERTp10s, (_BERTntopics, TURNS_PER_CONV))
    _BERTaps = np.reshape(_BERTaps, (_BERTntopics, TURNS_PER_CONV))
    _BERTndcg5s = np.reshape(_BERTndcg5s, (_BERTntopics, TURNS_PER_CONV))
    _BERTprecisions = np.reshape(_BERTprecisions, (_BERTntopics, MEASURES_PER_TURN))
    _BERTrecalls = np.reshape(_BERTrecalls, (_BERTntopics, MEASURES_PER_TURN))

    method2Metrics = {
        "aps": {"lmd": _LMDaps, "bert": _BERTaps},
        "ndcg5s": {"lmd": _LMDndcg5s, "bert": _BERTndcg5s},
        "recalls": {"lmd": _LMDrecalls, "bert": _BERTrecalls},
        "precisions": {"lmd": _LMDprecisions, "bert": _BERTprecisions},
    }
    return method2Metrics
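

# Hedged sketch, not project code: shows the entity extraction Method 2 relies
# on, using a stock spaCy pipeline ('en_core_web_sm' is an assumption about
# which model the `nlp` object is).
def _demo_first_turn_entities():
    import spacy
    nlp_demo = spacy.load('en_core_web_sm')
    doc = nlp_demo("Tell me about the Bronze Age collapse.")
    return [str(ent) for ent in doc.ents]  # e.g. ['the Bronze Age']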


def project3Method3(REL_DOCS_PER_TURN, UPDATE_ELASTICSEARCH_RESULTS, rewriter, tokenizer, model, device,
                    nlp, classifier, elastic, testBed, topics, relevanceJudgments, topicsIDs, setName,
                    convNumbers, convNames):
    ## METHOD 3: rewrite each utterance into a self-contained query with T5
    print("Method 3")

    # counters
    _LMDntopics = 0
    _LMDnturns = 0
    _LMDntotalTurns = 0
    _BERTntopics = 0
    _BERTnturns = 0
    _BERTntotalTurns = 0

    # metrics
    _LMDp10s = np.array([])
    _LMDaps = np.array([])
    _LMDndcg5s = np.array([])
    _LMDprecisions = np.array([])
    _LMDrecalls = np.array([])
    _BERTp10s = np.array([])
    _BERTaps = np.array([])
    _BERTndcg5s = np.array([])
    _BERTprecisions = np.array([])
    _BERTrecalls = np.array([])

    for topic in topics:
        convID = topic['number']
        if convID not in topicsIDs:
            continue

        _LMDturnPrecisions = np.array([])
        _LMDturnRecalls = np.array([])
        _BERTturnPrecisions = np.array([])
        _BERTturnRecalls = np.array([])

        print("-- {}".format(convID))
        convUtterances = []
        for turn in topic['turn'][:TURNS_PER_CONV]:
            turnID = turn['number']
            topicTurnID = '%d_%d' % (convID, turnID)

            info = relevanceJudgments.loc[relevanceJudgments['topic_turn_id'] == topicTurnID]
            numberRel = info.loc[info['rel'] != 0]['docid'].count()

            _LMDntotalTurns += 1
            if numberRel == 0:
                _LMDp10s = np.append(_LMDp10s, np.nan)
                _LMDaps = np.append(_LMDaps, np.nan)
                _LMDndcg5s = np.append(_LMDndcg5s, np.nan)
                _LMDturnPrecisions = np.append(_LMDturnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
                _LMDturnRecalls = np.append(_LMDturnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)
                _BERTp10s = np.append(_BERTp10s, np.nan)
                _BERTaps = np.append(_BERTaps, np.nan)
                _BERTndcg5s = np.append(_BERTndcg5s, np.nan)
                _BERTturnPrecisions = np.append(_BERTturnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
                _BERTturnRecalls = np.append(_BERTturnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)
                continue

            # rewrite the raw utterance given the conversation history so far
            # (the redundant convUtterances[:len(convUtterances)] slice was
            # simplified to convUtterances)
            utterance = rewriter.rewrite_query_with_T5(turn['raw_utterance'], convUtterances)
            convUtterances.append(turn['raw_utterance'])
            print("-- -- {}: {}".format(turnID, utterance))

            # LMD
            [p10, recall, ap, ndcg5, precisions, recalls] = getMetricsNormal(MEASURES_PER_TURN, classifier, tokenizer, model, device, elastic, testBed, utterance, utterance, REL_DOCS_PER_TURN, topicTurnID, relevanceJudgments)
            _LMDp10s = np.append(_LMDp10s, p10)
            _LMDaps = np.append(_LMDaps, ap)
            _LMDndcg5s = np.append(_LMDndcg5s, ndcg5)
            _LMDturnPrecisions = np.append(_LMDturnPrecisions, precisions)
            _LMDturnRecalls = np.append(_LMDturnRecalls, recalls)
            _LMDnturns += 1

            # BERT reranking on top of LMD
            [p10, recall, ap, ndcg5, precisions, recalls] = getMetricsNormal(MEASURES_PER_TURN, classifier, tokenizer, model, device, elastic, testBed, utterance, utterance, REL_DOCS_PER_TURN, topicTurnID, relevanceJudgments, True)
            _BERTp10s = np.append(_BERTp10s, p10)
            _BERTaps = np.append(_BERTaps, ap)
            _BERTndcg5s = np.append(_BERTndcg5s, ndcg5)
            _BERTturnPrecisions = np.append(_BERTturnPrecisions, precisions)
            _BERTturnRecalls = np.append(_BERTturnRecalls, recalls)
            _BERTnturns += 1

        # pad the conversation with NaNs up to TURNS_PER_CONV turns
        while _LMDntotalTurns % TURNS_PER_CONV != 0:
            _LMDntotalTurns += 1
            _LMDp10s = np.append(_LMDp10s, np.nan)
            _LMDaps = np.append(_LMDaps, np.nan)
            _LMDndcg5s = np.append(_LMDndcg5s, np.nan)
            _LMDturnPrecisions = np.append(_LMDturnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
            _LMDturnRecalls = np.append(_LMDturnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)
            _BERTntotalTurns += 1
            _BERTp10s = np.append(_BERTp10s, np.nan)
            _BERTaps = np.append(_BERTaps, np.nan)
            _BERTndcg5s = np.append(_BERTndcg5s, np.nan)
            _BERTturnPrecisions = np.append(_BERTturnPrecisions, np.ones(MEASURES_PER_TURN) * np.nan)
            _BERTturnRecalls = np.append(_BERTturnRecalls, np.ones(MEASURES_PER_TURN) * np.nan)

        # compute conversation means
        _LMDturnPrecisions = np.reshape(_LMDturnPrecisions, (TURNS_PER_CONV, MEASURES_PER_TURN))
        _LMDturnRecalls = np.reshape(_LMDturnRecalls, (TURNS_PER_CONV, MEASURES_PER_TURN))
        _BERTturnPrecisions = np.reshape(_BERTturnPrecisions, (TURNS_PER_CONV, MEASURES_PER_TURN))
        _BERTturnRecalls = np.reshape(_BERTturnRecalls, (TURNS_PER_CONV, MEASURES_PER_TURN))

        # add precisions and recalls to the global matrices
        _LMDprecisions = np.append(_LMDprecisions, np.nanmean(_LMDturnPrecisions, axis=0))
        _LMDrecalls = np.append(_LMDrecalls, np.nanmean(_LMDturnRecalls, axis=0))
        _BERTprecisions = np.append(_BERTprecisions, np.nanmean(_BERTturnPrecisions, axis=0))
        _BERTrecalls = np.append(_BERTrecalls, np.nanmean(_BERTturnRecalls, axis=0))

        # counters
        _LMDntopics += 1
        _BERTntopics += 1

    # reshape
    _LMDp10s = np.reshape(_LMDp10s, (_LMDntopics, TURNS_PER_CONV))
    _LMDaps = np.reshape(_LMDaps, (_LMDntopics, TURNS_PER_CONV))
    _LMDndcg5s = np.reshape(_LMDndcg5s, (_LMDntopics, TURNS_PER_CONV))
    _LMDprecisions = np.reshape(_LMDprecisions, (_LMDntopics, MEASURES_PER_TURN))
    _LMDrecalls = np.reshape(_LMDrecalls, (_LMDntopics, MEASURES_PER_TURN))
    _BERTp10s = np.reshape(_BERTp10s, (_BERTntopics, TURNS_PER_CONV))
    _BERTaps = np.reshape(_BERTaps, (_BERTntopics, TURNS_PER_CONV))
    _BERTndcg5s = np.reshape(_BERTndcg5s, (_BERTntopics, TURNS_PER_CONV))
    _BERTprecisions = np.reshape(_BERTprecisions, (_BERTntopics, MEASURES_PER_TURN))
    _BERTrecalls = np.reshape(_BERTrecalls, (_BERTntopics, MEASURES_PER_TURN))

    method3Metrics = {
        "aps": {"lmd": _LMDaps, "bert": _BERTaps},
        "ndcg5s": {"lmd": _LMDndcg5s, "bert": _BERTndcg5s},
        "recalls": {"lmd": _LMDrecalls, "bert": _BERTrecalls},
        "precisions": {"lmd": _LMDprecisions, "bert": _BERTprecisions},
    }
    return method3Metrics
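

# Hedged sketch, not project code: `rewriter.rewrite_query_with_T5` is
# implemented elsewhere. This shows one plausible approach using a public
# conversational query-rewriting checkpoint; the model name
# 'castorini/t5-base-canard' and the ' ||| ' history separator are assumptions
# about that checkpoint's input format, not the project's actual rewriter.
def _demo_t5_rewrite(utterance, history):
    from transformers import T5ForConditionalGeneration, T5Tokenizer
    tok = T5Tokenizer.from_pretrained('castorini/t5-base-canard')
    t5 = T5ForConditionalGeneration.from_pretrained('castorini/t5-base-canard')
    src = " ||| ".join(history + [utterance])
    ids = tok(src, return_tensors='pt').input_ids
    out = t5.generate(ids, max_length=64, num_beams=4)
    return tok.decode(out[0], skip_special_tokens=True)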


def getMetrics(testBed, result, topicTurnID):
    if np.size(result) == 0:
        return [0, 0, 0, 0, np.zeros(MEASURES_PER_TURN), np.zeros(MEASURES_PER_TURN)]
    [p10, recall, ap, ndcg5, precisions, recalls] = testBed.eval(result[['_id', '_score']], topicTurnID)
    return [p10, recall, ap, ndcg5, precisions, recalls]


def getMetricsNormal(MEASURES_PER_TURN, classifier, tokenizer, model, device, elastic, testBed,
                     question, utterance, numDocs, topicTurnID, relevanceJudgments, runBERT=False):
    result = elastic.search_body(query=utterance, numDocs=numDocs)
    if runBERT:
        result = getBERTResult(classifier, tokenizer, model, device, question, result, topicTurnID, relevanceJudgments)
    return getMetrics(testBed, result, topicTurnID)


def getMetricsEntities(MEASURES_PER_TURN, classifier, tokenizer, model, device, elastic, testBed,
                       question, utterance, entities, numDocs, topicTurnID, relevanceJudgments, runBERT=False):
    # without entities, fall back to the plain search
    if len(entities) == 0:
        return getMetricsNormal(MEASURES_PER_TURN, classifier, tokenizer, model, device, elastic, testBed, question, utterance, numDocs, topicTurnID, relevanceJudgments, runBERT)
    # boost every extracted entity with equal weight 1.0
    result = elastic.search_with_boosted_entities(utterance, entities, np.ones(len(entities)), numDocs)
    if runBERT:
        result = getBERTResult(classifier, tokenizer, model, device, question, result, topicTurnID, relevanceJudgments)
    return getMetrics(testBed, result, topicTurnID)
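

# Hedged sketch, not project code: `elastic.search_with_boosted_entities` is
# implemented elsewhere. This shows one plausible Elasticsearch query body for
# it, adding a boosted match clause per entity; the field name 'body' is an
# assumption about the index mapping.
def _demo_boosted_entity_query(utterance, entities, boosts):
    should = [{"match": {"body": {"query": e, "boost": float(b)}}}
              for e, b in zip(entities, boosts)]
    return {"query": {"bool": {"must": [{"match": {"body": utterance}}],
                               "should": should}}}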


def getBERTResult(classifier, tokenizer, model, device, question, pickleFile, topicTurnID, relevanceJudgments):
    # NOTE: topicTurnID and relevanceJudgments are kept in the signature for
    # call-site compatibility; the per-document relevance lookup that used
    # them here was dead code and has been removed
    passages = []
    for docID in pickleFile['_id']:
        passages.append(pickleFile[pickleFile['_id'] == docID]['_source.body'].values[0])

    # embed each (question, passage) pair and keep the [CLS] vector
    features = np.array([])
    for passage in passages:
        bertInput = convert_to_bert_input(sentences=[question, passage], max_seq_length=512, tokenizer=tokenizer, padding='max_length', truncation=False)
        bertInput['input_ids'] = bertInput['input_ids'].to(device)
        bertInput['attention_mask'] = bertInput['attention_mask'].to(device)
        bertInput['token_type_ids'] = bertInput['token_type_ids'].to(device)
        bertOutput = model(**bertInput)
        featuresLine = bertOutput["last_hidden_state"][0, 0].detach().clone().cpu().numpy()
        features = np.append(features, featuresLine)
    features = np.reshape(features, (len(passages), 768))

    # rerank by descending relevance probability
    prob = classifier.predict_proba(features)[:, 1]
    newOrder = np.lexsort(np.reshape(prob, (1, prob.shape[0])))[::-1]
    result = np.column_stack((pickleFile['_id'], prob))
    result = result[newOrder, :]
    result = pd.DataFrame(data=result, columns=['_id', '_score'])

    gc.collect()
    return result


def doPlots(REL_DOCS_PER_TURN, SET_NAME, preName, APs, nDCGs, Recalls, Precisions, methods, convNumbers, convNames):
    plots.plotMetricAlongConversation(PROJ_DIR, REL_DOCS_PER_TURN, SET_NAME, "Average Precision", APs, methods, convNumbers, preName + " - ")
    plots.plotMetricAlongConversation(PROJ_DIR, REL_DOCS_PER_TURN, SET_NAME, "normalized Discounted Cumulative Gain", nDCGs, methods, convNumbers, preName + " - ")
    plots.plotMetricEachConversation(PROJ_DIR, REL_DOCS_PER_TURN, SET_NAME, preName + " - Average Precision", APs, methods, convNumbers, convNames)
    plots.plotMetricEachConversation(PROJ_DIR, REL_DOCS_PER_TURN, SET_NAME, preName + " - normalized Discounted Cumulative Gain", nDCGs, methods, convNumbers, convNames)
    plots.plotPrecisionRecall(PROJ_DIR, REL_DOCS_PER_TURN, SET_NAME, Recalls, Precisions, methods, convNumbers, preName + " - ")


# ---------------------------------------------------------------------------
# Legacy scratch code kept for reference (disabled):
#
# question = turn['raw_utterance']
# for docID in info['docid']:
#     docInfo = info.loc[info['docid'] == docID]
#     passageList = pickleFile[pickleFile['_id'] == docID]['_source.body'].values
#     if len(passageList) > 0:
#         passage = passageList[0]
#         rel = (docInfo['rel'].values[0] > 0) * 1
#         triplets = np.append(triplets, np.array([str(question), str(passage), rel]))
#         totalTurns += 1
# triplets = np.reshape(triplets, (totalTurns, 3))
# print("Getting BERT embeddings")
# for triplet in triplets:
#     bertInput = convert_to_bert_input(sentences=triplet[:-1], max_seq_length=512,
#                                       tokenizer=tokenizer, padding='max_length', truncation=False)
#     bertInput['input_ids'] = bertInput['input_ids'].to(device)
#     bertInput['attention_mask'] = bertInput['attention_mask'].to(device)
#     bertInput['token_type_ids'] = bertInput['token_type_ids'].to(device)
#     bertOutput = model(**bertInput)
#     featuresLine = bertOutput["last_hidden_state"][0, 0].detach().clone().cpu().numpy()
#     features = np.append(features, featuresLine)
# features = np.reshape(features, (totalTurns, 768))
# print("Training classifier")
# classifier.fit(features, triplets[:, 2])
# print("Training done")
#
# for docID in info['docid']:
#     if docID[:3] != "CAR":
#         print(docID)
# print(elastic.get_doc_body('MARCO_955948'))
# print(info['docid'])
# print()
#
# passages = pickleFile['_source']['body']
# relevances = info['rel']
# print("passages:\n{}\nrel\n{}\n\n".format(topicTurnID, passage, relevance))
# ---------------------------------------------------------------------------


def convert_to_bert_input(sentences=None, max_seq_length=512, tokenizer=None, add_cls=True,
                          padding='do_not_pad', truncation=False):
    """Receives a single [query_text, doc_text] pair in `sentences` and returns
    a dictionary of tensors. It can only handle one pair per call.
    """
    # fix: encode the pair as (text, text_pair) instead of passing the list as
    # a single argument, so token_type_ids correctly separate the query segment
    # from the passage segment, matching the docstring's intent
    return tokenizer.encode_plus(sentences[0], sentences[1],
                                 add_special_tokens=add_cls,
                                 padding=padding,
                                 max_length=max_seq_length,
                                 truncation=truncation,
                                 return_tensors='pt',
                                 return_token_type_ids=True,
                                 return_attention_mask=True)
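

# Hedged usage sketch, not part of the pipeline: runs convert_to_bert_input on
# one query/passage pair with a public checkpoint ('bert-base-uncased' is an
# illustrative assumption, not necessarily the project's model).
def _demo_convert_to_bert_input():
    from transformers import BertTokenizer
    tok = BertTokenizer.from_pretrained('bert-base-uncased')
    enc = convert_to_bert_input(sentences=["what is throat cancer?",
                                           "Throat cancer is a type of cancer of the pharynx or larynx."],
                                max_seq_length=512, tokenizer=tok,
                                padding='max_length', truncation=True)
    # enc['input_ids'], enc['attention_mask'] and enc['token_type_ids'] are
    # (1, 512) tensors, ready to be passed to model(**enc)
    return enc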