#!/usr/bin/env python
"""Score a prediction file against a gold file (both newline-delimited JSON).

usage: ./evaluation_ate_absita.py RESULT_FILE GOLD_FILE

Each record is read for the keys ``id_sentence``, ``aspects``,
``aspects_position``, ``polarities`` and ``score``.  Three reports are
printed:

* "Task1 ATE"  - macro precision/recall/F1 of the predicted aspect spans;
* "Task2 ABSA" - the same, but a span only counts when its polarity pair
  equals the gold one;
* "Task3 SA"   - RMSE between gold and predicted ``score`` values.
"""
import importlib
import subprocess
import sys


def _require(module_name):
    """Import *module_name*, installing it with ``pip --user`` if missing.

    The installation targets the interpreter that is running this script
    (``sys.executable -m pip``) and is only attempted when the import fails.
    """
    try:
        return importlib.import_module(module_name)
    except ImportError:
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "--user", module_name]
        )
        importlib.invalidate_caches()
        return importlib.import_module(module_name)


np = _require("numpy")
pd = _require("pandas")


def load_frame(path):
    """Read the ndjson file at *path* into a DataFrame, one row per record."""
    ndjson = _require("ndjson")
    with open(path, encoding="utf-8") as handle:
        records = list(ndjson.reader(handle))
    # Build the frame once instead of concatenating row by row.
    return pd.DataFrame(records)


def column_or_empty(frame, name):
    """Return ``frame[name]``, or an empty list when the column is absent."""
    try:
        return frame[name]
    except KeyError:
        return []


def score_sentence(pred_spans, gold_spans, n_pred, n_gold,
                   pred_polarities=None, gold_polarities=None):
    """Return ``(precision, recall, f1)`` for a single sentence.

    Args:
        pred_spans: predicted spans; element 0 is the start, element 1 the end.
        gold_spans: gold spans, same layout.
        n_pred: number of predicted aspects (denominator of precision).
        n_gold: number of gold aspects (denominator of recall).
        pred_polarities: optional polarity pairs aligned with *pred_spans*.
        gold_polarities: optional polarity pairs aligned with *gold_spans*.
            When both polarity lists are given, a prediction may only be
            matched to a gold span whose polarity pair is equal.

    An exact span match is worth 1, a partial match 0.5, and every gold span
    is matched at most once.

    NOTE(review): the indentation of the original script was lost; this
    implementation assumes a gold span is marked as matched (and the
    containment search stops) only when credit is actually awarded.
    """
    gold_starts = [span[0] for span in gold_spans]
    gold_ends = [span[1] for span in gold_spans]
    check_polarity = pred_polarities is not None and gold_polarities is not None

    def agrees(pred_index, gold_index):
        """Tell whether the polarity pairs of the two aspects are equal."""
        if not check_polarity:
            return True
        predicted = pred_polarities[pred_index]
        expected = gold_polarities[gold_index]
        return expected[0] == predicted[0] and expected[1] == predicted[1]

    exact = 0
    partial = 0
    matched = set()

    for k, span in enumerate(pred_spans):
        start, end = span[0], span[1]
        if start in gold_starts:
            # Same start: exact when the end matches too, partial otherwise.
            ind = gold_starts.index(start)
            if ind not in matched and agrees(k, ind):
                if gold_ends[ind] == end:
                    exact += 1
                else:
                    partial += 1
                matched.add(ind)
        elif end in gold_ends:
            # Same end only: partial match.
            ind = gold_ends.index(end)
            if ind not in matched and agrees(k, ind):
                partial += 1
                matched.add(ind)
        else:
            # Neither boundary coincides: accept the first free gold span that
            # strictly contains the prediction or is strictly contained in it.
            for z, (g_start, g_end) in enumerate(zip(gold_starts, gold_ends)):
                inside_gold = g_start < start and end < g_end
                covers_gold = start < g_start and g_end < end
                if not (inside_gold or covers_gold):
                    continue
                # The polarity test uses the gold span under inspection (z);
                # the original compared the negative polarity against a stale
                # index left over from an earlier prediction.
                if z not in matched and agrees(k, z):
                    partial += 1
                    matched.add(z)
                    break

    credit = exact + 0.5 * partial
    if n_gold == 0 and n_pred == 0:
        return 1, 1, 1
    if n_pred == 0:
        return 0, credit / n_gold, 0
    if n_gold == 0:
        return credit / n_pred, 0, 0
    precision = credit / n_pred
    recall = credit / n_gold
    if precision == 0 and recall == 0:
        return precision, recall, 0
    return precision, recall, (2 * precision * recall) / (precision + recall)


def macro_average(values):
    """Return the mean of *values*, or -1 when there is nothing to average."""
    if len(values) == 0:
        return -1
    return np.average(values)


def evaluate(n_sentences, p_aspects, p_positions, g_aspects, g_positions,
             p_polarities=None, g_polarities=None):
    """Return macro ``(precision, recall, f1)`` over the first *n_sentences*.

    Sentence ``q`` of the predictions is compared with sentence ``q`` of the
    gold data.  Polarities are only taken into account when both polarity
    collections are supplied.
    """
    use_polarity = p_polarities is not None and g_polarities is not None
    precisions = []
    recalls = []
    f1s = []
    for q in range(n_sentences):
        precision, recall, f1 = score_sentence(
            p_positions[q],
            g_positions[q],
            len(p_aspects[q]),
            len(g_aspects[q]),
            p_polarities[q] if use_polarity else None,
            g_polarities[q] if use_polarity else None,
        )
        precisions.append(precision)
        recalls.append(recall)
        f1s.append(f1)
    return macro_average(precisions), macro_average(recalls), macro_average(f1s)


def print_report(title, avg_p, avg_r, avg_f1):
    """Print one precision/recall/F1 report followed by a separator line."""
    print(title)
    print("\tMacro-Precision: {:.5f}".format(avg_p))
    print("\tMacro-Recall: {:.5f}".format(avg_r))
    print("\tMacro-F1-score: {:.5f}".format(avg_f1))
    print("---------------------------------------")


def rmse(predictions, targets):
    """Return the root mean squared error between the two collections."""
    return np.sqrt(((predictions - targets) ** 2).mean())


def main(argv=None):
    """Run the three evaluations on ``argv[1]`` (results) and ``argv[2]`` (gold)."""
    args = sys.argv if argv is None else argv
    if len(args) < 3:
        sys.exit("usage: ./evaluation_ate_absita.py RESULT_FILE GOLD_FILE")

    predictions = load_frame(args[1])
    p_aspects = column_or_empty(predictions, 'aspects')
    p_aspects_position = column_or_empty(predictions, 'aspects_position')
    p_aspects_polarity = column_or_empty(predictions, 'polarities')
    p_scores = column_or_empty(predictions, 'score')
    try:
        p_ids = predictions['id_sentence']
    except KeyError:
        sys.exit("Sentence IDs are missing!")

    gold = load_frame(args[2])
    g_aspects = gold['aspects']
    g_aspects_position = gold['aspects_position']
    g_aspects_polarity = gold['polarities']
    g_scores = gold['score']
    g_ids = gold['id_sentence']

    # NOTE(review): only the first pair of IDs is compared, as in the
    # original script; later misalignments are not detected.
    if p_ids[0] != g_ids[0]:
        sys.exit("Sentences in the two files must be in the same order!")

    # TASK 1: aspect spans only.
    print_report(
        "Task1 ATE:",
        *evaluate(len(p_aspects), p_aspects, p_aspects_position,
                  g_aspects, g_aspects_position)
    )

    # TASK 2: aspect spans that also carry the gold polarity pair.
    print_report(
        "Task2 ABSA:",
        *evaluate(len(p_aspects_polarity), p_aspects, p_aspects_position,
                  g_aspects, g_aspects_position,
                  p_aspects_polarity, g_aspects_polarity)
    )

    # TASK 3: RMSE on the sentence scores; -1 when no score was predicted.
    if len(p_scores) != 0:
        rmse_val = rmse(g_scores, p_scores)
    else:
        rmse_val = -1
    print("Task3 SA:")
    print("\tRMSE: {:.5f}".format(rmse_val))
    print("---------------------------------------")


if __name__ == "__main__":
    main()