{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "ATE_ABSITA_SA_baseline.ipynb", "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "code", "metadata": { "id": "t_vCY7lRiEC3", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "outputId": "e444d144-e81b-4ea9-d26f-bee78b86fc2c" }, "source": [ "!pip install ndjson\n", "import pandas as pd\n", "import ndjson\n", "import numpy as np\n", "\n", "# Load the training set: the file holds one JSON record per line.\n", "# The records are collected first and the DataFrame is built once;\n", "# growing it with pd.concat inside the loop re-copies every earlier row\n", "# for each new record.\n", "# The encoding is explicit so the accented Italian text is decoded the\n", "# same way regardless of the platform's default locale.\n", "with open(\"ate_absita_training.ndjson\", encoding=\"utf-8\") as f:\n", "    posts = list(ndjson.reader(f))\n", "\n", "# One row per record; the columns are the record keys.\n", "dataframe = pd.DataFrame(posts)" ], "execution_count": 4, "outputs": [ { "output_type": "stream", "text": [ "Requirement already satisfied: ndjson in /usr/local/lib/python3.6/dist-packages (0.3.1)\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "KGgBSejQiaOB", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 589 }, "outputId": "09ffa5d0-cb20-4c17-d286-edca1b3c4a70" }, "source": [ "dataframe" ], "execution_count": 5, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", " | sentence | \n", "id_sentence | \n", "score | \n", "polarities | \n", "aspects_position | \n", "aspects | \n", "
---|---|---|---|---|---|---|
0 | \n", "Ottimo prodotto di marca, la qualità é veramen... | \n", "4b7254a1-3f31-4143-ab22-a8558aa4a73b | \n", "5 | \n", "[[0, 0], [0, 1], [1, 0]] | \n", "[[120, 142], [71, 79], [29, 36]] | \n", "[provvisto di una tasca, capiente, qualità] | \n", "
1 | \n", "Ottimo rasoio dal semplice utilizzo. Rade molt... | \n", "4b74d99d-891f-4526-bbd3-549fa244cd1c | \n", "5 | \n", "[[1, 0], [1, 0], [1, 0], [1, 0]] | \n", "[[18, 26], [37, 41], [79, 86], [99, 105]] | \n", "[semplice, Rade, Pratico, pulire] | \n", "
2 | \n", "Un quarto delle dimensioni dello Show original... | \n", "4b7ff44f-fa9f-4ef0-97c8-e295e70ccc9b | \n", "5 | \n", "[[1, 0], [1, 0], [1, 0], [0, 0]] | \n", "[[118, 132], [51, 62], [65, 70], [16, 26]] | \n", "[modalità notte, prestazioni, suono, dimensioni] | \n", "
3 | \n", "Il prodotto si presenta esattamente come in fo... | \n", "4b80c2fe-62eb-44ee-b7b0-6e7de7dfd156 | \n", "5 | \n", "[[1, 0], [1, 0], [1, 0]] | \n", "[[147, 158], [132, 140], [24, 48]] | \n", "[vestibilità, capienza, esattamente come in foto] | \n", "
4 | \n", "Superlativa, velocità in scrittura superiore a... | \n", "4b848496-b45c-4cc7-b378-7a047e2033c4 | \n", "5 | \n", "[[1, 0]] | \n", "[[13, 21]] | \n", "[velocità] | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
3049 | \n", "Da la impressione che sia robusto ed impermeab... | \n", "ff8810f6-a402-40d0-bac2-b7babce8d4cb | \n", "2 | \n", "[[0, 0], [0, 0]] | \n", "[[37, 49], [26, 33]] | \n", "[impermeabile, robusto] | \n", "
3050 | \n", "Purtroppo non andavano bene per la mia stampante | \n", "ff9f0918-3277-43e6-82b4-44300bcb0a76 | \n", "5 | \n", "[[0, 1]] | \n", "[[10, 27]] | \n", "[non andavano bene] | \n", "
3051 | \n", "Lo stiamo usando in famiglia da qualche mese, ... | \n", "ffb7deb3-d2c5-49d7-94c0-b55511f22600 | \n", "3 | \n", "[[0, 1], [0, 1], [1, 0]] | \n", "[[126, 134], [105, 112], [83, 90]] | \n", "[rumoroso, pesante, qualità] | \n", "
3052 | \n", "elettrodomestico che già dall'apparenza risult... | \n", "ffd880a5-57ad-4d4c-b677-e875b181de3f | \n", "5 | \n", "[[1, 0], [1, 0], [1, 0], [1, 0]] | \n", "[[106, 125], [146, 153], [199, 207], [57, 64]] | \n", "[il suo lavoro lo fa, potente, utilizzo, qualità] | \n", "
3053 | \n", "Ottima alternativa a device di fascia alta.. | \n", "fffc61ff-f6ea-4c41-9527-8f486f057de1 | \n", "5 | \n", "[[1, 0]] | \n", "[[7, 42]] | \n", "[alternativa a device di fascia alta] | \n", "
3054 rows × 6 columns
\n", "