mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-08 01:22:43 +00:00
1823 lines
46 KiB
Plaintext
1823 lines
46 KiB
Plaintext
![]() |
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Using [Unstructured.io](https://www.unstructured.io/) to process arXiv Papers and Perform Topic Modelling! "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Import General Use Packages"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import arxiv # Interact with arXiv api to scrape papers\n",
|
|||
|
"from sentence_transformers import SentenceTransformer # Use Hugging Face Embedding for Topic Modelling\n",
|
|||
|
"from bertopic import BERTopic # Package for Topic Modelling\n",
|
|||
|
"from tqdm import tqdm #Progress Bar When Iterating\n",
|
|||
|
"import glob #Identify Files in Directory\n",
|
|||
|
"import os #Delete Files in Directory\n",
|
|||
|
"import pandas as pd #Dataframe Manipulation"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Import [Unstructured](https://unstructured-io.github.io/unstructured/installing.html) Bricks"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from unstructured.partition.auto import partition #Base Function to Partition PDF\n",
|
|||
|
"from unstructured.staging.base import convert_to_dict #Convert List Unstructured Elements Into List of Dicts for Easy Parsing\n",
|
|||
|
"from unstructured.cleaners.core import clean, remove_punctuation, clean_non_ascii_chars #Cleaning Bricks\n",
|
|||
|
"import re #Create Custom Cleaning Brick\n",
|
|||
|
"import nltk #Toolkit for more advanced pre-processing\n",
|
|||
|
"from nltk.corpus import stopwords #list of stopwords to remove\n",
|
|||
|
"from typing import List #Type Hinting"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Setup NLTK"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[nltk_data] Downloading package stopwords to\n",
|
|||
|
"[nltk_data] /Users/pravinsanthanam/nltk_data...\n",
|
|||
|
"[nltk_data] Package stopwords is already up-to-date!\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"True"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"nltk.download('stopwords')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Create Function to Extract PDFs About Machine Learning from arXiv"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 6,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def get_arxiv_paper_texts(query: str, max_results: int = 100) -> List[str]:\n",
|
|||
|
" \"\"\"Function to Use arXiv API to Fetch Papers Related to Query, Download and Pre-Process\n",
|
|||
|
"\n",
|
|||
|
" Args:\n",
|
|||
|
" query (str): query for arXiv API\n",
|
|||
|
" max_results (int, optional): Number of Papers to get back. Defaults to 100.\n",
|
|||
|
"\n",
|
|||
|
" Returns:\n",
|
|||
|
" paper_texts (list[str]): Return list of narrative texts for each paper\n",
|
|||
|
" \"\"\"\n",
|
|||
|
" #Get List of Arxiv Papers Matching Our Query\n",
|
|||
|
" arxiv_papers = list(\n",
|
|||
|
" arxiv.Search(\n",
|
|||
|
" query = query,\n",
|
|||
|
" max_results = max_results,\n",
|
|||
|
" sort_by = arxiv.SortCriterion.Relevance,\n",
|
|||
|
" sort_order = arxiv.SortOrder.Descending\n",
|
|||
|
" )\n",
|
|||
|
" .results()\n",
|
|||
|
" )\n",
|
|||
|
"\n",
|
|||
|
" #Loop Through PDFs, Download and Pre-Process and Then Delete\n",
|
|||
|
" paper_texts = []\n",
|
|||
|
" for paper in tqdm(arxiv_papers):\n",
|
|||
|
" paper.download_pdf()\n",
|
|||
|
" pdf_file = glob.glob('*.pdf')[0]\n",
|
|||
|
" elements = partition(pdf_file) #Partition PDF Using Unstructured\n",
|
|||
|
" isd = convert_to_dict(elements) #Convert List of Elements to List of Dictionaries\n",
|
|||
|
" narrative_texts = [element['text'] for element in isd if element['type'] == 'NarrativeText'] #Only Keep Narrative Text and Combine Into One String\n",
|
|||
|
" os.remove(pdf_file) #Delete PDF\n",
|
|||
|
" paper_texts += narrative_texts\n",
|
|||
|
" return paper_texts\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Run Scrape + PreProcess Function to Get List of Paper Text To Feed Through Topic Modelling Algorithm"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 7,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"100%|██████████| 10/10 [04:59<00:00, 29.92s/it]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"paper_texts = get_arxiv_paper_texts(query='natural language processing', max_results=10)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Run Narrative Texts Through Custom Cleaner Brick Using Unstructured"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 8,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Number of Narrative Texts to Run Through Topic Modelling: 1711\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"#Stopwords to Remove\n",
|
|||
|
"stop_words = set(stopwords.words('english'))\n",
|
|||
|
"\n",
|
|||
|
"#Function to Apply Whatever Cleaning Brick Functionality to Each Narrative Text Element\n",
|
|||
|
"def custom_clean_brick(narrative_text: str) -> str:\n",
|
|||
|
" \"\"\"Apply Mix of Unstructured Cleaning Bricks With Some Custom Functionality to Pre-Process Narrative Text\n",
|
|||
|
"\n",
|
|||
|
" Args:\n",
|
|||
|
" narrative_text (str): Narrative Text or Any Other Sentence\n",
|
|||
|
"\n",
|
|||
|
" Returns:\n",
|
|||
|
" cleaned_text (str): Text after going through all the cleaning procedures\n",
|
|||
|
" \"\"\"\n",
|
|||
|
" remove_numbers = lambda text: re.sub(r'\\d+', \"\", text) #lambda function to remove all punctuation\n",
|
|||
|
" cleaned_text = remove_numbers(narrative_text) #Apply Custom Lambda\n",
|
|||
|
" cleaned_text = clean(cleaned_text, extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True, lowercase=True) #Apply Basic Clean Brick With All the Options\n",
|
|||
|
" cleaned_text = remove_punctuation(cleaned_text) #Remove all punctuation\n",
|
|||
|
" cleaned_text = ' '.join([word for word in cleaned_text.split() if word not in stop_words]) #remove stop words\n",
|
|||
|
" return cleaned_text\n",
|
|||
|
"\n",
|
|||
|
"#Apply Function to Paper Texts\n",
|
|||
|
"cleaned_paper_texts = [custom_clean_brick(text) for text in paper_texts]\n",
|
|||
|
"\n",
|
|||
|
"#Count Narratve Texts\n",
|
|||
|
"print(\"Number of Narrative Texts to Run Through Topic Modelling: {}\".format(len(cleaned_paper_texts)))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Setup [BerTopic](https://maartengr.github.io/BERTopic/index.html)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"#Choose Which Hugging Face Model You Want to Use\n",
|
|||
|
"sentence_model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n",
|
|||
|
"\n",
|
|||
|
"#Initialize Model\n",
|
|||
|
"topic_model = BERTopic(embedding_model=sentence_model, top_n_words=10, nr_topics=10, verbose=True)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Run Document Text Through Topic Model To Get Major Topics Discussed in Narrative Texts"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"application/vnd.jupyter.widget-view+json": {
|
|||
|
"model_id": "a6ebe3cb185049bd8d37742f2451cbe0",
|
|||
|
"version_major": 2,
|
|||
|
"version_minor": 0
|
|||
|
},
|
|||
|
"text/plain": [
|
|||
|
"Batches: 0%| | 0/54 [00:00<?, ?it/s]"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"2023-04-14 14:27:29,129 - BERTopic - Transformed documents to Embeddings\n",
|
|||
|
"2023-04-14 14:27:33,621 - BERTopic - Reduced dimensionality\n",
|
|||
|
"2023-04-14 14:27:33,647 - BERTopic - Clustered reduced embeddings\n",
|
|||
|
"2023-04-14 14:27:34,255 - BERTopic - Reduced number of topics from 32 to 10\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"#Fit Topic Model and Transform List of Paper Narrative Texts Into Topic and Probabilities\n",
|
|||
|
"topic_model.fit(cleaned_paper_texts)\n",
|
|||
|
"\n",
|
|||
|
"#Store Document-Topic Info\n",
|
|||
|
"doc_topic_info = topic_model.get_document_info(cleaned_paper_texts)\n",
|
|||
|
"\n",
|
|||
|
"#Store Topic Info\n",
|
|||
|
"topic_info = pd.DataFrame(topic_model.get_topics())\n",
|
|||
|
"topic_info = topic_info.applymap(lambda x: x[0])\n",
|
|||
|
"topic_info.columns = ['topic_{}'.format(col+1) for col in topic_info.columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Checkout Keywords for Each Topic"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>topic_0</th>\n",
|
|||
|
" <th>topic_1</th>\n",
|
|||
|
" <th>topic_2</th>\n",
|
|||
|
" <th>topic_3</th>\n",
|
|||
|
" <th>topic_4</th>\n",
|
|||
|
" <th>topic_5</th>\n",
|
|||
|
" <th>topic_6</th>\n",
|
|||
|
" <th>topic_7</th>\n",
|
|||
|
" <th>topic_8</th>\n",
|
|||
|
" <th>topic_9</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>neural</td>\n",
|
|||
|
" <td>language</td>\n",
|
|||
|
" <td>state</td>\n",
|
|||
|
" <td>function</td>\n",
|
|||
|
" <td>cost</td>\n",
|
|||
|
" <td>publication</td>\n",
|
|||
|
" <td>graph</td>\n",
|
|||
|
" <td>llama</td>\n",
|
|||
|
" <td>tangkhul</td>\n",
|
|||
|
" <td>want</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>network</td>\n",
|
|||
|
" <td>natural</td>\n",
|
|||
|
" <td>rnn</td>\n",
|
|||
|
" <td>distribution</td>\n",
|
|||
|
" <td>function</td>\n",
|
|||
|
" <td>april</td>\n",
|
|||
|
" <td>computation</td>\n",
|
|||
|
" <td>like</td>\n",
|
|||
|
" <td>compound</td>\n",
|
|||
|
" <td>edu</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>function</td>\n",
|
|||
|
" <td>model</td>\n",
|
|||
|
" <td>memory</td>\n",
|
|||
|
" <td>output</td>\n",
|
|||
|
" <td>sgd</td>\n",
|
|||
|
" <td>syst</td>\n",
|
|||
|
" <td>node</td>\n",
|
|||
|
" <td>south</td>\n",
|
|||
|
" <td>root</td>\n",
|
|||
|
" <td>dsontagcoursesinferenceslidespseudolikelihoodn...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>networks</td>\n",
|
|||
|
" <td>word</td>\n",
|
|||
|
" <td>vector</td>\n",
|
|||
|
" <td>class</td>\n",
|
|||
|
" <td>training</td>\n",
|
|||
|
" <td>technol</td>\n",
|
|||
|
" <td>nodes</td>\n",
|
|||
|
" <td>animal</td>\n",
|
|||
|
" <td>morphological</td>\n",
|
|||
|
" <td>regardlessly</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>one</td>\n",
|
|||
|
" <td>planning</td>\n",
|
|||
|
" <td>input</td>\n",
|
|||
|
" <td>tanh</td>\n",
|
|||
|
" <td>expected</td>\n",
|
|||
|
" <td>date</td>\n",
|
|||
|
" <td>backward</td>\n",
|
|||
|
" <td>america</td>\n",
|
|||
|
" <td>verbs</td>\n",
|
|||
|
" <td>satisfied</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>5</th>\n",
|
|||
|
" <td>input</td>\n",
|
|||
|
" <td>words</td>\n",
|
|||
|
" <td>network</td>\n",
|
|||
|
" <td>data</td>\n",
|
|||
|
" <td>optimization</td>\n",
|
|||
|
" <td>vol</td>\n",
|
|||
|
" <td>function</td>\n",
|
|||
|
" <td>translation</td>\n",
|
|||
|
" <td>noun</td>\n",
|
|||
|
" <td>november</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>6</th>\n",
|
|||
|
" <td>vector</td>\n",
|
|||
|
" <td>based</td>\n",
|
|||
|
" <td>recurrent</td>\n",
|
|||
|
" <td>yˆ</td>\n",
|
|||
|
" <td>algorithm</td>\n",
|
|||
|
" <td>intell</td>\n",
|
|||
|
" <td>backpropagation</td>\n",
|
|||
|
" <td>french</td>\n",
|
|||
|
" <td>roots</td>\n",
|
|||
|
" <td>tune</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>7</th>\n",
|
|||
|
" <td>language</td>\n",
|
|||
|
" <td>processing</td>\n",
|
|||
|
" <td>sequence</td>\n",
|
|||
|
" <td>loss</td>\n",
|
|||
|
" <td>set</td>\n",
|
|||
|
" <td>acm</td>\n",
|
|||
|
" <td>algorithm</td>\n",
|
|||
|
" <td>cute</td>\n",
|
|||
|
" <td>adjectives</td>\n",
|
|||
|
" <td>return</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>8</th>\n",
|
|||
|
" <td>model</td>\n",
|
|||
|
" <td>models</td>\n",
|
|||
|
" <td>neural</td>\n",
|
|||
|
" <td>activation</td>\n",
|
|||
|
" <td>validation</td>\n",
|
|||
|
" <td>article</td>\n",
|
|||
|
" <td>parameters</td>\n",
|
|||
|
" <td>google</td>\n",
|
|||
|
" <td>formation</td>\n",
|
|||
|
" <td>fully</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>9</th>\n",
|
|||
|
" <td>training</td>\n",
|
|||
|
" <td>data</td>\n",
|
|||
|
" <td>lstm</td>\n",
|
|||
|
" <td>softmax</td>\n",
|
|||
|
" <td>rate</td>\n",
|
|||
|
" <td>trans</td>\n",
|
|||
|
" <td>output</td>\n",
|
|||
|
" <td>domesticated</td>\n",
|
|||
|
" <td>language</td>\n",
|
|||
|
" <td>results</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" topic_0 topic_1 topic_2 topic_3 topic_4 topic_5 \\\n",
|
|||
|
"0 neural language state function cost publication \n",
|
|||
|
"1 network natural rnn distribution function april \n",
|
|||
|
"2 function model memory output sgd syst \n",
|
|||
|
"3 networks word vector class training technol \n",
|
|||
|
"4 one planning input tanh expected date \n",
|
|||
|
"5 input words network data optimization vol \n",
|
|||
|
"6 vector based recurrent yˆ algorithm intell \n",
|
|||
|
"7 language processing sequence loss set acm \n",
|
|||
|
"8 model models neural activation validation article \n",
|
|||
|
"9 training data lstm softmax rate trans \n",
|
|||
|
"\n",
|
|||
|
" topic_6 topic_7 topic_8 \\\n",
|
|||
|
"0 graph llama tangkhul \n",
|
|||
|
"1 computation like compound \n",
|
|||
|
"2 node south root \n",
|
|||
|
"3 nodes animal morphological \n",
|
|||
|
"4 backward america verbs \n",
|
|||
|
"5 function translation noun \n",
|
|||
|
"6 backpropagation french roots \n",
|
|||
|
"7 algorithm cute adjectives \n",
|
|||
|
"8 parameters google formation \n",
|
|||
|
"9 output domesticated language \n",
|
|||
|
"\n",
|
|||
|
" topic_9 \n",
|
|||
|
"0 want \n",
|
|||
|
"1 edu \n",
|
|||
|
"2 dsontagcoursesinferenceslidespseudolikelihoodn... \n",
|
|||
|
"3 regardlessly \n",
|
|||
|
"4 satisfied \n",
|
|||
|
"5 november \n",
|
|||
|
"6 tune \n",
|
|||
|
"7 return \n",
|
|||
|
"8 fully \n",
|
|||
|
"9 results "
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"display(topic_info)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Visualize Topics"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"application/vnd.plotly.v1+json": {
|
|||
|
"config": {
|
|||
|
"plotlyServerURL": "https://plot.ly"
|
|||
|
},
|
|||
|
"data": [
|
|||
|
{
|
|||
|
"customdata": [
|
|||
|
[
|
|||
|
0,
|
|||
|
"language | natural | model | word | planning",
|
|||
|
723
|
|||
|
],
|
|||
|
[
|
|||
|
1,
|
|||
|
"state | rnn | memory | vector | input",
|
|||
|
198
|
|||
|
],
|
|||
|
[
|
|||
|
2,
|
|||
|
"function | distribution | output | class | tanh",
|
|||
|
122
|
|||
|
],
|
|||
|
[
|
|||
|
3,
|
|||
|
"cost | function | sgd | training | expected",
|
|||
|
61
|
|||
|
],
|
|||
|
[
|
|||
|
4,
|
|||
|
"publication | april | syst | technol | date",
|
|||
|
57
|
|||
|
],
|
|||
|
[
|
|||
|
5,
|
|||
|
"graph | computation | node | nodes | backward",
|
|||
|
46
|
|||
|
],
|
|||
|
[
|
|||
|
6,
|
|||
|
"llama | like | south | animal | america",
|
|||
|
29
|
|||
|
],
|
|||
|
[
|
|||
|
7,
|
|||
|
"tangkhul | compound | root | morphological | verbs",
|
|||
|
17
|
|||
|
],
|
|||
|
[
|
|||
|
8,
|
|||
|
"want | edu | dsontagcoursesinferenceslidespseudolikelihoodnotespdf | regardlessly | satisfied",
|
|||
|
13
|
|||
|
]
|
|||
|
],
|
|||
|
"hovertemplate": "<b>Topic %{customdata[0]}</b><br>%{customdata[1]}<br>Size: %{customdata[2]}",
|
|||
|
"legendgroup": "",
|
|||
|
"marker": {
|
|||
|
"color": "#B0BEC5",
|
|||
|
"line": {
|
|||
|
"color": "DarkSlateGrey",
|
|||
|
"width": 2
|
|||
|
},
|
|||
|
"size": [
|
|||
|
723,
|
|||
|
198,
|
|||
|
122,
|
|||
|
61,
|
|||
|
57,
|
|||
|
46,
|
|||
|
29,
|
|||
|
17,
|
|||
|
13
|
|||
|
],
|
|||
|
"sizemode": "area",
|
|||
|
"sizeref": 0.451875,
|
|||
|
"symbol": "circle"
|
|||
|
},
|
|||
|
"mode": "markers",
|
|||
|
"name": "",
|
|||
|
"orientation": "v",
|
|||
|
"showlegend": false,
|
|||
|
"type": "scatter",
|
|||
|
"x": [
|
|||
|
14.759990692138672,
|
|||
|
14.329012870788574,
|
|||
|
10.99558162689209,
|
|||
|
9.891719818115234,
|
|||
|
11.191701889038086,
|
|||
|
9.449606895446777,
|
|||
|
11.662773132324219,
|
|||
|
14.039092063903809,
|
|||
|
12.023329734802246
|
|||
|
],
|
|||
|
"xaxis": "x",
|
|||
|
"y": [
|
|||
|
1.6729466915130615,
|
|||
|
2.2927768230438232,
|
|||
|
5.36309289932251,
|
|||
|
5.59792423248291,
|
|||
|
4.721500873565674,
|
|||
|
5.3096089363098145,
|
|||
|
5.3371052742004395,
|
|||
|
1.8039934635162354,
|
|||
|
4.149565696716309
|
|||
|
],
|
|||
|
"yaxis": "y"
|
|||
|
}
|
|||
|
],
|
|||
|
"layout": {
|
|||
|
"annotations": [
|
|||
|
{
|
|||
|
"showarrow": false,
|
|||
|
"text": "D1",
|
|||
|
"x": 8.03216586112976,
|
|||
|
"y": 3.929808777570724,
|
|||
|
"yshift": 10
|
|||
|
},
|
|||
|
{
|
|||
|
"showarrow": false,
|
|||
|
"text": "D2",
|
|||
|
"x": 12.503077578544616,
|
|||
|
"xshift": 10,
|
|||
|
"y": 6.437612867355346
|
|||
|
}
|
|||
|
],
|
|||
|
"height": 650,
|
|||
|
"hoverlabel": {
|
|||
|
"bgcolor": "white",
|
|||
|
"font": {
|
|||
|
"family": "Rockwell",
|
|||
|
"size": 16
|
|||
|
}
|
|||
|
},
|
|||
|
"legend": {
|
|||
|
"itemsizing": "constant",
|
|||
|
"tracegroupgap": 0
|
|||
|
},
|
|||
|
"margin": {
|
|||
|
"t": 60
|
|||
|
},
|
|||
|
"shapes": [
|
|||
|
{
|
|||
|
"line": {
|
|||
|
"color": "#CFD8DC",
|
|||
|
"width": 2
|
|||
|
},
|
|||
|
"type": "line",
|
|||
|
"x0": 12.503077578544616,
|
|||
|
"x1": 12.503077578544616,
|
|||
|
"y0": 1.4220046877861023,
|
|||
|
"y1": 6.437612867355346
|
|||
|
},
|
|||
|
{
|
|||
|
"line": {
|
|||
|
"color": "#9E9E9E",
|
|||
|
"width": 2
|
|||
|
},
|
|||
|
"type": "line",
|
|||
|
"x0": 8.03216586112976,
|
|||
|
"x1": 16.973989295959473,
|
|||
|
"y0": 3.929808777570724,
|
|||
|
"y1": 3.929808777570724
|
|||
|
}
|
|||
|
],
|
|||
|
"sliders": [
|
|||
|
{
|
|||
|
"active": 0,
|
|||
|
"pad": {
|
|||
|
"t": 50
|
|||
|
},
|
|||
|
"steps": [
|
|||
|
{
|
|||
|
"args": [
|
|||
|
{
|
|||
|
"marker.color": [
|
|||
|
[
|
|||
|
"red",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5"
|
|||
|
]
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"label": "Topic 0",
|
|||
|
"method": "update"
|
|||
|
},
|
|||
|
{
|
|||
|
"args": [
|
|||
|
{
|
|||
|
"marker.color": [
|
|||
|
[
|
|||
|
"#B0BEC5",
|
|||
|
"red",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5"
|
|||
|
]
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"label": "Topic 1",
|
|||
|
"method": "update"
|
|||
|
},
|
|||
|
{
|
|||
|
"args": [
|
|||
|
{
|
|||
|
"marker.color": [
|
|||
|
[
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"red",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5"
|
|||
|
]
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"label": "Topic 2",
|
|||
|
"method": "update"
|
|||
|
},
|
|||
|
{
|
|||
|
"args": [
|
|||
|
{
|
|||
|
"marker.color": [
|
|||
|
[
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"red",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5"
|
|||
|
]
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"label": "Topic 3",
|
|||
|
"method": "update"
|
|||
|
},
|
|||
|
{
|
|||
|
"args": [
|
|||
|
{
|
|||
|
"marker.color": [
|
|||
|
[
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"red",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5"
|
|||
|
]
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"label": "Topic 4",
|
|||
|
"method": "update"
|
|||
|
},
|
|||
|
{
|
|||
|
"args": [
|
|||
|
{
|
|||
|
"marker.color": [
|
|||
|
[
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"red",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5"
|
|||
|
]
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"label": "Topic 5",
|
|||
|
"method": "update"
|
|||
|
},
|
|||
|
{
|
|||
|
"args": [
|
|||
|
{
|
|||
|
"marker.color": [
|
|||
|
[
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"red",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5"
|
|||
|
]
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"label": "Topic 6",
|
|||
|
"method": "update"
|
|||
|
},
|
|||
|
{
|
|||
|
"args": [
|
|||
|
{
|
|||
|
"marker.color": [
|
|||
|
[
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"red",
|
|||
|
"#B0BEC5"
|
|||
|
]
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"label": "Topic 7",
|
|||
|
"method": "update"
|
|||
|
},
|
|||
|
{
|
|||
|
"args": [
|
|||
|
{
|
|||
|
"marker.color": [
|
|||
|
[
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"#B0BEC5",
|
|||
|
"red"
|
|||
|
]
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"label": "Topic 8",
|
|||
|
"method": "update"
|
|||
|
}
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"template": {
|
|||
|
"data": {
|
|||
|
"bar": [
|
|||
|
{
|
|||
|
"error_x": {
|
|||
|
"color": "rgb(36,36,36)"
|
|||
|
},
|
|||
|
"error_y": {
|
|||
|
"color": "rgb(36,36,36)"
|
|||
|
},
|
|||
|
"marker": {
|
|||
|
"line": {
|
|||
|
"color": "white",
|
|||
|
"width": 0.5
|
|||
|
},
|
|||
|
"pattern": {
|
|||
|
"fillmode": "overlay",
|
|||
|
"size": 10,
|
|||
|
"solidity": 0.2
|
|||
|
}
|
|||
|
},
|
|||
|
"type": "bar"
|
|||
|
}
|
|||
|
],
|
|||
|
"barpolar": [
|
|||
|
{
|
|||
|
"marker": {
|
|||
|
"line": {
|
|||
|
"color": "white",
|
|||
|
"width": 0.5
|
|||
|
},
|
|||
|
"pattern": {
|
|||
|
"fillmode": "overlay",
|
|||
|
"size": 10,
|
|||
|
"solidity": 0.2
|
|||
|
}
|
|||
|
},
|
|||
|
"type": "barpolar"
|
|||
|
}
|
|||
|
],
|
|||
|
"carpet": [
|
|||
|
{
|
|||
|
"aaxis": {
|
|||
|
"endlinecolor": "rgb(36,36,36)",
|
|||
|
"gridcolor": "white",
|
|||
|
"linecolor": "white",
|
|||
|
"minorgridcolor": "white",
|
|||
|
"startlinecolor": "rgb(36,36,36)"
|
|||
|
},
|
|||
|
"baxis": {
|
|||
|
"endlinecolor": "rgb(36,36,36)",
|
|||
|
"gridcolor": "white",
|
|||
|
"linecolor": "white",
|
|||
|
"minorgridcolor": "white",
|
|||
|
"startlinecolor": "rgb(36,36,36)"
|
|||
|
},
|
|||
|
"type": "carpet"
|
|||
|
}
|
|||
|
],
|
|||
|
"choropleth": [
|
|||
|
{
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
},
|
|||
|
"type": "choropleth"
|
|||
|
}
|
|||
|
],
|
|||
|
"contour": [
|
|||
|
{
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
},
|
|||
|
"colorscale": [
|
|||
|
[
|
|||
|
0,
|
|||
|
"#440154"
|
|||
|
],
|
|||
|
[
|
|||
|
0.1111111111111111,
|
|||
|
"#482878"
|
|||
|
],
|
|||
|
[
|
|||
|
0.2222222222222222,
|
|||
|
"#3e4989"
|
|||
|
],
|
|||
|
[
|
|||
|
0.3333333333333333,
|
|||
|
"#31688e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.4444444444444444,
|
|||
|
"#26828e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.5555555555555556,
|
|||
|
"#1f9e89"
|
|||
|
],
|
|||
|
[
|
|||
|
0.6666666666666666,
|
|||
|
"#35b779"
|
|||
|
],
|
|||
|
[
|
|||
|
0.7777777777777778,
|
|||
|
"#6ece58"
|
|||
|
],
|
|||
|
[
|
|||
|
0.8888888888888888,
|
|||
|
"#b5de2b"
|
|||
|
],
|
|||
|
[
|
|||
|
1,
|
|||
|
"#fde725"
|
|||
|
]
|
|||
|
],
|
|||
|
"type": "contour"
|
|||
|
}
|
|||
|
],
|
|||
|
"contourcarpet": [
|
|||
|
{
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
},
|
|||
|
"type": "contourcarpet"
|
|||
|
}
|
|||
|
],
|
|||
|
"heatmap": [
|
|||
|
{
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
},
|
|||
|
"colorscale": [
|
|||
|
[
|
|||
|
0,
|
|||
|
"#440154"
|
|||
|
],
|
|||
|
[
|
|||
|
0.1111111111111111,
|
|||
|
"#482878"
|
|||
|
],
|
|||
|
[
|
|||
|
0.2222222222222222,
|
|||
|
"#3e4989"
|
|||
|
],
|
|||
|
[
|
|||
|
0.3333333333333333,
|
|||
|
"#31688e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.4444444444444444,
|
|||
|
"#26828e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.5555555555555556,
|
|||
|
"#1f9e89"
|
|||
|
],
|
|||
|
[
|
|||
|
0.6666666666666666,
|
|||
|
"#35b779"
|
|||
|
],
|
|||
|
[
|
|||
|
0.7777777777777778,
|
|||
|
"#6ece58"
|
|||
|
],
|
|||
|
[
|
|||
|
0.8888888888888888,
|
|||
|
"#b5de2b"
|
|||
|
],
|
|||
|
[
|
|||
|
1,
|
|||
|
"#fde725"
|
|||
|
]
|
|||
|
],
|
|||
|
"type": "heatmap"
|
|||
|
}
|
|||
|
],
|
|||
|
"heatmapgl": [
|
|||
|
{
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
},
|
|||
|
"colorscale": [
|
|||
|
[
|
|||
|
0,
|
|||
|
"#440154"
|
|||
|
],
|
|||
|
[
|
|||
|
0.1111111111111111,
|
|||
|
"#482878"
|
|||
|
],
|
|||
|
[
|
|||
|
0.2222222222222222,
|
|||
|
"#3e4989"
|
|||
|
],
|
|||
|
[
|
|||
|
0.3333333333333333,
|
|||
|
"#31688e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.4444444444444444,
|
|||
|
"#26828e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.5555555555555556,
|
|||
|
"#1f9e89"
|
|||
|
],
|
|||
|
[
|
|||
|
0.6666666666666666,
|
|||
|
"#35b779"
|
|||
|
],
|
|||
|
[
|
|||
|
0.7777777777777778,
|
|||
|
"#6ece58"
|
|||
|
],
|
|||
|
[
|
|||
|
0.8888888888888888,
|
|||
|
"#b5de2b"
|
|||
|
],
|
|||
|
[
|
|||
|
1,
|
|||
|
"#fde725"
|
|||
|
]
|
|||
|
],
|
|||
|
"type": "heatmapgl"
|
|||
|
}
|
|||
|
],
|
|||
|
"histogram": [
|
|||
|
{
|
|||
|
"marker": {
|
|||
|
"line": {
|
|||
|
"color": "white",
|
|||
|
"width": 0.6
|
|||
|
}
|
|||
|
},
|
|||
|
"type": "histogram"
|
|||
|
}
|
|||
|
],
|
|||
|
"histogram2d": [
|
|||
|
{
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
},
|
|||
|
"colorscale": [
|
|||
|
[
|
|||
|
0,
|
|||
|
"#440154"
|
|||
|
],
|
|||
|
[
|
|||
|
0.1111111111111111,
|
|||
|
"#482878"
|
|||
|
],
|
|||
|
[
|
|||
|
0.2222222222222222,
|
|||
|
"#3e4989"
|
|||
|
],
|
|||
|
[
|
|||
|
0.3333333333333333,
|
|||
|
"#31688e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.4444444444444444,
|
|||
|
"#26828e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.5555555555555556,
|
|||
|
"#1f9e89"
|
|||
|
],
|
|||
|
[
|
|||
|
0.6666666666666666,
|
|||
|
"#35b779"
|
|||
|
],
|
|||
|
[
|
|||
|
0.7777777777777778,
|
|||
|
"#6ece58"
|
|||
|
],
|
|||
|
[
|
|||
|
0.8888888888888888,
|
|||
|
"#b5de2b"
|
|||
|
],
|
|||
|
[
|
|||
|
1,
|
|||
|
"#fde725"
|
|||
|
]
|
|||
|
],
|
|||
|
"type": "histogram2d"
|
|||
|
}
|
|||
|
],
|
|||
|
"histogram2dcontour": [
|
|||
|
{
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
},
|
|||
|
"colorscale": [
|
|||
|
[
|
|||
|
0,
|
|||
|
"#440154"
|
|||
|
],
|
|||
|
[
|
|||
|
0.1111111111111111,
|
|||
|
"#482878"
|
|||
|
],
|
|||
|
[
|
|||
|
0.2222222222222222,
|
|||
|
"#3e4989"
|
|||
|
],
|
|||
|
[
|
|||
|
0.3333333333333333,
|
|||
|
"#31688e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.4444444444444444,
|
|||
|
"#26828e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.5555555555555556,
|
|||
|
"#1f9e89"
|
|||
|
],
|
|||
|
[
|
|||
|
0.6666666666666666,
|
|||
|
"#35b779"
|
|||
|
],
|
|||
|
[
|
|||
|
0.7777777777777778,
|
|||
|
"#6ece58"
|
|||
|
],
|
|||
|
[
|
|||
|
0.8888888888888888,
|
|||
|
"#b5de2b"
|
|||
|
],
|
|||
|
[
|
|||
|
1,
|
|||
|
"#fde725"
|
|||
|
]
|
|||
|
],
|
|||
|
"type": "histogram2dcontour"
|
|||
|
}
|
|||
|
],
|
|||
|
"mesh3d": [
|
|||
|
{
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
},
|
|||
|
"type": "mesh3d"
|
|||
|
}
|
|||
|
],
|
|||
|
"parcoords": [
|
|||
|
{
|
|||
|
"line": {
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
}
|
|||
|
},
|
|||
|
"type": "parcoords"
|
|||
|
}
|
|||
|
],
|
|||
|
"pie": [
|
|||
|
{
|
|||
|
"automargin": true,
|
|||
|
"type": "pie"
|
|||
|
}
|
|||
|
],
|
|||
|
"scatter": [
|
|||
|
{
|
|||
|
"fillpattern": {
|
|||
|
"fillmode": "overlay",
|
|||
|
"size": 10,
|
|||
|
"solidity": 0.2
|
|||
|
},
|
|||
|
"type": "scatter"
|
|||
|
}
|
|||
|
],
|
|||
|
"scatter3d": [
|
|||
|
{
|
|||
|
"line": {
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
}
|
|||
|
},
|
|||
|
"marker": {
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
}
|
|||
|
},
|
|||
|
"type": "scatter3d"
|
|||
|
}
|
|||
|
],
|
|||
|
"scattercarpet": [
|
|||
|
{
|
|||
|
"marker": {
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
}
|
|||
|
},
|
|||
|
"type": "scattercarpet"
|
|||
|
}
|
|||
|
],
|
|||
|
"scattergeo": [
|
|||
|
{
|
|||
|
"marker": {
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
}
|
|||
|
},
|
|||
|
"type": "scattergeo"
|
|||
|
}
|
|||
|
],
|
|||
|
"scattergl": [
|
|||
|
{
|
|||
|
"marker": {
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
}
|
|||
|
},
|
|||
|
"type": "scattergl"
|
|||
|
}
|
|||
|
],
|
|||
|
"scattermapbox": [
|
|||
|
{
|
|||
|
"marker": {
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
}
|
|||
|
},
|
|||
|
"type": "scattermapbox"
|
|||
|
}
|
|||
|
],
|
|||
|
"scatterpolar": [
|
|||
|
{
|
|||
|
"marker": {
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
}
|
|||
|
},
|
|||
|
"type": "scatterpolar"
|
|||
|
}
|
|||
|
],
|
|||
|
"scatterpolargl": [
|
|||
|
{
|
|||
|
"marker": {
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
}
|
|||
|
},
|
|||
|
"type": "scatterpolargl"
|
|||
|
}
|
|||
|
],
|
|||
|
"scatterternary": [
|
|||
|
{
|
|||
|
"marker": {
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
}
|
|||
|
},
|
|||
|
"type": "scatterternary"
|
|||
|
}
|
|||
|
],
|
|||
|
"surface": [
|
|||
|
{
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
},
|
|||
|
"colorscale": [
|
|||
|
[
|
|||
|
0,
|
|||
|
"#440154"
|
|||
|
],
|
|||
|
[
|
|||
|
0.1111111111111111,
|
|||
|
"#482878"
|
|||
|
],
|
|||
|
[
|
|||
|
0.2222222222222222,
|
|||
|
"#3e4989"
|
|||
|
],
|
|||
|
[
|
|||
|
0.3333333333333333,
|
|||
|
"#31688e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.4444444444444444,
|
|||
|
"#26828e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.5555555555555556,
|
|||
|
"#1f9e89"
|
|||
|
],
|
|||
|
[
|
|||
|
0.6666666666666666,
|
|||
|
"#35b779"
|
|||
|
],
|
|||
|
[
|
|||
|
0.7777777777777778,
|
|||
|
"#6ece58"
|
|||
|
],
|
|||
|
[
|
|||
|
0.8888888888888888,
|
|||
|
"#b5de2b"
|
|||
|
],
|
|||
|
[
|
|||
|
1,
|
|||
|
"#fde725"
|
|||
|
]
|
|||
|
],
|
|||
|
"type": "surface"
|
|||
|
}
|
|||
|
],
|
|||
|
"table": [
|
|||
|
{
|
|||
|
"cells": {
|
|||
|
"fill": {
|
|||
|
"color": "rgb(237,237,237)"
|
|||
|
},
|
|||
|
"line": {
|
|||
|
"color": "white"
|
|||
|
}
|
|||
|
},
|
|||
|
"header": {
|
|||
|
"fill": {
|
|||
|
"color": "rgb(217,217,217)"
|
|||
|
},
|
|||
|
"line": {
|
|||
|
"color": "white"
|
|||
|
}
|
|||
|
},
|
|||
|
"type": "table"
|
|||
|
}
|
|||
|
]
|
|||
|
},
|
|||
|
"layout": {
|
|||
|
"annotationdefaults": {
|
|||
|
"arrowhead": 0,
|
|||
|
"arrowwidth": 1
|
|||
|
},
|
|||
|
"autotypenumbers": "strict",
|
|||
|
"coloraxis": {
|
|||
|
"colorbar": {
|
|||
|
"outlinewidth": 1,
|
|||
|
"tickcolor": "rgb(36,36,36)",
|
|||
|
"ticks": "outside"
|
|||
|
}
|
|||
|
},
|
|||
|
"colorscale": {
|
|||
|
"diverging": [
|
|||
|
[
|
|||
|
0,
|
|||
|
"rgb(103,0,31)"
|
|||
|
],
|
|||
|
[
|
|||
|
0.1,
|
|||
|
"rgb(178,24,43)"
|
|||
|
],
|
|||
|
[
|
|||
|
0.2,
|
|||
|
"rgb(214,96,77)"
|
|||
|
],
|
|||
|
[
|
|||
|
0.3,
|
|||
|
"rgb(244,165,130)"
|
|||
|
],
|
|||
|
[
|
|||
|
0.4,
|
|||
|
"rgb(253,219,199)"
|
|||
|
],
|
|||
|
[
|
|||
|
0.5,
|
|||
|
"rgb(247,247,247)"
|
|||
|
],
|
|||
|
[
|
|||
|
0.6,
|
|||
|
"rgb(209,229,240)"
|
|||
|
],
|
|||
|
[
|
|||
|
0.7,
|
|||
|
"rgb(146,197,222)"
|
|||
|
],
|
|||
|
[
|
|||
|
0.8,
|
|||
|
"rgb(67,147,195)"
|
|||
|
],
|
|||
|
[
|
|||
|
0.9,
|
|||
|
"rgb(33,102,172)"
|
|||
|
],
|
|||
|
[
|
|||
|
1,
|
|||
|
"rgb(5,48,97)"
|
|||
|
]
|
|||
|
],
|
|||
|
"sequential": [
|
|||
|
[
|
|||
|
0,
|
|||
|
"#440154"
|
|||
|
],
|
|||
|
[
|
|||
|
0.1111111111111111,
|
|||
|
"#482878"
|
|||
|
],
|
|||
|
[
|
|||
|
0.2222222222222222,
|
|||
|
"#3e4989"
|
|||
|
],
|
|||
|
[
|
|||
|
0.3333333333333333,
|
|||
|
"#31688e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.4444444444444444,
|
|||
|
"#26828e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.5555555555555556,
|
|||
|
"#1f9e89"
|
|||
|
],
|
|||
|
[
|
|||
|
0.6666666666666666,
|
|||
|
"#35b779"
|
|||
|
],
|
|||
|
[
|
|||
|
0.7777777777777778,
|
|||
|
"#6ece58"
|
|||
|
],
|
|||
|
[
|
|||
|
0.8888888888888888,
|
|||
|
"#b5de2b"
|
|||
|
],
|
|||
|
[
|
|||
|
1,
|
|||
|
"#fde725"
|
|||
|
]
|
|||
|
],
|
|||
|
"sequentialminus": [
|
|||
|
[
|
|||
|
0,
|
|||
|
"#440154"
|
|||
|
],
|
|||
|
[
|
|||
|
0.1111111111111111,
|
|||
|
"#482878"
|
|||
|
],
|
|||
|
[
|
|||
|
0.2222222222222222,
|
|||
|
"#3e4989"
|
|||
|
],
|
|||
|
[
|
|||
|
0.3333333333333333,
|
|||
|
"#31688e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.4444444444444444,
|
|||
|
"#26828e"
|
|||
|
],
|
|||
|
[
|
|||
|
0.5555555555555556,
|
|||
|
"#1f9e89"
|
|||
|
],
|
|||
|
[
|
|||
|
0.6666666666666666,
|
|||
|
"#35b779"
|
|||
|
],
|
|||
|
[
|
|||
|
0.7777777777777778,
|
|||
|
"#6ece58"
|
|||
|
],
|
|||
|
[
|
|||
|
0.8888888888888888,
|
|||
|
"#b5de2b"
|
|||
|
],
|
|||
|
[
|
|||
|
1,
|
|||
|
"#fde725"
|
|||
|
]
|
|||
|
]
|
|||
|
},
|
|||
|
"colorway": [
|
|||
|
"#1F77B4",
|
|||
|
"#FF7F0E",
|
|||
|
"#2CA02C",
|
|||
|
"#D62728",
|
|||
|
"#9467BD",
|
|||
|
"#8C564B",
|
|||
|
"#E377C2",
|
|||
|
"#7F7F7F",
|
|||
|
"#BCBD22",
|
|||
|
"#17BECF"
|
|||
|
],
|
|||
|
"font": {
|
|||
|
"color": "rgb(36,36,36)"
|
|||
|
},
|
|||
|
"geo": {
|
|||
|
"bgcolor": "white",
|
|||
|
"lakecolor": "white",
|
|||
|
"landcolor": "white",
|
|||
|
"showlakes": true,
|
|||
|
"showland": true,
|
|||
|
"subunitcolor": "white"
|
|||
|
},
|
|||
|
"hoverlabel": {
|
|||
|
"align": "left"
|
|||
|
},
|
|||
|
"hovermode": "closest",
|
|||
|
"mapbox": {
|
|||
|
"style": "light"
|
|||
|
},
|
|||
|
"paper_bgcolor": "white",
|
|||
|
"plot_bgcolor": "white",
|
|||
|
"polar": {
|
|||
|
"angularaxis": {
|
|||
|
"gridcolor": "rgb(232,232,232)",
|
|||
|
"linecolor": "rgb(36,36,36)",
|
|||
|
"showgrid": false,
|
|||
|
"showline": true,
|
|||
|
"ticks": "outside"
|
|||
|
},
|
|||
|
"bgcolor": "white",
|
|||
|
"radialaxis": {
|
|||
|
"gridcolor": "rgb(232,232,232)",
|
|||
|
"linecolor": "rgb(36,36,36)",
|
|||
|
"showgrid": false,
|
|||
|
"showline": true,
|
|||
|
"ticks": "outside"
|
|||
|
}
|
|||
|
},
|
|||
|
"scene": {
|
|||
|
"xaxis": {
|
|||
|
"backgroundcolor": "white",
|
|||
|
"gridcolor": "rgb(232,232,232)",
|
|||
|
"gridwidth": 2,
|
|||
|
"linecolor": "rgb(36,36,36)",
|
|||
|
"showbackground": true,
|
|||
|
"showgrid": false,
|
|||
|
"showline": true,
|
|||
|
"ticks": "outside",
|
|||
|
"zeroline": false,
|
|||
|
"zerolinecolor": "rgb(36,36,36)"
|
|||
|
},
|
|||
|
"yaxis": {
|
|||
|
"backgroundcolor": "white",
|
|||
|
"gridcolor": "rgb(232,232,232)",
|
|||
|
"gridwidth": 2,
|
|||
|
"linecolor": "rgb(36,36,36)",
|
|||
|
"showbackground": true,
|
|||
|
"showgrid": false,
|
|||
|
"showline": true,
|
|||
|
"ticks": "outside",
|
|||
|
"zeroline": false,
|
|||
|
"zerolinecolor": "rgb(36,36,36)"
|
|||
|
},
|
|||
|
"zaxis": {
|
|||
|
"backgroundcolor": "white",
|
|||
|
"gridcolor": "rgb(232,232,232)",
|
|||
|
"gridwidth": 2,
|
|||
|
"linecolor": "rgb(36,36,36)",
|
|||
|
"showbackground": true,
|
|||
|
"showgrid": false,
|
|||
|
"showline": true,
|
|||
|
"ticks": "outside",
|
|||
|
"zeroline": false,
|
|||
|
"zerolinecolor": "rgb(36,36,36)"
|
|||
|
}
|
|||
|
},
|
|||
|
"shapedefaults": {
|
|||
|
"fillcolor": "black",
|
|||
|
"line": {
|
|||
|
"width": 0
|
|||
|
},
|
|||
|
"opacity": 0.3
|
|||
|
},
|
|||
|
"ternary": {
|
|||
|
"aaxis": {
|
|||
|
"gridcolor": "rgb(232,232,232)",
|
|||
|
"linecolor": "rgb(36,36,36)",
|
|||
|
"showgrid": false,
|
|||
|
"showline": true,
|
|||
|
"ticks": "outside"
|
|||
|
},
|
|||
|
"baxis": {
|
|||
|
"gridcolor": "rgb(232,232,232)",
|
|||
|
"linecolor": "rgb(36,36,36)",
|
|||
|
"showgrid": false,
|
|||
|
"showline": true,
|
|||
|
"ticks": "outside"
|
|||
|
},
|
|||
|
"bgcolor": "white",
|
|||
|
"caxis": {
|
|||
|
"gridcolor": "rgb(232,232,232)",
|
|||
|
"linecolor": "rgb(36,36,36)",
|
|||
|
"showgrid": false,
|
|||
|
"showline": true,
|
|||
|
"ticks": "outside"
|
|||
|
}
|
|||
|
},
|
|||
|
"title": {
|
|||
|
"x": 0.05
|
|||
|
},
|
|||
|
"xaxis": {
|
|||
|
"automargin": true,
|
|||
|
"gridcolor": "rgb(232,232,232)",
|
|||
|
"linecolor": "rgb(36,36,36)",
|
|||
|
"showgrid": false,
|
|||
|
"showline": true,
|
|||
|
"ticks": "outside",
|
|||
|
"title": {
|
|||
|
"standoff": 15
|
|||
|
},
|
|||
|
"zeroline": false,
|
|||
|
"zerolinecolor": "rgb(36,36,36)"
|
|||
|
},
|
|||
|
"yaxis": {
|
|||
|
"automargin": true,
|
|||
|
"gridcolor": "rgb(232,232,232)",
|
|||
|
"linecolor": "rgb(36,36,36)",
|
|||
|
"showgrid": false,
|
|||
|
"showline": true,
|
|||
|
"ticks": "outside",
|
|||
|
"title": {
|
|||
|
"standoff": 15
|
|||
|
},
|
|||
|
"zeroline": false,
|
|||
|
"zerolinecolor": "rgb(36,36,36)"
|
|||
|
}
|
|||
|
}
|
|||
|
},
|
|||
|
"title": {
|
|||
|
"font": {
|
|||
|
"color": "Black",
|
|||
|
"size": 22
|
|||
|
},
|
|||
|
"text": "<b>Intertopic Distance Map</b>",
|
|||
|
"x": 0.5,
|
|||
|
"xanchor": "center",
|
|||
|
"y": 0.95,
|
|||
|
"yanchor": "top"
|
|||
|
},
|
|||
|
"width": 650,
|
|||
|
"xaxis": {
|
|||
|
"anchor": "y",
|
|||
|
"domain": [
|
|||
|
0,
|
|||
|
1
|
|||
|
],
|
|||
|
"range": [
|
|||
|
8.03216586112976,
|
|||
|
16.973989295959473
|
|||
|
],
|
|||
|
"title": {
|
|||
|
"text": ""
|
|||
|
},
|
|||
|
"visible": false
|
|||
|
},
|
|||
|
"yaxis": {
|
|||
|
"anchor": "x",
|
|||
|
"domain": [
|
|||
|
0,
|
|||
|
1
|
|||
|
],
|
|||
|
"range": [
|
|||
|
1.4220046877861023,
|
|||
|
6.437612867355346
|
|||
|
],
|
|||
|
"title": {
|
|||
|
"text": ""
|
|||
|
},
|
|||
|
"visible": false
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"topic_model.visualize_topics()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": []
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3 (ipykernel)",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.8.15"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|