diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..ca458198b1bf18a1dd3c9d07a69cfab051ca6b91 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +data/**/*.npy filter=lfs diff=lfs merge=lfs -text +data/**/*.jsonl filter=lfs diff=lfs merge=lfs -text +data/**/*.json filter=lfs diff=lfs merge=lfs -text +assets/*.png filter=lfs diff=lfs merge=lfs -text +data/**/*.npz filter=lfs diff=lfs merge=lfs -text diff --git a/.huggingface.yaml b/.huggingface.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23f293f7177e70e3af9c761c24fcd755ce30b864 --- /dev/null +++ b/.huggingface.yaml @@ -0,0 +1,3 @@ +# .huggingface.yaml +sdk: streamlit # or gradio +app_file: ./app/ui.py diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..c518600715b03dc515e239eff496934aa82f1e16 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Suman Adhya + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/app/ui_updated.py b/app/ui_updated.py new file mode 100644 index 0000000000000000000000000000000000000000..d3db765ca7ea28997b3ba41649362e57558148a7 --- /dev/null +++ b/app/ui_updated.py @@ -0,0 +1,450 @@ +import streamlit as st +import plotly.graph_objects as go +import plotly.colors as pc +import sys +import os +import base64 +import streamlit.components.v1 as components +import html + +# Absolute path to the repo root (assuming `ui.py` is in /app) +REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +sys.path.append(REPO_ROOT) +ASSETS_DIR = os.path.join(REPO_ROOT, 'assets') +DATA_DIR = os.path.join(REPO_ROOT, 'data') + +# Import functions from the backend +from backend.inference.process_beta import ( + load_beta_matrix, + get_top_words_over_time, + load_time_labels + ) +from backend.inference.word_selector import get_interesting_words, get_word_trend +from backend.inference.indexing_utils import load_index +from backend.inference.doc_retriever import ( + load_length_stats, + get_yearly_counts_for_word, + deduplicate_docs, + get_all_documents_for_word_year, + highlight_words, + extract_snippet +) +from backend.llm_utils.summarizer import summarize_multiword_docs, ask_multiturn_followup +from backend.llm_utils.label_generator import get_topic_labels +from backend.llm.llm_router import get_llm, list_supported_models +from backend.llm_utils.token_utils import estimate_k_max_from_word_stats + +def get_base64_image(image_path): + with open(image_path, "rb") as img_file: + return base64.b64encode(img_file.read()).decode() + +# --- Page Configuration --- +st.set_page_config( + page_title="DTECT", + page_icon="🔍", + layout="wide" +) + +# Sidebar branding and repo link 
+st.sidebar.markdown( + """ +
+ + + +
+
+ """.format(get_base64_image(os.path.join(ASSETS_DIR, 'Logo_light.png'))), + unsafe_allow_html=True +) + +# 1. Sidebar: Model and Dataset Selection +st.sidebar.title("Configuration") + +AVAILABLE_MODELS = ["DTM", "DETM", "CFDTM"] +ENV_VAR_MAP = { + "OpenAI": "OPENAI_API_KEY", + "Anthropic": "ANTHROPIC_API_KEY", + "Gemini": "GEMINI_API_KEY", + "Mistral": "MISTRAL_API_KEY" +} + +def list_datasets(data_dir): + return sorted([ + name for name in os.listdir(data_dir) + if os.path.isdir(os.path.join(data_dir, name)) + ]) + +with st.sidebar.expander("Select Dataset & Topic Model", expanded=True): + datasets = list_datasets(DATA_DIR) + selected_dataset = st.selectbox("Dataset", datasets, help="Choose an available dataset.") + selected_model = st.selectbox("Model", AVAILABLE_MODELS, help="Select topic model architecture.") + +# Resolve paths +dataset_path = os.path.join(DATA_DIR, selected_dataset) +model_path = os.path.join(dataset_path, selected_model) +docs_path = os.path.join(dataset_path, "docs.jsonl") +vocab_path = os.path.join(dataset_path, "processed/vocab.txt") +time2id_path = os.path.join(dataset_path, "processed/time2id.txt") +index_path = os.path.join(dataset_path, "inverted_index.json") +beta_path = os.path.join(model_path, "beta.npy") +label_cache_path = os.path.join(model_path, "topic_label_cache.json") +length_stats_path = os.path.join(dataset_path, "processed/length_stats.json") +lemma_map_path = os.path.join(dataset_path, "processed/lemma_to_forms.json") + +with st.sidebar.expander("LLM Settings", expanded=True): + provider = st.selectbox("LLM Provider", options=list(ENV_VAR_MAP.keys()), help="Choose the LLM backend.") + available_models = list_supported_models(provider) + model = st.selectbox("LLM Model", options=available_models) + env_var = ENV_VAR_MAP[provider] + api_key = os.getenv(env_var) + + if "llm_configured" not in st.session_state: + st.session_state.llm_configured = False + + if api_key: + st.session_state.llm_configured = True + else: + 
st.session_state.llm_configured = False + with st.form(key="api_key_form"): + entered_key = st.text_input(f"Enter your {provider} API Key", type="password") + submitted = st.form_submit_button("Submit and Confirm") + if submitted: + if entered_key: + os.environ[env_var] = entered_key + api_key = entered_key + st.session_state.llm_configured = True + st.rerun() + else: + st.warning("Please enter a key.") + + if not st.session_state.llm_configured: + st.warning("Please configure your LLM settings in the sidebar.") + st.stop() + + if api_key and not st.session_state.llm_configured: + st.session_state.llm_configured = True + + if not api_key: + st.session_state.llm_configured = False + + if not st.session_state.llm_configured: + st.warning("Please configure your LLM settings in the sidebar.") + st.stop() + +# Initialize LLM with the provided key +llm = get_llm(provider=provider, model=model, api_key=api_key) + +# 3. Load Data +@st.cache_resource +def load_resources(beta_path, vocab_path, docs_path, index_path, time2id_path, length_stats_path, lemma_map_path): + beta, vocab = load_beta_matrix(beta_path, vocab_path) + index, docs, lemma_to_forms = load_index(docs_file_path=docs_path, vocab=vocab, index_path=index_path, lemma_map_path=lemma_map_path) + time_labels = load_time_labels(time2id_path) + length_stats = load_length_stats(length_stats_path) + return beta, vocab, index, docs, lemma_to_forms, time_labels, length_stats + +# --- Main Title and Paper-aligned Intro --- +st.markdown("""# 🔍 DTECT: Dynamic Topic Explorer & Context Tracker""") + +# --- Load resources --- +try: + beta, vocab, index, docs, lemma_to_forms, time_labels, length_stats = load_resources( + beta_path, + vocab_path, + docs_path, + index_path, + time2id_path, + length_stats_path, + lemma_map_path + ) +except FileNotFoundError as e: + st.error(f"Missing required file: {e}") + st.stop() +except Exception as e: + st.error(f"Failed to load data: {str(e)}") + st.stop() + +timestamps = 
list(range(len(time_labels))) +num_topics = beta.shape[1] +# Estimate max_k based on document length stats and selected LLM +suggested_max_k = estimate_k_max_from_word_stats(length_stats.get("avg_len"), model_name=model, provider=provider) + + +# ============================================================================== +# 1. 🏷 TOPIC LABELING +# ============================================================================== +st.markdown("## 1️⃣ 🏷️ Topic Labeling") +st.info("Topics are automatically labeled using LLMs by analyzing their temporal word distributions.") + +topic_labels = get_topic_labels(beta, vocab, time_labels, llm, label_cache_path) +topic_options = list(topic_labels.values()) +selected_topic_label = st.selectbox("Select a Topic", topic_options, help="LLM-generated topic label") +label_to_topic = {v: k for k, v in topic_labels.items()} +selected_topic = label_to_topic[selected_topic_label] + +# ============================================================================== +# 2. 💡 INFORMATIVE WORD DETECTION & 📊 TREND VISUALIZATION +# ============================================================================== +st.markdown("---") +st.markdown("## 2️⃣ 💡 Informative Word Detection & 📊 Trend Visualization") +st.info("Explore top/interesting words for each topic, and visualize their trends over time.") + +top_n_words = st.slider("Number of Top Words per Topic", min_value=5, max_value=500, value=10) +top_words = get_top_words_over_time( + beta=beta, + vocab=vocab, + topic_id=selected_topic, + top_n=top_n_words +) + +st.write(f"### Top {top_n_words} Words for Topic '{selected_topic_label}' (Ranked):") +scrollable_top_words = "
" +words_per_col = (top_n_words + 3) // 4 +columns = [top_words[i:i+words_per_col] for i in range(0, len(top_words), words_per_col)] +scrollable_top_words += "
" +word_rank = 1 +for col in columns: + scrollable_top_words += "
" + for word in col: + scrollable_top_words += f"
{word_rank}. {word}
" + word_rank += 1 + scrollable_top_words += "
" +scrollable_top_words += "
" +st.markdown(scrollable_top_words, unsafe_allow_html=True) + +st.markdown("
", unsafe_allow_html=True) + +if st.button("💡 Suggest Informative Words", key="suggest_topic_words"): + top_words = get_top_words_over_time( + beta=beta, + vocab=vocab, + topic_id=selected_topic, + top_n=top_n_words + ) + interesting_words = get_interesting_words(beta, vocab, topic_id=selected_topic, restrict_to=top_words) + st.session_state.interesting_words = interesting_words + st.session_state.selected_words = interesting_words[:15] # pre-fill multiselect + styled_words = " ".join([ + f"{w}" + for w in interesting_words + ]) + st.markdown( + f"**Top Informative Words from Topic '{selected_topic_label}':**
{styled_words}", + unsafe_allow_html=True + ) + +st.markdown("#### 📈 Plot Word Trends Over Time") +all_word_options = vocab +interesting_words = st.session_state.get("interesting_words", []) + +if "selected_words" not in st.session_state: + st.session_state.selected_words = interesting_words[:15] # initial default + +selected_words = st.multiselect( + "Select words to visualize trends", + options=all_word_options, + default=st.session_state.selected_words, + key="selected_words" +) +if selected_words: + fig = go.Figure() + color_cycle = pc.qualitative.Plotly + for i, word in enumerate(selected_words): + trend = get_word_trend(beta, vocab, word, topic_id=selected_topic) + color = color_cycle[i % len(color_cycle)] + fig.add_trace(go.Scatter( + x=time_labels, + y=trend, + name=word, + line=dict(color=color), + legendgroup=word, + showlegend=True + )) + fig.update_layout(title="", xaxis_title="Year", yaxis_title="Importance") + st.plotly_chart(fig, use_container_width=True) + +# ============================================================================== +# 3. 
🔍 DOCUMENT RETRIEVAL & 📃 SUMMARIZATION +# ============================================================================== +st.markdown("---") +st.markdown("## 3️⃣ 🔍 Document Retrieval & 📃 Summarization") +st.info("Retrieve and summarize documents matching selected words and years.") + +if selected_words: + st.markdown("#### 📊 Document Frequency Over Time") + selected_words_for_counts = st.multiselect( + "Select word(s) to show document frequencies over time", + options=selected_words, + default=selected_words[:3], + key="word_counts_multiselect" + ) + + if selected_words_for_counts: + color_cycle = pc.qualitative.Set2 + bar_fig = go.Figure() + for i, word in enumerate(selected_words_for_counts): + doc_years, doc_counts = get_yearly_counts_for_word(index=index, word=word) + bar_fig.add_trace(go.Bar( + x=doc_years, + y=doc_counts, + name=word, + marker_color=color_cycle[i % len(color_cycle)], + opacity=0.85 + )) + bar_fig.update_layout( + barmode="group", + title="Document Frequency Over Time", + xaxis_title="Year", + yaxis_title="Document Count", + xaxis=dict( + tickmode='linear', + dtick=1, + tickformat='d' + ), + bargap=0.2 + ) + st.plotly_chart(bar_fig, use_container_width=True) + + st.markdown("#### 📄 Inspect Documents for Word-Year Pairs") + # selected_year = st.slider("Select year", min_value=int(time_labels[0]), max_value=int(time_labels[-1]), key="inspect_year_slider") + selected_year = st.selectbox( + "Select year", + options=time_labels, # Use the list of available time labels (years) + index=0, # Default to the first year in the list + key="inspect_year_selectbox" + ) + collected_docs_raw = [] + for word in selected_words_for_counts: + docs_for_word_year = get_all_documents_for_word_year( + index=index, + docs_file_path=docs_path, + word=word, + year=selected_year + ) + for doc in docs_for_word_year: + doc["__word__"] = word + collected_docs_raw.extend(docs_for_word_year) + + if collected_docs_raw: + st.session_state.collected_deduplicated_docs = 
deduplicate_docs(collected_docs_raw) + st.write(f"Found {len(collected_docs_raw)} matching documents, {len(st.session_state.collected_deduplicated_docs)} after deduplication.") + + html_blocks = "" + for doc in st.session_state.collected_deduplicated_docs: + word = doc["__word__"] + full_text = html.escape(doc["text"]) + snippet_text = extract_snippet(doc["text"], word) + highlighted_snippet = highlight_words( + snippet_text, + query_words=selected_words_for_counts, + lemma_to_forms=lemma_to_forms + ) + html_blocks += f""" +
+
Match: {word} | Doc ID: {doc['id']} | Timestamp: {doc['timestamp']}
+
Snippet: {highlighted_snippet}
+
+ Show full document +
{full_text}
+
+
+ """ + min_height = 120 + max_height = 700 + per_doc_height = 130 + dynamic_height = min_height + per_doc_height * max(len(st.session_state.collected_deduplicated_docs) - 1, 0) + container_height = min(dynamic_height, max_height) + scrollable_html = f""" +
+ {html_blocks} +
+ """ + components.html(scrollable_html, height=container_height, scrolling=True) + else: + st.warning("No documents found for the selected words and year.") + +# ============================================================================== +# 4. 💬 CHAT ASSISTANT (Summary & Follow-up) +# ============================================================================== +st.markdown("---") +st.markdown("## 4️⃣ 💬 Chat Assistant") +st.info("Generate summaries from the inspected documents and ask follow-up questions.") + +if "summary" not in st.session_state: + st.session_state.summary = None +if "context_for_followup" not in st.session_state: + st.session_state.context_for_followup = "" +if "followup_history" not in st.session_state: + st.session_state.followup_history = [] + +# MMR K selection +st.markdown(f"**Max documents for summarization (k):**") +st.markdown(f"The suggested maximum number of documents for summarization (k) based on the average document length and the selected LLM is **{suggested_max_k}**.") +mmr_k = st.slider( + "Select the maximum number of documents (k) for MMR (Maximum Marginal Relevance) selection for summarization.", + min_value=1, + max_value=20, # Set a reasonable max for k, can be adjusted + value=min(suggested_max_k, 20), # Use suggested_max_k as default, capped at 20 + help="This value determines how many relevant and diverse documents will be selected for summarization." 
+) + +if st.button("📃 Summarize These Documents"): + if st.session_state.get("collected_deduplicated_docs"): + st.session_state.summary = None + st.session_state.context_for_followup = "" + st.session_state.followup_history = [] + with st.spinner("Selecting and summarizing documents..."): + summary, mmr_docs = summarize_multiword_docs( + selected_words_for_counts, + selected_year, + st.session_state.collected_deduplicated_docs, + llm, + k=mmr_k + ) + st.session_state.summary = summary + st.session_state.context_for_followup = "\n".join( + f"Document {i+1}:\n{doc.page_content.strip()}" for i, doc in enumerate(mmr_docs) + ) + st.session_state.followup_history.append( + {"role": "user", "content": f"Please summarize the context of the words '{', '.join(selected_words_for_counts)}' in {selected_year} based on the provided documents."} + ) + st.session_state.followup_history.append( + {"role": "assistant", "content": st.session_state.summary} + ) + st.success(f"✅ Summary generated from {len(mmr_docs)} MMR-selected documents.") + else: + st.warning("⚠️ No documents collected to summarize. 
Please inspect some documents first.") + +if st.session_state.summary: + st.markdown(f"**Summary for words `{', '.join(selected_words_for_counts)}` in `{selected_year}`:**") + st.write(st.session_state.summary) + + if st.checkbox("💬 Ask follow-up questions about this summary", key="enable_followup"): + with st.expander("View the documents used for this conversation"): + st.text_area("Context Documents", st.session_state.context_for_followup, height=200) + st.info("Ask a question based on the summary and the documents above.") + for msg in st.session_state.followup_history[2:]: + with st.chat_message(msg["role"], avatar="🧑" if msg["role"] == "user" else "🤖"): + st.markdown(msg["content"]) + if user_query := st.chat_input("Ask a follow-up question..."): + with st.chat_message("user", avatar="🧑"): + st.markdown(user_query) + st.session_state.followup_history.append({"role": "user", "content": user_query}) + with st.spinner("Thinking..."): + followup_response = ask_multiturn_followup( + history=st.session_state.followup_history, + question=user_query, + llm=llm, + context_texts=st.session_state.context_for_followup + ) + st.session_state.followup_history.append({"role": "assistant", "content": followup_response}) + if followup_response.startswith("[Error"): + st.error(followup_response) + else: + with st.chat_message("assistant", avatar="🤖"): + st.markdown(followup_response) + st.rerun() \ No newline at end of file diff --git a/assets/Logo_light.png b/assets/Logo_light.png new file mode 100644 index 0000000000000000000000000000000000000000..607d48a9948f9798871e324b3b6e34fef8fec1ce --- /dev/null +++ b/assets/Logo_light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4237eeb306339868507feb9ae60b6c5ab5980abe769d8b26d3635eed55e9714f +size 317450 diff --git a/backend/__init__.py b/backend/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6b5517720eef30b7c7e6af35a6bd163682c9302c --- /dev/null +++ 
b/backend/__init__.py @@ -0,0 +1,81 @@ +# === Inference components === +from .inference.process_beta import ( + load_beta_matrix, + get_top_words_at_time, + get_top_words_over_time, + load_time_labels + ) + +from .inference.indexing_utils import load_index +from .inference.word_selector import ( + get_interesting_words, + get_word_trend +) +from .inference.peak_detector import detect_peaks +from .inference.doc_retriever import ( + load_length_stats, + get_yearly_counts_for_word, + get_all_documents_for_word_year, + deduplicate_docs, + extract_snippet, + highlight, + get_docs_by_ids, +) + +# === LLM components === +from .llm_utils.label_generator import label_topic_temporal, get_topic_labels +from .llm_utils.token_utils import ( + get_token_limit_for_model, + count_tokens, + estimate_avg_tokens_per_doc, + estimate_max_k, + estimate_max_k_fast + ) +from .llm_utils.summarizer import ( + summarize_docs, + summarize_multiword_docs, + ask_multiturn_followup +) +from .llm.llm_router import ( + list_supported_models, + get_llm +) + +# === Dataset utilities === +from .datasets import dynamic_dataset +from .datasets import preprocess +from .datasets.utils import logger, _utils +from .datasets.data import file_utils, download + +# === Evaluation === +from .evaluation.CoherenceModel_ttc import CoherenceModel_ttc +from .evaluation.eval import TopicQualityAssessor + +# === Models === +from .models.DETM import DETM +from .models.DTM_trainer import DTMTrainer +from .models.CFDTM.CFDTM import CFDTM +from .models.dynamic_trainer import DynamicTrainer + +__all__ = [ + # Inference + "load_beta_matrix", "load_time_labels", "get_top_words_at_time", "get_top_words_over_time", + "load_index", "get_interesting_words", "get_word_trend", "detect_peaks", + "load_length_stats", "get_yearly_counts_for_word", "get_all_documents_for_word_year", + "deduplicate_docs", "extract_snippet", "highlight", "get_docs_by_ids", + + # LLM + "summarize_docs", "summarize_multiword_docs", 
"ask_multiturn_followup", + "get_token_limit_for_model", "list_supported_models", "get_llm", + "label_topic_temporal", "get_topic_labels", "count_tokens", + "estimate_avg_tokens_per_doc", "estimate_max_k", "estimate_max_k_fast", + + # Dataset + "dynamic_dataset", "preprocess", "logger","_utils", "file_utils", "download", + + # Evaluation + "CoherenceModel_ttc", "TopicQualityAssessor", + + # Models + "DETM", "DTMTrainer", "CFDTM", "DynamicTrainer" +] diff --git a/backend/datasets/_preprocess.py b/backend/datasets/_preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..e77e9e56f25986b63fb2bbaf2dba56f1f7264450 --- /dev/null +++ b/backend/datasets/_preprocess.py @@ -0,0 +1,447 @@ +import os +import re +import string +import gensim.downloader +from collections import Counter +import numpy as np +import scipy.sparse +from tqdm import tqdm +from sklearn.feature_extraction.text import CountVectorizer + +from backend.datasets.data import file_utils +from backend.datasets.utils._utils import get_stopwords_set +from backend.datasets.utils.logger import Logger +import json +import nltk +from nltk.stem import WordNetLemmatizer + +logger = Logger("WARNING") + +try: + nltk.data.find('corpora/wordnet') +except LookupError: + nltk.download('wordnet', quiet=True) +try: + nltk.data.find('corpora/omw-1.4') +except LookupError: + nltk.download('omw-1.4', quiet=True) + +# compile some regexes +punct_chars = list(set(string.punctuation) - set("'")) +punct_chars.sort() +punctuation = ''.join(punct_chars) +replace = re.compile('[%s]' % re.escape(punctuation)) +alpha = re.compile('^[a-zA-Z_]+$') +alpha_or_num = re.compile('^[a-zA-Z_]+|[0-9_]+$') +alphanum = re.compile('^[a-zA-Z0-9_]+$') + + +class Tokenizer: + def __init__(self, + stopwords="English", + keep_num=False, + keep_alphanum=False, + strip_html=False, + no_lower=False, + min_length=3, + lemmatize=True, + ): + self.keep_num = keep_num + self.keep_alphanum = keep_alphanum + self.strip_html = strip_html + 
# Pre-compiled patterns shared by Tokenizer.
# `replace` matches every ASCII punctuation mark except the apostrophe, which
# clean_text handles separately so contractions collapse instead of splitting.
punct_chars = list(set(string.punctuation) - set("'"))
punct_chars.sort()
punctuation = ''.join(punct_chars)
replace = re.compile('[%s]' % re.escape(punctuation))
alpha = re.compile('^[a-zA-Z_]+$')
# The alternation is grouped so that BOTH branches are anchored at both ends.
# Without the group the pattern reads "(^letters) OR (digits$)", which lets a
# mixed token such as "abc123" through the letters-only-or-digits-only filter.
alpha_or_num = re.compile('^(?:[a-zA-Z_]+|[0-9_]+)$')
alphanum = re.compile('^[a-zA-Z0-9_]+$')


class Tokenizer:
    """Clean raw text and split it into filtered (optionally lemmatized) unigrams.

    Args:
        stopwords: Identifier passed to ``get_stopwords_set`` to obtain the
            stopword set.
        keep_num: Keep tokens made only of digits.
        keep_alphanum: Keep tokens mixing letters and digits.
        strip_html: Remove HTML tags instead of turning angle brackets into
            parentheses.
        no_lower: Do not lowercase the text.
        min_length: Minimum token length; shorter tokens are dropped
            (``0`` disables the check).
        lemmatize: Lemmatize surviving tokens with NLTK's WordNetLemmatizer.
    """

    def __init__(self,
                 stopwords="English",
                 keep_num=False,
                 keep_alphanum=False,
                 strip_html=False,
                 no_lower=False,
                 min_length=3,
                 lemmatize=True,
                 ):
        self.keep_num = keep_num
        self.keep_alphanum = keep_alphanum
        self.strip_html = strip_html
        self.lower = not no_lower
        self.min_length = min_length

        self.stopword_set = get_stopwords_set(stopwords)

        self.lemmatize = lemmatize
        if lemmatize:
            self.lemmatizer = WordNetLemmatizer()

    def clean_text(self, text, strip_html=False, lower=True, keep_emails=False, keep_at_mentions=False):
        """Normalize *text* into space-separated word candidates.

        Args:
            text: Raw input string.
            strip_html: Remove ``<...>`` tags; otherwise angle brackets become
                parentheses (and are later removed as punctuation).
            lower: Lowercase the text.
            keep_emails: Keep ``something@something`` spans.
            keep_at_mentions: Keep ``@mention`` spans that follow whitespace.

        Returns:
            The cleaned string with no punctuation, single spaces between
            words, and no leading/trailing whitespace.
        """
        if strip_html:
            text = re.sub(r'<[^>]+>', '', text)
        else:
            # Neutralize angle brackets so they are treated as punctuation.
            text = re.sub(r'<', '(', text)
            text = re.sub(r'>', ')', text)
        if lower:
            text = text.lower()
        if not keep_emails:
            text = re.sub(r'\S+@\S+', ' ', text)
        if not keep_at_mentions:
            text = re.sub(r'\s@\S+', ' ', text)
        # Underscores act as word separators.
        text = re.sub(r'_', ' ', text)
        # Break off single quotes at the ends of words.
        text = re.sub(r'\s\'', ' ', text)
        text = re.sub(r'\'\s', ' ', text)
        # Periods are removed outright (so "u.s." becomes "us").
        text = re.sub(r'\.', '', text)
        # All other punctuation (except single quotes) becomes a space.
        text = replace.sub(' ', text)
        # Remaining single quotes are inside words: drop them.
        text = re.sub(r'\'', '', text)
        # Collapse every run of whitespace into one space (previously each
        # whitespace character was replaced individually, leaving runs).
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    def tokenize(self, text):
        """Return the list of unigrams kept from *text*.

        Tokens are dropped when they are stopwords, fail the character-class
        filter selected by ``keep_num`` / ``keep_alphanum``, or are shorter
        than ``min_length``. Surviving tokens are lemmatized when enabled.
        """
        text = self.clean_text(text, self.strip_html, self.lower)
        tokens = text.split()

        # '_' is the placeholder for a rejected token; clean_text has already
        # turned real underscores into spaces, so it cannot clash with input.
        tokens = ['_' if t in self.stopword_set else t for t in tokens]

        if not self.keep_alphanum and not self.keep_num:
            # Letters only.
            tokens = [t if alpha.match(t) else '_' for t in tokens]
        elif not self.keep_alphanum:
            # Letters only OR digits only, but never a mixture.
            tokens = [t if alpha_or_num.match(t) else '_' for t in tokens]

        if self.min_length > 0:
            tokens = [t if len(t) >= self.min_length else '_' for t in tokens]

        # getattr keeps instances created before `lemmatize` existed working.
        if getattr(self, "lemmatize", False):
            tokens = [self.lemmatizer.lemmatize(t) if t != '_' else t for t in tokens]

        return [t for t in tokens if t != '_']
def _has_items(values):
    """Return True when *values* is a non-empty sequence or array."""
    return values is not None and len(values) > 0


def make_word_embeddings(vocab):
    """Build a GloVe embedding matrix aligned with *vocab*.

    Loads ``glove-wiki-gigaword-200`` through ``gensim.downloader`` (this may
    download the vectors) and copies the vector of every vocabulary word that
    GloVe knows; rows of unknown words stay all-zero.

    Returns:
        A ``scipy.sparse.csr_matrix`` with one row per vocabulary word.
    """
    glove_vectors = gensim.downloader.load('glove-wiki-gigaword-200')
    word_embeddings = np.zeros((len(vocab), glove_vectors.vectors.shape[1]))

    try:
        key_word_list = glove_vectors.index_to_key
    except AttributeError:
        # Older gensim releases expose the vocabulary as `index2word`.
        key_word_list = glove_vectors.index2word
    # A set makes each membership test O(1); scanning the list for every
    # vocabulary word was quadratic in practice.
    known_words = set(key_word_list)

    num_found = 0
    for i, word in enumerate(tqdm(vocab, desc="loading word embeddings")):
        if word in known_words:
            word_embeddings[i] = glove_vectors[word]
            num_found += 1

    logger.info(f'number of found embeddings: {num_found}/{len(vocab)}')

    return scipy.sparse.csr_matrix(word_embeddings)


class Preprocess:
    """Tokenize a corpus, build a vocabulary and produce bag-of-words data."""

    def __init__(self,
                 tokenizer=None,
                 test_sample_size=None,
                 test_p=0.2,
                 stopwords="English",
                 min_doc_count=0,
                 max_doc_freq=1.0,
                 keep_num=False,
                 keep_alphanum=False,
                 strip_html=False,
                 no_lower=False,
                 min_length=3,
                 min_term=0,
                 vocab_size=None,
                 seed=42,
                 verbose=True,
                 lemmatize=True,
                 ):
        """
        Args:
            tokenizer:
                Callable mapping a text to a list of tokens. When None, a
                ``Tokenizer`` is built from the options below.
            test_sample_size:
                Size of the test set.
            test_p:
                Proportion of the test set. This helps sample the train set based on the size of the test set.
            stopwords:
                List of stopwords to exclude.
            min_doc_count:
                Exclude words that occur in less than this number of documents.
            max_doc_freq:
                Exclude words that occur in more than this proportion of documents.
            keep_num:
                Keep tokens made of only numbers.
            keep_alphanum:
                Keep tokens made of a mixture of letters and numbers.
            strip_html:
                Strip HTML tags.
            no_lower:
                Do not lowercase text
            min_length:
                Minimum token length.
            min_term:
                Minimum number of tokens a document needs to be kept.
            vocab_size:
                Size of the vocabulary (by most common in the union of train and test sets, following above exclusions)
            seed:
                Random integer seed (only relevant for choosing test set)
            verbose:
                Log at DEBUG level when True, WARNING otherwise.
            lemmatize:
                Whether to apply lemmatization to the tokens.
        """
        self.test_sample_size = test_sample_size
        self.min_doc_count = min_doc_count
        self.max_doc_freq = max_doc_freq
        self.min_term = min_term
        self.test_p = test_p
        self.vocab_size = vocab_size
        self.seed = seed

        if tokenizer is not None:
            self.tokenizer = tokenizer
        else:
            self.tokenizer = Tokenizer(
                stopwords,
                keep_num,
                keep_alphanum,
                strip_html,
                no_lower,
                min_length,
                lemmatize=lemmatize
            ).tokenize

        if verbose:
            logger.set_level("DEBUG")
        else:
            logger.set_level("WARNING")

    def parse(self, texts, vocab):
        """Tokenize *texts*, keep in-vocabulary tokens and count them.

        Args:
            texts: A single text or a list of texts.
            vocab: Vocabulary fixing the bag-of-words columns.

        Returns:
            ``(parsed_texts, sparse_bow)``: the filtered texts re-joined with
            spaces, and the matrix returned by ``CountVectorizer.fit_transform``.
        """
        if not isinstance(texts, list):
            texts = [texts]

        vocab_set = set(vocab)
        parsed_texts = []
        for text in tqdm(texts, desc="parsing texts"):
            tokens = [t for t in self.tokenizer(text) if t in vocab_set]
            parsed_texts.append(" ".join(tokens))

        # lowercase=False: the tokens are already normalized by the tokenizer;
        # letting CountVectorizer lowercase them again would make a
        # case-preserving vocabulary (no_lower=True) silently miss every match.
        vectorizer = CountVectorizer(
            vocabulary=vocab,
            tokenizer=lambda x: x.split(),
            lowercase=False,
        )
        sparse_bow = vectorizer.fit_transform(parsed_texts)
        return parsed_texts, sparse_bow

    @staticmethod
    def _unpack_items(items, label_name):
        """Split JSON records into parallel text, timestamp and label lists."""
        texts, times, labels = [], [], []
        for item in items:
            texts.append(item['text'])
            times.append(str(item['timestamp']))
            if label_name and label_name in item:
                labels.append(item[label_name])
        return texts, times, labels

    def preprocess_jsonlist(self, dataset_dir, label_name=None, use_partition=True):
        """Load a JSON-lines dataset from *dataset_dir* and preprocess it.

        Args:
            dataset_dir: Directory holding ``train.jsonlist``/``test.jsonlist``
                (``use_partition=True``) or a single ``docs.jsonl``.
            label_name: Optional record key holding the document label.
            use_partition: Read the pre-split files instead of ``docs.jsonl``.

        Returns:
            The dict produced by :meth:`preprocess`, extended with
            ``train_times`` (and ``test_times`` when a test set exists) and the
            ``time2id`` mapping.
        """
        if use_partition:
            train_items = file_utils.read_jsonlist(os.path.join(dataset_dir, 'train.jsonlist'))
            test_items = file_utils.read_jsonlist(os.path.join(dataset_dir, 'test.jsonlist'))
        else:
            raw_path = os.path.join(dataset_dir, 'docs.jsonl')
            with open(raw_path, 'r', encoding='utf-8') as f:
                train_items = [json.loads(line.strip()) for line in f if line.strip()]
            test_items = []

        logger.info(f"Found training documents {len(train_items)} testing documents {len(test_items)}")

        raw_train_texts, raw_train_times, train_labels = self._unpack_items(train_items, label_name)
        raw_test_texts, raw_test_times, test_labels = self._unpack_items(test_items, label_name)

        # NOTE(review): timestamps are compared as strings, so ids follow
        # lexicographic order ("10" < "9"); fine for equal-width values such
        # as four-digit years — confirm for other timestamp formats.
        all_times = sorted(set(raw_train_times + raw_test_times))
        time2id = {t: i for i, t in enumerate(all_times)}

        train_times = np.array([time2id[t] for t in raw_train_times], dtype=np.int32)
        test_times = np.array([time2id[t] for t in raw_test_times], dtype=np.int32) if raw_test_times else None

        rst = self.preprocess(raw_train_texts, train_labels, raw_test_texts, test_labels)
        train_idx = rst.get("train_idx")
        test_idx = rst.get("test_idx")

        # Keep only the timestamps of documents that survived filtering.
        rst["train_times"] = train_times[train_idx]
        if test_times is not None and test_idx is not None:
            rst["test_times"] = test_times[test_idx]

        rst["time2id"] = time2id

        return rst

    def convert_labels(self, train_labels, test_labels):
        """Map labels to integer ids (sorted label order).

        Labels are converted only when *train_labels* is non-empty; otherwise
        both arguments are returned unchanged. *test_labels* may be None.
        """
        if train_labels:
            # `test_labels or ()` tolerates the None default of preprocess().
            label_list = sorted(set(train_labels).union(test_labels or ()))
            label2id = {label: i for i, label in enumerate(label_list)}

            logger.info(f"label2id: {label2id}")

            train_labels = [label2id[label] for label in train_labels]

            if test_labels:
                test_labels = [label2id[label] for label in test_labels]

        return train_labels, test_labels

    def _tokenize_corpus(self, raw_texts, desc, doc_counts_counter):
        """Tokenize *raw_texts*, updating document frequencies in place."""
        texts = []
        for text in tqdm(raw_texts, desc=desc):
            tokens = self.tokenizer(text)
            doc_counts_counter.update(set(tokens))
            texts.append(' '.join(tokens))
        return texts

    def _kept_indices(self, texts):
        """Return integer indices of texts with at least ``min_term`` tokens."""
        kept = [i for i, text in enumerate(texts) if len(text.split()) >= self.min_term]
        # An explicit integer dtype keeps an empty result usable as an index
        # array (np.asarray([]) would be float64 and break fancy indexing).
        return np.asarray(kept, dtype=int)

    def preprocess(
            self,
            raw_train_texts,
            train_labels=None,
            raw_test_texts=None,
            test_labels=None,
            pretrained_WE=True
        ):
        """Tokenize the corpus, build the vocabulary and the bag-of-words.

        Args:
            raw_train_texts: Training documents.
            train_labels: Optional labels aligned with the training documents.
            raw_test_texts: Optional test documents.
            test_labels: Optional labels aligned with the test documents.
            pretrained_WE: Also build GloVe embeddings for the vocabulary.

        Returns:
            Dict with ``vocab``, ``train_bow``, ``train_texts``, ``train_idx``
            and, when available, ``train_labels``, ``test_texts``,
            ``test_bow``, ``test_idx``, ``test_labels``, ``word_embeddings``.

        Raises:
            ValueError: If tokenization leaves no token at all.
        """
        np.random.seed(self.seed)

        has_test = raw_test_texts is not None and len(raw_test_texts) > 0
        doc_counts_counter = Counter()

        train_labels, test_labels = self.convert_labels(train_labels, test_labels)

        train_texts = self._tokenize_corpus(raw_train_texts, "loading train texts", doc_counts_counter)
        test_texts = []
        if has_test:
            test_texts = self._tokenize_corpus(raw_test_texts, "loading test texts", doc_counts_counter)

        if not doc_counts_counter:
            raise ValueError("No tokens left after tokenization; cannot build a vocabulary.")

        words, doc_counts = zip(*doc_counts_counter.most_common())
        doc_freqs = np.array(doc_counts) / float(len(train_texts) + len(test_texts))

        vocab = [
            word for word, count, freq in zip(words, doc_counts, doc_freqs)
            if count >= self.min_doc_count and freq <= self.max_doc_freq
        ]

        # `words` is ordered by document frequency, so this keeps the most
        # common words before the final alphabetical sort.
        if self.vocab_size is not None:
            vocab = vocab[:self.vocab_size]

        vocab.sort()

        train_idx = self._kept_indices(train_texts)
        test_idx = self._kept_indices(test_texts) if has_test else None

        # Sampling needs an actual test set; with an empty one the sizes below
        # would collapse to zero and wipe out the training set.
        if self.test_sample_size and has_test:
            logger.info("sample train and test sets...")

            train_num = len(train_idx)
            test_num = len(test_idx)
            test_sample_size = min(test_num, self.test_sample_size)
            train_sample_size = int((test_sample_size / self.test_p) * (1 - self.test_p))
            if train_sample_size > train_num:
                test_sample_size = int((train_num / (1 - self.test_p)) * self.test_p)
                train_sample_size = train_num

            train_idx = train_idx[np.sort(np.random.choice(train_num, train_sample_size, replace=False))]
            test_idx = test_idx[np.sort(np.random.choice(test_num, test_sample_size, replace=False))]

            logger.info(f"sampled train size: {len(train_idx)}")
            logger.info(f"sampled test size: {len(test_idx)}")

        train_texts, train_bow = self.parse([train_texts[i] for i in train_idx], vocab)

        rst = {
            'vocab': vocab,
            'train_bow': train_bow,
            "train_texts": train_texts,
            "train_idx": train_idx,  # indices of kept train samples
        }

        if train_labels:
            rst['train_labels'] = np.asarray(train_labels)[train_idx]

        logger.info(f"Real vocab size: {len(vocab)}")
        logger.info(f"Real training size: {len(train_texts)} \t avg length: {rst['train_bow'].sum() / max(len(train_texts), 1):.3f}")

        if has_test:
            rst['test_texts'], rst['test_bow'] = self.parse([test_texts[i] for i in test_idx], vocab)
            rst["test_idx"] = test_idx  # indices of kept test samples

            if test_labels:
                rst['test_labels'] = np.asarray(test_labels)[test_idx]

            logger.info(f"Real testing size: {len(rst['test_texts'])} \t avg length: {rst['test_bow'].sum() / max(len(rst['test_texts']), 1):.3f}")

        if pretrained_WE:
            rst['word_embeddings'] = make_word_embeddings(vocab)

        return rst

    def save(
            self,
            output_dir,
            vocab,
            train_texts,
            train_bow,
            word_embeddings=None,
            train_labels=None,
            test_texts=None,
            test_bow=None,
            test_labels=None,
            train_times=None,
            test_times=None,
            time2id=None
        ):
        """Write the preprocessing outputs into *output_dir*.

        Optional arguments left as None (or empty label collections) are
        skipped. ``time2id`` is written as JSON into ``time2id.txt``.
        """
        file_utils.make_dir(output_dir)

        file_utils.save_text(vocab, os.path.join(output_dir, "vocab.txt"))
        file_utils.save_text(train_texts, os.path.join(output_dir, "train_texts.txt"))
        scipy.sparse.save_npz(os.path.join(output_dir, "train_bow.npz"), scipy.sparse.csr_matrix(train_bow))

        if word_embeddings is not None:
            scipy.sparse.save_npz(os.path.join(output_dir, "word_embeddings.npz"), word_embeddings)

        # Labels coming from preprocess() are NumPy arrays, whose truth value
        # is ambiguous; test for "present and non-empty" explicitly.
        if _has_items(train_labels):
            np.savetxt(os.path.join(output_dir, "train_labels.txt"), train_labels, fmt='%i')

        if train_times is not None:
            np.savetxt(os.path.join(output_dir, "train_times.txt"), train_times, fmt='%i')

        if test_bow is not None:
            scipy.sparse.save_npz(os.path.join(output_dir, "test_bow.npz"), scipy.sparse.csr_matrix(test_bow))

        if test_texts is not None:
            file_utils.save_text(test_texts, os.path.join(output_dir, "test_texts.txt"))

        if _has_items(test_labels):
            np.savetxt(os.path.join(output_dir, "test_labels.txt"), test_labels, fmt='%i')

        if test_times is not None:
            np.savetxt(os.path.join(output_dir, "test_times.txt"), test_times, fmt='%i')

        if time2id is not None:
            with open(os.path.join(output_dir, "time2id.txt"), "w", encoding="utf-8") as f:
                json.dump(time2id, f, indent=2)
time2id is not None: + with open(f"{output_dir}/time2id.txt", "w", encoding="utf-8") as f: + json.dump(time2id, f, indent=2) + diff --git a/backend/datasets/data/download.py b/backend/datasets/data/download.py new file mode 100644 index 0000000000000000000000000000000000000000..e6fc2a357811eb3d79cc0eb82a76f190d93c2374 --- /dev/null +++ b/backend/datasets/data/download.py @@ -0,0 +1,32 @@ +import os +import zipfile +from torchvision.datasets.utils import download_url +from backend.datasets.utils.logger import Logger + + +logger = Logger("WARNING") + + +def download_dataset(dataset_name, cache_path="~/.topmost"): + cache_path = os.path.expanduser(cache_path) + raw_filename = f'{dataset_name}.zip' + + if dataset_name in ['Wikitext-103']: + # download from Git LFS. + zipped_dataset_url = f"https://media.githubusercontent.com/media/BobXWu/TopMost/main/data/{raw_filename}" + else: + zipped_dataset_url = f"https://raw.githubusercontent.com/BobXWu/TopMost/master/data/{raw_filename}" + + logger.info(zipped_dataset_url) + + download_url(zipped_dataset_url, root=cache_path, filename=raw_filename, md5=None) + + path = f'{cache_path}/{raw_filename}' + with zipfile.ZipFile(path, 'r') as zip_ref: + zip_ref.extractall(cache_path) + + os.remove(path) + + +if __name__ == '__main__': + download_dataset('20NG') diff --git a/backend/datasets/data/file_utils.py b/backend/datasets/data/file_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0defcaf4e54b965b6b8a3ff361bac5722330f5e3 --- /dev/null +++ b/backend/datasets/data/file_utils.py @@ -0,0 +1,39 @@ +import os +import json + + +def make_dir(path): + os.makedirs(path, exist_ok=True) + + +def read_text(path): + texts = list() + with open(path, 'r', encoding='utf-8', errors='ignore') as file: + for line in file: + texts.append(line.strip()) + return texts + + +def save_text(texts, path): + with open(path, 'w', encoding='utf-8') as file: + for text in texts: + file.write(text.strip() + '\n') + + +def 
read_jsonlist(path): + data = list() + with open(path, 'r', encoding='utf-8') as input_file: + for line in input_file: + data.append(json.loads(line)) + return data + + +def save_jsonlist(list_of_json_objects, path, sort_keys=True): + with open(path, 'w', encoding='utf-8') as output_file: + for obj in list_of_json_objects: + output_file.write(json.dumps(obj, sort_keys=sort_keys) + '\n') + + +def split_text_word(texts): + texts = [text.split() for text in texts] + return texts diff --git a/backend/datasets/dynamic_dataset.py b/backend/datasets/dynamic_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..5462e56ca43e357f955a1bf2d1b9bb76a5e01ed1 --- /dev/null +++ b/backend/datasets/dynamic_dataset.py @@ -0,0 +1,90 @@ +import torch +from torch.utils.data import Dataset, DataLoader +import numpy as np +import scipy.sparse +import scipy.io +from backend.datasets.data import file_utils + + +class _SequentialDataset(Dataset): + def __init__(self, bow, times, time_wordfreq): + super().__init__() + self.bow = bow + self.times = times + self.time_wordfreq = time_wordfreq + + def __len__(self): + return len(self.bow) + + def __getitem__(self, index): + return_dict = { + 'bow': self.bow[index], + 'times': self.times[index], + 'time_wordfreq': self.time_wordfreq[self.times[index]], + } + + return return_dict + + +class DynamicDataset: + def __init__(self, dataset_dir, batch_size=200, read_labels=False, use_partition=False, device='cuda', as_tensor=True): + + self.load_data(dataset_dir, read_labels, use_partition) + + self.vocab_size = len(self.vocab) + self.train_size = len(self.train_bow) + self.num_times = int(self.train_times.max()) + 1 # assuming train_times is a numpy array + self.train_time_wordfreq = self.get_time_wordfreq(self.train_bow, self.train_times) + + print('train size: ', len(self.train_bow)) + if use_partition: + print('test size: ', len(self.test_bow)) + print('vocab size: ', len(self.vocab)) + print('average length: 
{:.3f}'.format(self.train_bow.sum(1).mean().item())) + print('num of each time slice: ', self.num_times, np.bincount(self.train_times)) + + if as_tensor: + self.train_bow = torch.from_numpy(self.train_bow).float().to(device) + self.train_times = torch.from_numpy(self.train_times).long().to(device) + self.train_time_wordfreq = torch.from_numpy(self.train_time_wordfreq).float().to(device) + + if use_partition: + self.test_bow = torch.from_numpy(self.test_bow).float().to(device) + self.test_times = torch.from_numpy(self.test_times).long().to(device) + + self.train_dataset = _SequentialDataset(self.train_bow, self.train_times, self.train_time_wordfreq) + + if use_partition: + self.test_dataset = _SequentialDataset(self.test_bow, self.test_times, self.train_time_wordfreq) + + self.train_dataloader = DataLoader(self.train_dataset, batch_size=batch_size, shuffle=True) + + def load_data(self, path, read_labels, use_partition=False): + self.train_bow = scipy.sparse.load_npz(f'{path}/train_bow.npz').toarray().astype('float32') + self.train_texts = file_utils.read_text(f'{path}/train_texts.txt') + self.train_times = np.loadtxt(f'{path}/train_times.txt').astype('int32') + self.vocab = file_utils.read_text(f'{path}/vocab.txt') + self.word_embeddings = scipy.sparse.load_npz(f'{path}/word_embeddings.npz').toarray().astype('float32') + + self.pretrained_WE = self.word_embeddings # preserve compatibility + + if read_labels: + self.train_labels = np.loadtxt(f'{path}/train_labels.txt').astype('int32') + + if use_partition: + self.test_bow = scipy.sparse.load_npz(f'{path}/test_bow.npz').toarray().astype('float32') + self.test_texts = file_utils.read_text(f'{path}/test_texts.txt') + self.test_times = np.loadtxt(f'{path}/test_times.txt').astype('int32') + if read_labels: + self.test_labels = np.loadtxt(f'{path}/test_labels.txt').astype('int32') + + # word frequency at each time slice. 
+ def get_time_wordfreq(self, bow, times): + train_time_wordfreq = np.zeros((self.num_times, self.vocab_size)) + for time in range(self.num_times): + idx = np.where(times == time)[0] + train_time_wordfreq[time] += bow[idx].sum(0) + cnt_times = np.bincount(times) + + train_time_wordfreq = train_time_wordfreq / cnt_times[:, np.newaxis] + return train_time_wordfreq diff --git a/backend/datasets/preprocess.py b/backend/datasets/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..8518ec3ca64bf46b3586e291b50f8b0d97c93fc0 --- /dev/null +++ b/backend/datasets/preprocess.py @@ -0,0 +1,362 @@ +import json +import os +import numpy as np +from collections import OrderedDict +import tempfile +import gensim.downloader +from tqdm import tqdm +from backend.datasets.utils.logger import Logger +import scipy.sparse +from gensim.models.phrases import Phrases, Phraser +from typing import List, Union +from octis.preprocessing.preprocessing import Preprocessing + +logger = Logger("WARNING") + +class Preprocessor: + def __init__(self, + docs_jsonl_path: str, + output_folder: str, + use_partition: bool = False, + use_bigrams: bool = False, + min_count_bigram: int = 5, + threshold_bigram: int = 10, + remove_punctuation: bool = True, + lemmatize: bool = True, + stopword_list: Union[str, List[str]] = None, + min_chars: int = 3, + min_words_docs: int = 10, + min_df: Union[int, float] = 0.0, + max_df: Union[int, float] = 1.0, + max_features: int = None, + language: str = 'english'): + + self.docs_jsonl_path = docs_jsonl_path + self.output_folder = output_folder + self.use_partition = use_partition + self.use_bigrams = use_bigrams + self.min_count_bigram = min_count_bigram + self.threshold_bigram = threshold_bigram + + os.makedirs(self.output_folder, exist_ok=True) + + self.preprocessing_params = { + 'remove_punctuation': remove_punctuation, + 'lemmatize': lemmatize, + 'stopword_list': stopword_list, + 'min_chars': min_chars, + 'min_words_docs': min_words_docs, + 
'min_df': min_df, + 'max_df': max_df, + 'max_features': max_features, + 'language': language + } + self.preprocessor_octis = Preprocessing(**self.preprocessing_params) + + def _load_data_to_temp_files(self): + """Loads data from JSONL and writes to temporary files for OCTIS preprocessor.""" + raw_texts = [] + raw_timestamps = [] + raw_labels = [] + has_labels = False + + with open(self.docs_jsonl_path, 'r', encoding='utf-8') as f: + for line in f: + data = json.loads(line.strip()) + # Remove newlines from text + clean_text = data.get('text', '').replace('\n', ' ').replace('\r', ' ') + clean_text = " ".join(clean_text.split()) + raw_texts.append(clean_text) + raw_timestamps.append(data.get('timestamp', '')) + label = data.get('label', '') + if label: + has_labels = True + raw_labels.append(label) + + # Create temporary files + temp_dir = tempfile.mkdtemp() + temp_docs_path = os.path.join(temp_dir, "temp_docs.txt") + temp_labels_path = None + + with open(temp_docs_path, 'w', encoding='utf-8') as f_docs: + for text in raw_texts: + f_docs.write(f"{text}\n") + + if has_labels: + temp_labels_path = os.path.join(temp_dir, "temp_labels.txt") + with open(temp_labels_path, 'w', encoding='utf-8') as f_labels: + for label in raw_labels: + f_labels.write(f"{label}\n") + + print(f"Loaded {len(raw_texts)} raw documents and created temporary files in {temp_dir}.") + return raw_texts, raw_timestamps, raw_labels, temp_docs_path, temp_labels_path, temp_dir + + def _make_word_embeddings(self, vocab): + """ + Generates word embeddings for the given vocabulary using GloVe. + For n-grams (e.g., "wordA_wordB", "wordX_wordY_wordZ" for n>=2), + the resultant embedding is the sum of the embeddings of its constituent + single words (wordA + wordB + ...). + """ + print("Loading GloVe word embeddings...") + glove_vectors = gensim.downloader.load('glove-wiki-gigaword-200') + + # Initialize word_embeddings matrix with zeros. 
+ # This ensures that words not found (single or n-gram constituents) + # will have a zero vector embedding. + word_embeddings = np.zeros((len(vocab), glove_vectors.vectors.shape[1]), dtype=np.float32) + + num_found = 0 + + try: + # Using a set for key_word_list for O(1) average time complexity lookup + key_word_list = set(glove_vectors.index_to_key) + except AttributeError: # For older gensim versions + key_word_list = set(glove_vectors.index2word) + + print("Generating word embeddings for vocabulary (including n-grams)...") + for i, word in enumerate(tqdm(vocab, desc="Processing vocabulary words")): + if '_' in word: # Check if it's a potential n-gram (n >= 2) + parts = word.split('_') + + # Check if *all* constituent words are present in GloVe + all_parts_in_glove = True + for part in parts: + if part not in key_word_list: + all_parts_in_glove = False + break # One part not found, stop checking + + if all_parts_in_glove: + # If all parts are found, sum their embeddings + resultant_vector = np.zeros(glove_vectors.vectors.shape[1], dtype=np.float32) + for part in parts: + resultant_vector += glove_vectors[part] + + word_embeddings[i] = resultant_vector + num_found += 1 + # Else: one or more constituent words not found, embedding remains zero + else: # It's a single word (n=1) + if word in key_word_list: + word_embeddings[i] = glove_vectors[word] + num_found += 1 + # Else: single word not found, embedding remains zero + + logger.info(f'Number of found embeddings (including n-grams): {num_found}/{len(vocab)}') + return word_embeddings # Return as dense NumPy array + + + def _save_doc_length_stats(self, filepath: str, output_path: str): + doc_lengths = [] + try: + with open(filepath, 'r', encoding='utf-8') as f: + for line in f: + doc = line.strip() + if doc: + doc_lengths.append(len(doc)) + except Exception as e: + print(f"Error processing '{filepath}': {e}") + return + + if not doc_lengths: + print(f"No documents found in '{filepath}'.") + return + + stats = { + 
"avg_len": float(np.mean(doc_lengths)), + "std_len": float(np.std(doc_lengths)), + "max_len": int(np.max(doc_lengths)), + "min_len": int(np.min(doc_lengths)), + "num_docs": int(len(doc_lengths)) + } + + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(stats, f, indent=4) + print(f"Saved document length stats to: {output_path}") + + + def preprocess(self): + print("Loading data and creating temporary files for OCTIS...") + _, raw_timestamps, _, temp_docs_path, temp_labels_path, temp_dir = \ + self._load_data_to_temp_files() + + print("Starting OCTIS pre-processing using file paths and specified parameters...") + octis_dataset = self.preprocessor_octis.preprocess_dataset( + documents_path=temp_docs_path, + labels_path=temp_labels_path + ) + + # Clean up temporary files immediately + os.remove(temp_docs_path) + if temp_labels_path: + os.remove(temp_labels_path) + os.rmdir(temp_dir) + print(f"Temporary files in {temp_dir} cleaned up.") + + # --- Proxy: Save __original_indexes and then manually load it --- + temp_indexes_dir = tempfile.mkdtemp() + temp_indexes_file = os.path.join(temp_indexes_dir, "temp_original_indexes.txt") + + print(f"Saving __original_indexes to {temp_indexes_file}...") + octis_dataset._save_document_indexes(temp_indexes_file) + + # Manually load the indexes from the file + original_indexes_after_octis = [] + with open(temp_indexes_file, 'r') as f_indexes: + for line in f_indexes: + original_indexes_after_octis.append(int(line.strip())) # Read as int + + # Clean up the temporary indexes file and its directory + os.remove(temp_indexes_file) + os.rmdir(temp_indexes_dir) + print("Temporary indexes file cleaned up.") + # --- End Proxy --- + + # Get processed data from OCTIS Dataset object + processed_corpus_octis_list = octis_dataset.get_corpus() # List of list of tokens + processed_labels_octis = octis_dataset.get_labels() # List of labels + + print("Max index in original_indexes_after_octis:", max(original_indexes_after_octis)) + 
print("Length of raw_timestamps:", len(raw_timestamps)) + + # Filter timestamps based on documents that survived OCTIS preprocessing + filtered_timestamps = [raw_timestamps[i] for i in original_indexes_after_octis] + + print(f"OCTIS preprocessing complete. {len(processed_corpus_octis_list)} documents remaining.") + + if self.use_bigrams: + print("Generating bigrams with Gensim...") + phrases = Phrases(processed_corpus_octis_list, min_count=self.min_count_bigram, threshold=self.threshold_bigram) + bigram_phraser = Phraser(phrases) + bigrammed_corpus_list = [bigram_phraser[doc] for doc in processed_corpus_octis_list] + print("Bigram generation complete.") + else: + print("Skipping bigram generation as 'use_bigrams' is False.") + bigrammed_corpus_list = processed_corpus_octis_list # Use the original processed list + + + # Convert back to list of strings for easier handling if needed later, but keep as list of lists for BOW + bigrammed_texts_for_file = [" ".join(doc) for doc in bigrammed_corpus_list] + print("Bigram generation complete.") + + # Build Vocabulary from OCTIS output (after bigrams) + # We need a flat list of all tokens to build the vocabulary + all_tokens = [token for doc in bigrammed_corpus_list for token in doc] + vocab = sorted(list(set(all_tokens))) # Sorted unique words form the vocabulary + word_to_id = {word: i for i, word in enumerate(vocab)} + + # Create BOW matrix manually + print("Creating Bag-of-Words representations...") + rows, cols, data = [], [], [] + for i, doc_tokens in enumerate(bigrammed_corpus_list): + doc_word_counts = {} + for token in doc_tokens: + if token in word_to_id: # Ensure token is in our final vocab + doc_word_counts[word_to_id[token]] = doc_word_counts.get(word_to_id[token], 0) + 1 + for col_id, count in doc_word_counts.items(): + rows.append(i) + cols.append(col_id) + data.append(count) + + # Shape is (num_documents, vocab_size) + bow_matrix = scipy.sparse.csc_matrix((data, (rows, cols)), 
shape=(len(bigrammed_corpus_list), len(vocab))) + print("Bag-of-Words complete.") + + # Handle partitioning if required + if self.use_partition: + num_docs = len(bigrammed_corpus_list) + train_size = int(0.8 * num_docs) + + train_texts = bigrammed_texts_for_file[:train_size] + train_bow_matrix = bow_matrix[:train_size] + train_timestamps = filtered_timestamps[:train_size] + train_labels = processed_labels_octis[:train_size] if processed_labels_octis else [] + + test_texts = bigrammed_texts_for_file[train_size:] + test_bow_matrix = bow_matrix[train_size:] + test_timestamps = filtered_timestamps[train_size:] + test_labels = processed_labels_octis[train_size:] if processed_labels_octis else [] + + else: + train_texts = bigrammed_texts_for_file + train_bow_matrix = bow_matrix + train_timestamps = filtered_timestamps + train_labels = processed_labels_octis + test_texts = [] + test_timestamps = [] + test_labels = [] + + # Generate word embeddings using the provided function + word_embeddings = self._make_word_embeddings(vocab) + + # Process timestamps to 0, 1, 2...T and create time2id.txt + print("Processing timestamps...") + unique_timestamps = sorted(list(set(train_timestamps + test_timestamps))) + time_to_id = {timestamp: i for i, timestamp in enumerate(unique_timestamps)} + + train_times_ids = [time_to_id[ts] for ts in train_timestamps] + test_times_ids = [time_to_id[ts] for ts in test_timestamps] if self.use_partition else [] + print("Timestamps processed.") + + # Save files + print(f"Saving preprocessed files to {self.output_folder}...") + + # 1. vocab.txt + with open(os.path.join(self.output_folder, "vocab.txt"), "w", encoding="utf-8") as f: + for word in vocab: + f.write(f"{word}\n") + + # 2. 
train_texts.txt + train_text_path = os.path.join(self.output_folder, "train_texts.txt") + with open(train_text_path, "w", encoding="utf-8") as f: + for doc in train_texts: + f.write(f"{doc}\n") + + # Save document length stats + doc_stats_path = os.path.join(self.output_folder, "length_stats.json") + self._save_doc_length_stats(train_text_path, doc_stats_path) + + # 3. train_bow.npz + scipy.sparse.save_npz(os.path.join(self.output_folder, "train_bow.npz"), train_bow_matrix) + + # 4. word_embeddings.npz + sparse_word_embeddings = scipy.sparse.csr_matrix(word_embeddings) + scipy.sparse.save_npz(os.path.join(self.output_folder, "word_embeddings.npz"), sparse_word_embeddings) + + # 5. train_labels.txt (if labels exist) + if train_labels: + with open(os.path.join(self.output_folder, "train_labels.txt"), "w", encoding="utf-8") as f: + for label in train_labels: + f.write(f"{label}\n") + + # 6. train_times.txt + with open(os.path.join(self.output_folder, "train_times.txt"), "w", encoding="utf-8") as f: + for time_id in train_times_ids: + f.write(f"{time_id}\n") + + # Files for test set (if use_partition=True) + if self.use_partition: + # 7. test_bow.npz + scipy.sparse.save_npz(os.path.join(self.output_folder, "test_bow.npz"), test_bow_matrix) + + # 8. test_texts.txt + with open(os.path.join(self.output_folder, "test_texts.txt"), "w", encoding="utf-8") as f: + for doc in test_texts: + f.write(f"{doc}\n") + + # 9. test_labels.txt (if labels exist) + if test_labels: + with open(os.path.join(self.output_folder, "test_labels.txt"), "w", encoding="utf-8") as f: + for label in test_labels: + f.write(f"{label}\n") + + # 10. test_times.txt + with open(os.path.join(self.output_folder, "test_times.txt"), "w", encoding="utf-8") as f: + for time_id in test_times_ids: + f.write(f"{time_id}\n") + + # 11. 
time2id.txt + sorted_time_to_id = OrderedDict(sorted(time_to_id.items(), key=lambda item: item[1])) + with open(os.path.join(self.output_folder, "time2id.txt"), "w", encoding="utf-8") as f: + json.dump(sorted_time_to_id, f, indent=4) + + print("All files saved successfully.") \ No newline at end of file diff --git a/backend/datasets/utils/_utils.py b/backend/datasets/utils/_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..89ffa0e4465f2a07063d1caeabb386bc7b80c221 --- /dev/null +++ b/backend/datasets/utils/_utils.py @@ -0,0 +1,37 @@ +import numpy as np +from backend.datasets.data import file_utils + + +def get_top_words(beta, vocab, num_top_words, verbose=False): + topic_str_list = list() + for i, topic_dist in enumerate(beta): + topic_words = np.array(vocab)[np.argsort(topic_dist)][:-(num_top_words + 1):-1] + topic_str = ' '.join(topic_words) + topic_str_list.append(topic_str) + if verbose: + print('Topic {}: {}'.format(i, topic_str)) + + return topic_str_list + + +def get_stopwords_set(stopwords=[]): + from backend.datasets.data.download import download_dataset + + if stopwords == 'English': + from gensim.parsing.preprocessing import STOPWORDS as stopwords + + elif stopwords in ['mallet', 'snowball']: + download_dataset('stopwords', cache_path='./') + path = f'./stopwords/{stopwords}_stopwords.txt' + stopwords = file_utils.read_text(path) + + stopword_set = frozenset(stopwords) + + return stopword_set + + +if __name__ == '__main__': + print(list(get_stopwords_set('English'))[:10]) + print(list(get_stopwords_set('mallet'))[:10]) + print(list(get_stopwords_set('snowball'))[:10]) + print(list(get_stopwords_set())[:10]) diff --git a/backend/datasets/utils/logger.py b/backend/datasets/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..4878a9e063cba7c57f6873143f1e0f7ed8ba6cd3 --- /dev/null +++ b/backend/datasets/utils/logger.py @@ -0,0 +1,29 @@ +import logging + + +class Logger: + def __init__(self, level): + 
self.logger = logging.getLogger('TopMost') + self.set_level(level) + self._add_handler() + self.logger.propagate = False + + def info(self, message): + self.logger.info(f"{message}") + + def warning(self, message): + self.logger.warning(f"WARNING: {message}") + + def set_level(self, level): + levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] + if level in levels: + self.logger.setLevel(level) + + def _add_handler(self): + sh = logging.StreamHandler() + sh.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(message)s')) + self.logger.addHandler(sh) + + # Remove duplicate handlers + if len(self.logger.handlers) > 1: + self.logger.handlers = [self.logger.handlers[0]] diff --git a/backend/evaluation/CoherenceModel_ttc.py b/backend/evaluation/CoherenceModel_ttc.py new file mode 100644 index 0000000000000000000000000000000000000000..07e58faf82f9a6240d50e457899014dab5745f3a --- /dev/null +++ b/backend/evaluation/CoherenceModel_ttc.py @@ -0,0 +1,862 @@ +import logging +import multiprocessing as mp +from collections import namedtuple + +import numpy as np + +from gensim import interfaces, matutils +from gensim import utils +from gensim.topic_coherence import ( + segmentation, probability_estimation, + direct_confirmation_measure, indirect_confirmation_measure, + aggregation, +) +from gensim.topic_coherence.probability_estimation import unique_ids_from_segments + +# Set up logging for this module +logger = logging.getLogger(__name__) + +# Define sets for categorizing coherence measures based on their probability estimation method +BOOLEAN_DOCUMENT_BASED = {'u_mass'} +SLIDING_WINDOW_BASED = {'c_v', 'c_uci', 'c_npmi', 'c_w2v'} + +# Create a namedtuple to define the structure of a coherence measure pipeline +# Each pipeline consists of a segmentation (seg), probability estimation (prob), +# confirmation measure (conf), and aggregation (aggr) function. 
+_make_pipeline = namedtuple('Coherence_Measure', 'seg, prob, conf, aggr') + +# Define the supported coherence measures and their respective pipeline components +COHERENCE_MEASURES = { + 'u_mass': _make_pipeline( + segmentation.s_one_pre, + probability_estimation.p_boolean_document, + direct_confirmation_measure.log_conditional_probability, + aggregation.arithmetic_mean + ), + 'c_v': _make_pipeline( + segmentation.s_one_set, + probability_estimation.p_boolean_sliding_window, + indirect_confirmation_measure.cosine_similarity, + aggregation.arithmetic_mean + ), + 'c_w2v': _make_pipeline( + segmentation.s_one_set, + probability_estimation.p_word2vec, + indirect_confirmation_measure.word2vec_similarity, + aggregation.arithmetic_mean + ), + 'c_uci': _make_pipeline( + segmentation.s_one_one, + probability_estimation.p_boolean_sliding_window, + direct_confirmation_measure.log_ratio_measure, + aggregation.arithmetic_mean + ), + 'c_npmi': _make_pipeline( + segmentation.s_one_one, + probability_estimation.p_boolean_sliding_window, + direct_confirmation_measure.log_ratio_measure, + aggregation.arithmetic_mean + ), +} + +# Define default sliding window sizes for different coherence measures +SLIDING_WINDOW_SIZES = { + 'c_v': 110, + 'c_w2v': 5, + 'c_uci': 10, + 'c_npmi': 10, + 'u_mass': None # u_mass does not use a sliding window +} + + +class CoherenceModel_ttc(interfaces.TransformationABC): + """Objects of this class allow for building and maintaining a model for topic coherence. + + Examples + --------- + One way of using this feature is through providing a trained topic model. A dictionary has to be explicitly provided + if the model does not contain a dictionary already + + .. 
sourcecode:: pycon + + >>> from gensim.test.utils import common_corpus, common_dictionary + >>> from gensim.models.ldamodel import LdaModel + >>> # Assuming CoherenceModel_ttc is imported or defined in the current scope + >>> # from your_module import CoherenceModel_ttc # if saved in a file + >>> + >>> model = LdaModel(common_corpus, 5, common_dictionary) + >>> + >>> cm = CoherenceModel_ttc(model=model, corpus=common_corpus, coherence='u_mass') + >>> coherence = cm.get_coherence() # get coherence value + + Another way of using this feature is through providing tokenized topics such as: + + .. sourcecode:: pycon + + >>> from gensim.test.utils import common_corpus, common_dictionary + >>> # Assuming CoherenceModel_ttc is imported or defined in the current scope + >>> # from your_module import CoherenceModel_ttc # if saved in a file + >>> topics = [ + ... ['human', 'computer', 'system', 'interface'], + ... ['graph', 'minors', 'trees', 'eps'] + ... ] + >>> + >>> cm = CoherenceModel_ttc(topics=topics, corpus=common_corpus, dictionary=common_dictionary, coherence='u_mass') + >>> coherence = cm.get_coherence() # get coherence value + + """ + def __init__(self, model=None, topics=None, texts=None, corpus=None, dictionary=None, + window_size=None, keyed_vectors=None, coherence='c_v', topn=20, processes=-1): + """ + Initializes the CoherenceModel_ttc. + + Parameters + ---------- + model : :class:`~gensim.models.basemodel.BaseTopicModel`, optional + Pre-trained topic model. Should be provided if `topics` is not provided. + Supports models that implement the `get_topics` method. + topics : list of list of str, optional + List of tokenized topics. If provided, `dictionary` must also be provided. + texts : list of list of str, optional + Tokenized texts, needed for coherence models that use sliding window based (e.g., `c_v`, `c_uci`, `c_npmi`). + corpus : iterable of list of (int, number), optional + Corpus in Bag-of-Words format. 
+ dictionary : :class:`~gensim.corpora.dictionary.Dictionary`, optional + Gensim dictionary mapping of id word to create corpus. + If `model.id2word` is present and `dictionary` is None, `model.id2word` will be used. + window_size : int, optional + The size of the window to be used for coherence measures using boolean sliding window as their + probability estimator. For 'u_mass' this doesn't matter. + If None, default window sizes from `SLIDING_WINDOW_SIZES` are used. + keyed_vectors : :class:`~gensim.models.keyedvectors.KeyedVectors`, optional + Pre-trained word embeddings (e.g., Word2Vec model) for 'c_w2v' coherence. + coherence : {'u_mass', 'c_v', 'c_uci', 'c_npmi', 'c_w2v'}, optional + Coherence measure to be used. + 'u_mass' requires `corpus` (or `texts` which will be converted to corpus). + 'c_v', 'c_uci', 'c_npmi', 'c_w2v' require `texts`. + topn : int, optional + Integer corresponding to the number of top words to be extracted from each topic. Defaults to 20. + processes : int, optional + Number of processes to use for probability estimation phase. Any value less than 1 will be interpreted as + `num_cpus - 1`. Defaults to -1. 
+ """ + # Ensure either a model or explicit topics are provided + if model is None and topics is None: + raise ValueError("One of 'model' or 'topics' has to be provided.") + # If topics are provided, a dictionary is mandatory to convert tokens to IDs + elif topics is not None and dictionary is None: + raise ValueError("Dictionary has to be provided if 'topics' are to be used.") + + self.keyed_vectors = keyed_vectors + # Ensure a data source (keyed_vectors, texts, or corpus) is provided for coherence calculation + if keyed_vectors is None and texts is None and corpus is None: + raise ValueError("One of 'texts', 'corpus', or 'keyed_vectors' has to be provided.") + + # Determine the dictionary to use + if dictionary is None: + # If no explicit dictionary, try to use the model's dictionary + if isinstance(model.id2word, utils.FakeDict): + # If model's id2word is a FakeDict, it means no proper dictionary is associated + raise ValueError( + "The associated dictionary should be provided with the corpus or 'id2word'" + " for topic model should be set as the associated dictionary.") + else: + self.dictionary = model.id2word + else: + self.dictionary = dictionary + + # Store coherence type and window size + self.coherence = coherence + self.window_size = window_size + if self.window_size is None: + # Use default window size if not specified + self.window_size = SLIDING_WINDOW_SIZES[self.coherence] + + # Store texts and corpus + self.texts = texts + self.corpus = corpus + + # Validate inputs based on coherence type + if coherence in BOOLEAN_DOCUMENT_BASED: + # For document-based measures (e.g., u_mass), corpus is preferred + if utils.is_corpus(corpus)[0]: + self.corpus = corpus + elif self.texts is not None: + # If texts are provided, convert them to corpus format + self.corpus = [self.dictionary.doc2bow(text) for text in self.texts] + else: + raise ValueError( + "Either 'corpus' with 'dictionary' or 'texts' should " + "be provided for %s coherence." 
@classmethod
def for_models(cls, models, dictionary, topn=20, **kwargs):
    """
    Build a coherence model whose probability estimates cover the topics of all given models.

    The top words of every model are extracted with `top_topics_as_word_lists`, and the
    resulting per-model topic lists are handed over to `for_topics`.

    Parameters
    ----------
    models : iterable
        Topic models to evaluate; each one is passed to `top_topics_as_word_lists`.
    dictionary : object
        Dictionary used to translate word ids into words. It is also forwarded to
        `for_topics` under the ``dictionary`` keyword.
    topn : int, optional
        Number of top words extracted per topic. Defaults to 20.
    **kwargs
        Additional keyword arguments forwarded unchanged to `for_topics`.

    Returns
    -------
    object
        Whatever `for_topics` returns for the collected topics.
    """
    per_model_topics = []
    for topic_model in models:
        per_model_topics.append(cls.top_topics_as_word_lists(topic_model, dictionary, topn))

    # `dictionary` and `topn` are named parameters, so they can never already be in kwargs.
    kwargs.update(dictionary=dictionary, topn=topn)
    return cls.for_topics(per_model_topics, **kwargs)
@classmethod
def for_topics(cls, topics_as_topn_terms, **kwargs):
    """
    Build a coherence model with probabilities estimated for every term of all given topic sets.

    Useful when only raw topics are available rather than topic model objects.

    Parameters
    ----------
    topics_as_topn_terms : list of list of list of str
        One entry per model (topic set). Each entry is a list of topics and each topic
        is a list of its top-N words, for example
        ``[[['w1', 'w2'], ['w3', 'w4']], [['w1', 'w5'], ['w6', 'w7']]]``.
        The three nesting levels matter: the longest *topic* determines the upper
        bound for ``topn``.
    **kwargs
        Forwarded to the class constructor. An optional ``topn`` entry is removed
        from it and capped at the length of the longest topic.

    Returns
    -------
    object
        An instance of ``cls`` whose accumulator has been estimated over the union
        of all terms and whose ``topn`` is set to the (capped) requested value.

    Raises
    ------
    ValueError
        If `topics_as_topn_terms` is empty or contains an empty topic set.
    """
    if not topics_as_topn_terms:
        raise ValueError("len(topics_as_topn_terms) must be > 0.")
    if any(len(topic_list) == 0 for topic_list in topics_as_topn_terms):
        raise ValueError("Found an empty topic listing in `topics_as_topn_terms`.")

    # Longest topic over all topic sets; every set is non-empty here, so max() is safe.
    longest_topic = max(
        len(topic) for topic_list in topics_as_topn_terms for topic in topic_list)

    # Never promise more top words than the data actually contains.
    topn_for_coherence = min(kwargs.pop('topn', longest_topic), longest_topic)

    # A single "super topic" holding every term makes the accumulator cover all topic sets.
    super_topic = utils.flatten(topics_as_topn_terms)

    logger.info(
        "Number of relevant terms for all %d models (or topic sets): %d",
        len(topics_as_topn_terms), len(super_topic))

    # Use `cls` rather than a hard-coded class name so subclasses get instances of themselves.
    cm = cls(topics=[super_topic], topn=len(super_topic), **kwargs)
    cm.estimate_probabilities()

    # Probabilities are estimated; switch back to the topn wanted for scoring.
    cm.topn = topn_for_coherence
    return cm
@property
def topn(self):
    """
    Number of top words per topic used when computing coherence.

    Returns
    -------
    int
        The value stored in ``_topn``.
    """
    return self._topn


@topn.setter
def topn(self, topn):
    """
    Change the number of top words per topic.

    When the stored topics are shorter than the new value, a model is needed to
    extract longer topics; without one the request cannot be satisfied.

    Parameters
    ----------
    topn : int
        New number of top words.

    Raises
    ------
    ValueError
        If the stored topics are shorter than `topn` and no model is set.
    """
    stored_topics = self._topics
    # With no topics stored (None or empty) there is nothing to expand, so the
    # value is simply recorded instead of failing on `self._topics[0]`.
    has_topics = stored_topics is not None and len(stored_topics) > 0
    requires_expansion = has_topics and len(stored_topics[0]) < topn

    if self.model is not None:
        self._topn = topn
        if requires_expansion:
            # Re-assigning the model goes through the `model` setter, which
            # re-extracts topics using the new `_topn`.
            self.model = self._model
    else:
        if requires_expansion:
            raise ValueError("Model unavailable and topic sizes are less than topn=%d" % topn)
        # Shrinking is fine: the `topics` getter truncates on read.
        self._topn = topn
def _ensure_elements_are_ids(self, topic):
    """
    Convert one topic to an array of dictionary word IDs.

    The topic may be given either as word tokens or as word IDs. Both readings
    are tried and the one that recognises more elements wins; elements unknown
    to the dictionary are dropped.

    Parameters
    ----------
    topic : iterable of str or iterable of int
        A single topic as tokens or as IDs.

    Returns
    -------
    :class:`numpy.ndarray`
        Word IDs of the recognised elements, in input order. Empty when nothing
        in `topic` is known to the dictionary.
    """
    token2id = self.dictionary.token2id

    # Reading 1: elements are tokens. Unknown tokens are skipped on purpose.
    ids_from_tokens = [token2id[token] for token in topic if token in token2id]

    # Reading 2: elements are already IDs. Filtering inside the first reading
    # means it can never raise KeyError, so this reading has to be evaluated
    # explicitly instead of being an exception fallback.
    ids_from_ids = [_id for _id in topic if _id in self.dictionary]

    if len(ids_from_ids) > len(ids_from_tokens):
        return np.array(ids_from_ids)
    # Ties (including "nothing recognised") keep the token reading.
    return np.array(ids_from_tokens)
def _topics_differ(self, new_topics):
    """
    Tell whether `new_topics` differ from the topics currently stored.

    Topics are compared one by one, so topic lists whose topics have different
    lengths (which happens once unknown tokens have been dropped) are compared
    correctly instead of being treated as different merely for being ragged.

    Parameters
    ----------
    new_topics : list of array-like of int or None
        Candidate topics as word IDs.

    Returns
    -------
    bool
        True only if both topic lists are set and they are not equal.
    """
    current_topics = self._topics
    if new_topics is None or current_topics is None:
        return False
    if len(new_topics) != len(current_topics):
        return True
    return any(
        not np.array_equal(new_topic, old_topic)
        for new_topic, old_topic in zip(new_topics, current_topics))
def segment_topics(self):
    """
    Run the segmentation step of the coherence pipeline on the current topics.

    Returns
    -------
    object
        The result of ``self.measure.seg`` applied to ``self.topics``; its exact
        structure is defined by the segmentation function.
    """
    segmenter = self.measure.seg
    current_topics = self.topics
    return segmenter(current_topics)
def get_coherence_per_topic(self, segmented_topics=None, with_std=False, with_support=False):
    """
    Compute one coherence value per topic with the pipeline's confirmation measure.

    Parameters
    ----------
    segmented_topics : object, optional
        Already segmented topics. When None, the current topics are segmented
        with ``self.measure.seg``.
    with_std : bool, optional
        Forwarded to the confirmation measure as ``with_std``. Defaults to False.
    with_support : bool, optional
        Forwarded to the confirmation measure as ``with_support``. Defaults to False.

    Returns
    -------
    object
        Whatever ``self.measure.conf`` returns for the segmented topics and the
        accumulator.
    """
    pipeline = self.measure
    segments = segmented_topics if segmented_topics is not None else pipeline.seg(self.topics)

    # Probabilities are estimated lazily, the first time they are needed.
    if self._accumulator is None:
        self.estimate_probabilities(segments)

    conf_options = {'with_std': with_std, 'with_support': with_support}
    needs_no_extras = self.coherence in BOOLEAN_DOCUMENT_BASED or self.coherence == 'c_w2v'
    if not needs_no_extras:
        if self.coherence == 'c_v':
            conf_options.update(topics=self.topics, measure='nlr', gamma=1)
        else:
            # Remaining measures only differ in whether the score is normalised.
            conf_options['normalize'] = self.coherence == 'c_npmi'

    return pipeline.conf(segments, self._accumulator, **conf_options)
def compare_models(self, models):
    """
    Compare several topic models by coherence.

    Topics are extracted from each model with `_get_topics_from_model` using the
    current ``topn``; the comparison itself is delegated to `compare_model_topics`.

    Parameters
    ----------
    models : iterable
        Topic models to compare.

    Returns
    -------
    object
        The result of `compare_model_topics` for the extracted topics.
    """
    extracted = []
    for candidate in models:
        extracted.append(self._get_topics_from_model(candidate, self.topn))
    return self.compare_model_topics(extracted)
def _compare_model_topics(self, model_topics):
    """
    Score every topic set at several ``topn`` values and average the results.

    Note that this method changes ``self.topics`` and ``self.topn`` and does not
    restore them itself.

    Parameters
    ----------
    model_topics : list
        Topic sets to evaluate; each one is assigned to ``self.topics`` in turn.

    Returns
    -------
    list of (list of float, float)
        One pair per topic set: the per-topic coherences averaged over the
        ``topn`` grid, and the aggregated coherence averaged over the grid.
    """
    coherences = []

    # Grid runs from topn downwards in steps of 5 while staying above
    # min(topn - 1, 4); e.g. topn=20 gives [20, 15, 10, 5].
    last_topn_value = min(self.topn - 1, 4)
    topn_grid = list(range(self.topn, last_topn_value, -5))
    if not topn_grid or max(topn_grid) < 1:
        # Fall back to a single, valid topn.
        topn_grid = [max(1, min(self.topn, 5))]

    for model_num, topics in enumerate(model_topics):
        self.topics = topics

        coherence_at_n = {}
        for n in topn_grid:
            self.topn = n
            topic_coherences = self.get_coherence_per_topic()

            # Only the aggregate uses imputed values: NaNs become the mean of the
            # valid scores, or 0.0 when there is no valid score at all.
            filled_coherences = np.array(topic_coherences, dtype=float)
            nan_mask = np.isnan(filled_coherences)
            if nan_mask.any():
                if nan_mask.all():
                    filled_coherences[nan_mask] = 0.0
                else:
                    filled_coherences[nan_mask] = np.nanmean(filled_coherences)

            coherence_at_n[n] = (topic_coherences, self.aggregate_measures(filled_coherences))

        all_topic_coherences_at_n, all_avg_coherences_at_n = zip(*coherence_at_n.values())

        # Rows are topn values, columns are topics.
        avg_topic_coherences = np.vstack(all_topic_coherences_at_n).mean(axis=0)
        model_coherence = np.mean(all_avg_coherences_at_n)

        # Module logger with lazy arguments, consistent with the rest of the class.
        logger.info("Avg coherence for model %d: %.5f", model_num, model_coherence)
        coherences.append((avg_topic_coherences.tolist(), model_coherence))

    return coherences
def _topic_smoothness(self, topics: List[List[str]]) -> float:
    """
    Average pairwise word overlap between the given topics.

    Each topic is cut to its first ``self.topn`` words. For every topic, the
    share of those words it has in common with each other topic (shared words
    divided by ``self.topn``) is averaged; the final score is the mean of these
    per-topic averages.

    Args:
        topics: Topics as sequences of words.

    Returns:
        float: 1.0 when there are fewer than two topics, otherwise the mean
        overlap described above.
    """
    n_topics = len(topics)
    if n_topics <= 1:
        return 1.0

    limit = self.topn
    truncated = [set(topic[:limit]) for topic in topics]

    per_topic = []
    for idx, anchor in enumerate(truncated):
        # With at least two topics there is always at least one "other" topic.
        ratios = [len(anchor & other) / limit
                  for pos, other in enumerate(truncated) if pos != idx]
        per_topic.append(sum(ratios) / len(ratios))

    return float(sum(per_topic) / n_topics)
"""Calculates the overall Temporal Topic Coherence (TTC).""" + ttq_df = self.get_ttq_dataframe() + return ttq_df['avg_temporal_coherence'].mean() + + def get_tts_score(self) -> float: + """Calculates the overall Temporal Topic Smoothness (TTS).""" + ttq_df = self.get_ttq_dataframe() + return ttq_df['avg_temporal_smoothness'].mean() + + def get_ttq_score(self) -> float: + """Calculates the overall Temporal Topic Quality (TTQ).""" + ttq_df = self.get_ttq_dataframe() + return ttq_df['ttq_product'].mean() + + def get_tc_score(self) -> float: + """Calculates the overall yearly Topic Coherence (TC).""" + tq_df = self.get_tq_dataframe() + return tq_df['avg_coherence'].mean() + + def get_td_score(self) -> float: + """Calculates the overall yearly Topic Diversity (TD).""" + tq_df = self.get_tq_dataframe() + return tq_df['diversity'].mean() + + def get_tq_score(self) -> float: + """Calculates the overall yearly Topic Quality (TQ).""" + tq_df = self.get_tq_dataframe() + return tq_df['tq_product'].mean() + + def get_dtq_summary(self) -> Dict[str, float]: + """ + Computes all dynamic topic quality metrics and returns them in a dictionary. 
def deduplicate_docs(collected_docs):
    """
    Drop repeated documents while keeping the first occurrence and the input order.

    A document is identified by its ``"id"`` entry when it has one; only
    documents without an id are identified by the MD5 digest of their ``"text"``.
    The digest is computed lazily, so documents that carry an id are neither
    hashed nor required to have a ``"text"`` entry.

    Args:
        collected_docs: Iterable of document dicts.

    Returns:
        list: The unique documents, in order of first appearance.

    Raises:
        KeyError: If a document has neither ``"id"`` nor ``"text"``.
    """
    seen = set()
    unique_docs = []
    for doc in collected_docs:
        if "id" in doc:
            key = doc["id"]
        else:
            # MD5 is used purely as a content fingerprint, not for security.
            key = md5(doc["text"].encode()).hexdigest()
        if key not in seen:
            seen.add(key)
            unique_docs.append(doc)
    return unique_docs
def get_yearly_counts_for_word(index, word):
    """
    Return, per year, how many documents the index lists for a word.

    Args:
        index: Mapping from word to a mapping of year -> collection of document ids.
        word: The word to look up.

    Returns:
        tuple: ``(years, counts)`` as two lists of equal length, ordered by year
        (years are converted with ``int``). Both lists are empty when the word
        is not in the index, in which case an error line is printed.
    """
    if word in index:
        pairs = [(int(year), len(doc_ids)) for year, doc_ids in index[word].items()]
        pairs.sort()
        years = [year for year, _ in pairs]
        counts = [count for _, count in pairs]
        return years, counts

    print(f"[ERROR] Word '{word}' not found in index.")
    return [], []
def get_intersection_doc_counts_by_year(index, docs_path, words, all_years):
    """
    Count, for each year, the documents that contain every one of `words`.

    The counts are computed from the inverted index alone. The previous
    implementation re-read the whole corpus file once per word per year only
    to take the length of the result, even though the index already lists the
    matching document IDs.

    Args:
        index (dict): Inverted index mapping word -> {year (int): [doc_ids]}.
        docs_path (str): Unused; kept so existing callers keep working.
        words (Iterable[str]): Words that must all occur in a document.
        all_years (Iterable): Years to report; each is converted with int()
            for the index lookup but used unchanged as the result key.

    Returns:
        dict: Maps each entry of `all_years` to the number of document IDs
        shared by all words in that year (0 when `words` is empty or any word
        is absent for that year).

    Note:
        This assumes every ID in the index refers to a line that exists in the
        corpus file, which holds when the index was built from that file.
    """
    words = list(words)
    year_counts = {}
    for y in all_years:
        year = int(y)
        id_sets = []
        for word in words:
            # Test membership before indexing: the index may be a nested
            # defaultdict, and a plain lookup would insert empty entries.
            if word in index and year in index[word]:
                id_sets.append(set(index[word][year]))
            else:
                id_sets.append(set())
        year_counts[y] = len(set.intersection(*id_sets)) if id_sets else 0
    return year_counts
def highlight_words(text, query_words, highlight_color="#24F31D", lemma_to_forms=None):
    """
    Highlight all surface forms of each query lemma in the text using a colored tag.

    The text is HTML-escaped and every whole-word, case-insensitive match is
    wrapped in a <mark> element whose background is `highlight_color`.
    Previously the color was never used and matches were returned unchanged,
    so nothing was highlighted.

    Args:
        text (str): The input raw document text.
        query_words (List[str]): Lemmatized query tokens to highlight.
        highlight_color (str): Color to use for highlighting.
        lemma_to_forms (Dict[str, Set[str]]): Maps a lemma to its surface forms.
            A lemma missing from the map is matched literally.

    Returns:
        str: HTML-safe text with the matches wrapped in <mark> tags.
    """
    # Expand query words to include all surface forms
    expanded_forms = set()
    for lemma in query_words:
        if lemma_to_forms and lemma in lemma_to_forms:
            expanded_forms.update(lemma_to_forms[lemma])
        else:
            expanded_forms.add(lemma)  # Fallback if map is missing

    # Longest first so "running" wins over "run"; the secondary key keeps the
    # pattern deterministic. Empty forms would match at every word boundary.
    forms = sorted((form for form in expanded_forms if form), key=lambda w: (-len(w), w))
    if not forms:
        return html.escape(text)

    # One alternation applied in a single pass over the RAW text. Substituting
    # word by word over already-escaped text could match inside HTML entities
    # ("amp" in "&amp;") or inside markup inserted for an earlier word.
    alternation = "|".join(re.escape(form) for form in forms)
    pattern = re.compile(rf"\b({alternation})\b", flags=re.IGNORECASE)
    color = html.escape(highlight_color, quote=True)

    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append(
            f'<mark style="background-color: {color};">{html.escape(match.group(1))}</mark>'
        )
        cursor = match.end()
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)
+ + Returns: + list[dict]: A list of document dictionaries that were found. Each dictionary + is augmented with an 'id' key corresponding to its line number. + """ + # Use a set for efficient O(1) lookups. + doc_ids_to_find = set(doc_ids) + found_docs = {} + + if not doc_ids_to_find: + return [] + + try: + with open(docs_file_path, 'r', encoding='utf-8') as f: + for i, line in enumerate(f): + # If the current line number is one we're looking for + if i in doc_ids_to_find: + try: + doc = json.loads(line) + # Explicitly add the line number as the 'id' + doc['id'] = i + found_docs[i] = doc + # Optimization: stop reading the file once all docs are found + if len(found_docs) == len(doc_ids_to_find): + break + except json.JSONDecodeError: + # Skip malformed lines but inform the user + print(f"[WARNING] Skipping malformed JSON on line {i+1} in {docs_file_path}") + continue + + except FileNotFoundError: + print(f"[ERROR] Document file not found at: {docs_file_path}") + return [] + except Exception as e: + print(f"[ERROR] An unexpected error occurred while reading documents: {e}") + return [] + + # Return the documents in the same order as the original doc_ids list + # This ensures consistency for downstream processing. 
def build_inverse_lemma_map(docs_file_path, cache_path=None):
    """
    Build or load a mapping from lemma -> set of surface forms seen in corpus.

    If cache_path is provided and exists, loads from it.
    Else builds from scratch (one JSON record with a 'text' field per line of
    docs_file_path) and, when cache_path is provided, saves to it.

    Args:
        docs_file_path (str): Path to the JSON-lines corpus.
        cache_path (str | None): Optional path of the JSON cache file.

    Returns:
        dict[str, set[str]]: Lemma -> lowercase surface forms. A plain dict is
        returned on both the cached and the freshly built path, so lookups of
        unknown lemmas behave the same on the first run as on later runs.
    """
    if cache_path and os.path.exists(cache_path):
        print(f"[INFO] Loading cached lemma_to_forms from {cache_path}")
        with open(cache_path, "r", encoding="utf-8") as f:
            raw_map = json.load(f)
        return {lemma: set(forms) for lemma, forms in raw_map.items()}

    print(f"[INFO] Building inverse lemma map from {docs_file_path}...")
    lemma_to_forms = defaultdict(set)

    with open(docs_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            doc = json.loads(line)
            tokens = tokenize(doc['text'])
            # Lemmatize the normalized token stream so that the recorded
            # surface forms match what tokenize() yields at indexing time.
            spacy_doc = nlp(" ".join(tokens))
            for token in spacy_doc:
                lemma_to_forms[token.lemma_].add(token.text.lower())

    if cache_path:
        print(f"[INFO] Saving lemma_to_forms to {cache_path}")
        # dirname is '' for a bare filename and os.makedirs('') raises.
        cache_dir = os.path.dirname(cache_path)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)
        with open(cache_path, "w", encoding="utf-8") as f:
            # Sets are not JSON-serializable; sorting keeps the cache file stable.
            json.dump({k: sorted(v) for k, v in lemma_to_forms.items()}, f, indent=2)

    return dict(lemma_to_forms)
def save_index_to_disk(index, index_path):
    """
    Write the inverted index to `index_path` as JSON.

    Timestamps are converted to strings because JSON object keys must be
    strings; load_index_from_disk converts them back to int.

    Args:
        index (dict): Mapping word -> {timestamp: [doc_ids]}.
        index_path (str): Destination file. Missing parent directories are
            created; a bare filename is written to the current directory.
    """
    index_clean = {
        word: {str(ts): list(doc_ids) for ts, doc_ids in ts_dict.items()}
        for word, ts_dict in index.items()
    }
    # dirname is '' for a bare filename and os.makedirs('') raises
    # FileNotFoundError, so only create a directory when there is one.
    parent = os.path.dirname(index_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(index_path, "w", encoding='utf-8') as f:
        json.dump(index_clean, f, ensure_ascii=False)
def detect_peaks(trend, prominence=0.001, distance=2):
    """
    Locate the peaks of a word's trend over time.

    Args:
        trend: List or np.array of floats (word importance over time).
        prominence: Minimum prominence a peak must have (tune to the scale
            of `trend`).
        distance: Minimum number of samples between neighbouring peaks.

    Returns:
        List of indices into `trend` at which peaks occur.
    """
    series = np.array(trend)
    # find_peaks returns (indices, properties); only the indices are needed.
    peak_positions = find_peaks(series, prominence=prominence, distance=distance)[0]
    return peak_positions.tolist()
def get_interesting_words(beta, vocab, topic_id, top_k_final=10, restrict_to=None):
    """
    Suggests interesting words by prioritizing "bursty" or "emerging" terms,
    making it effective at capturing important low-probability words.

    The score of a word is the product of three factors:
    its peak probability in the topic divided by its mean in the topic
    (burstiness), its peak in the topic divided by its mean over all topics
    and timestamps (peak specificity), and an IDF-like term that shrinks for
    words active in many topics.

    Parameters:
    - beta: np.ndarray (T, K, V) - Topic-word distributions for each timestamp.
      If the rows do not sum to ~1, a sharpened softmax is applied first.
    - vocab: list of V words - The vocabulary.
    - topic_id: int - The ID of the topic to analyze.
    - top_k_final: int - The number of words to return.
    - restrict_to: optional list of str - Restricts scoring to a subset of words.

    Returns:
    - list of up to top_k_final interesting words (strings), best first.
      Empty when `restrict_to` matches no vocabulary word.
    """
    _, K, V = beta.shape

    # --- 1. Detect whether softmax is needed ---
    row_sums = beta.sum(axis=2)
    is_prob_dist = np.allclose(row_sums, 1.0, atol=1e-2)

    if not is_prob_dist:
        print("🔁 Beta is not normalized — applying softmax across words per topic.")
        # The 1e-3 temperature sharpens the distribution considerably.
        beta = softmax(beta / 1e-3, axis=2)

    # --- 2. Now extract normalized topic slice ---
    topic_beta = beta[:, topic_id, :]  # Shape: (T, V)

    # Mean and Peak probability within the topic for each word
    mean_topic = topic_beta.mean(axis=0)  # Shape: (V,)
    peak_topic = topic_beta.max(axis=0)   # Shape: (V,)

    # Corpus-wide mean for baseline comparison
    mean_all = beta.mean(axis=(0, 1))  # Shape: (V,)

    # Epsilon to prevent division by zero for words that never appear
    epsilon = 1e-9

    # --- 3. Calculate the three core components of the score ---

    # a) Burstiness: how much a word's peak stands out from its own average.
    burstiness_score = peak_topic / (mean_topic + epsilon)

    # b) Peak specificity: how much the word's peak in this topic stands out
    #    from its average presence in the entire corpus.
    peak_specificity_score = peak_topic / (mean_all + epsilon)

    # c) Uniqueness: penalizes words active in many topics. The mean over time
    #    gives, per topic, the fraction of timestamps at which the word is active.
    active_in_topics = (beta > 1e-5).mean(axis=0)  # Shape: (K, V)
    idf_like = np.log((K + 1) / (active_in_topics.sum(axis=0) + 1))  # Shape: (V,)

    # --- 4. Compute Final Interestingness Score ---
    final_scores = burstiness_score * peak_specificity_score * idf_like

    # --- 5. Rank and select top words ---
    if restrict_to is not None:
        restrict_set = set(restrict_to)
        word_indices = [i for i, w in enumerate(vocab) if w in restrict_set]
    else:
        # A plain list, not np.arange(V): the emptiness check below raises
        # "truth value of an array ... is ambiguous" on an ndarray with V > 1.
        word_indices = list(range(V))

    if not word_indices:
        return []

    # Rank the filtered indices by the final score in descending order
    # (sorted() is stable, so ties keep vocabulary order).
    sorted_indices = sorted(word_indices, key=lambda i: -final_scores[i])

    return [vocab[i] for i in sorted_indices[:top_k_final]]
class ChatMistral(BaseChatModel):
    """
    Minimal LangChain chat model backed by a Hugging Face Inference API endpoint.

    Only the content of HumanMessage inputs is sent to the endpoint, joined
    with newlines; the generated text is returned as a single AIMessage.

    Changes from the previous version:
    - `hf_token` and `model_url` are declared as model fields and set through
      `super().__init__`. BaseChatModel is a pydantic model, so assigning
      undeclared attributes in `__init__` without initializing the base class
      fails.
    - The abstract `_llm_type` property is implemented.
    - `invoke` is no longer overridden. The inherited implementation accepts
      strings, prompt values and message lists (needed for `prompt | llm`
      chains) and delegates to `_generate`.
    - The HTTP call has a timeout and reports HTTP or payload errors explicitly.

    Args:
        hf_token (str | None): Hugging Face API token; falls back to the
            HF_TOKEN environment variable.
        model_url (str | None): Inference endpoint URL; defaults to the hosted
            Mistral-7B-Instruct-v0.1 endpoint.
    """

    hf_token: str = ""
    model_url: str = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.1"
    # Seconds to wait for the inference endpoint before giving up.
    request_timeout: float = 60.0

    def __init__(self, hf_token=None, model_url=None, **kwargs):
        fields = dict(kwargs)
        fields["hf_token"] = hf_token or os.getenv("HF_TOKEN") or ""
        if model_url:
            fields["model_url"] = model_url
        super().__init__(**fields)

    @property
    def headers(self):
        """HTTP headers carrying the bearer token."""
        return {"Authorization": f"Bearer {self.hf_token}"}

    @property
    def _llm_type(self) -> str:
        return "mistral-hf-inference"

    def _call(self, prompt: str) -> str:
        """
        Send `prompt` to the endpoint and return the generated text.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
            ValueError: If the response body is not a list whose first item
                has a "generated_text" entry.
        """
        response = requests.post(
            self.model_url,
            headers=self.headers,
            json={"inputs": prompt, "parameters": {"max_new_tokens": 256}},
            timeout=self.request_timeout,
        )
        response.raise_for_status()
        payload = response.json()
        try:
            return payload[0]["generated_text"]
        except (KeyError, IndexError, TypeError) as err:
            raise ValueError(
                f"Unexpected response from {self.model_url}: {payload!r}"
            ) from err

    def _generate(self, messages, stop=None, run_manager=None, **kwargs) -> ChatResult:
        # `stop` and `run_manager` are accepted to match the base-class
        # signature but are not forwarded to the endpoint.
        prompt = "\n".join(msg.content for msg in messages if isinstance(msg, HumanMessage))
        message = AIMessage(content=self._call(prompt))
        return ChatResult(generations=[ChatGeneration(message=message)])
def get_llm(provider: str, model: str, api_key: str = None):
    """
    Build a LangChain chat model for the given provider.

    Args:
        provider (str): One of "OpenAI", "Anthropic", "Gemini", "Mistral".
        model (str): Provider-specific model name. Not applied for "Mistral":
            ChatMistral is configured by endpoint URL and uses its default.
        api_key (str | None): API key; when omitted, read from
            OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY or
            MISTRAL_API_KEY respectively.

    Returns:
        A chat model instance with temperature 0 (where the client accepts it).

    Raises:
        ValueError: If the provider is unsupported or no API key is available.
    """
    if provider == "OpenAI":
        api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("Missing OpenAI API key.")
        return ChatOpenAI(model_name=model, temperature=0, openai_api_key=api_key)

    elif provider == "Anthropic":
        api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            raise ValueError("Missing Anthropic API key.")
        return ChatAnthropic(model=model, temperature=0, anthropic_api_key=api_key)

    elif provider == "Gemini":
        api_key = api_key or os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("Missing Gemini API key.")
        # NOTE: the former proxy "patch" was removed. It replaced
        # requests.Request (google.auth.transport.requests.requests is the
        # requests package itself) with a lambda that called requests.Request,
        # i.e. itself, and passed a `proxies` argument that requests.Request
        # does not accept. Proxies set through the standard HTTP(S)_PROXY
        # environment variables are picked up by requests without patching.
        return ChatGoogleGenerativeAI(model=model, temperature=0, google_api_key=api_key)

    elif provider == "Mistral":
        api_key = api_key or os.getenv("MISTRAL_API_KEY")
        if not api_key:
            raise ValueError("Missing Mistral API key.")
        # ChatMistral.__init__ accepts only (hf_token, model_url); the previous
        # model/temperature/mistral_api_key keywords did not match it.
        return ChatMistral(hf_token=api_key)

    else:
        raise ValueError(f"Unsupported provider: {provider}")
def get_topic_labels(beta, vocab, time_labels, llm, cache_path):
    """
    Generate a short label for every topic from its top words over time.

    For each topic, one line per timestamp is built in the form
    "<time label>: <top-10 words, comma separated>"; the joined lines are
    passed to label_topic_temporal.

    Args:
        beta: Array of shape (T, K, V) with topic-word weights.
        vocab: List of V vocabulary words.
        time_labels: Sequence giving the display label of each timestamp index.
        llm: LangChain-compatible LLM instance used for labeling.
        cache_path: Path of the JSON label cache, forwarded unchanged.

    Returns:
        dict: Maps topic id (int) to its label (str).
    """
    n_times, n_topics = beta.shape[0], beta.shape[1]
    labels = {}
    for topic_id in range(n_topics):
        trajectory_lines = []
        for t in range(n_times):
            stamp = time_labels[t]
            top_words = get_top_words_at_time(beta, vocab, topic_id, t, top_n=10)
            trajectory_lines.append(f"{stamp}: {', '.join(top_words)}")
        labels[topic_id] = label_topic_temporal(
            "\n".join(trajectory_lines), llm=llm, cache_path=cache_path
        )
    return labels
def summarize_docs(word, timestamp, docs, llm, k):
    """
    Summarize the key themes of the documents containing `word` at `timestamp`.

    Up to `k` documents are selected with MMR (relevance to `word` balanced
    against diversity) and passed to the LLM as bullet-point context.

    Args:
        word (str): The query word.
        timestamp: Time label inserted into the prompt.
        docs (list[dict]): Candidate documents, each with a 'text' field.
        llm: LangChain-compatible chat model.
        k (int): Maximum number of documents to sample.

    Returns:
        tuple[str, list]: The summary text and the sampled documents. On empty
        input or on failure the first element is an explanatory message and
        the second is an empty list. Every path returns a 2-tuple, matching
        summarize_multiword_docs; the empty and error paths used to return a
        third element that the success path did not.
    """
    if not docs:
        return "No documents available for this word at this time.", []

    try:
        model, index, embeddings, documents = build_mmr_index(docs)
        mmr_docs = get_mmr_sample(model, index, embeddings, documents, query=word, k=k)

        context_texts = "\n".join(f"- {doc.page_content}" for doc in mmr_docs)

        prompt_template = ChatPromptTemplate.from_template(
            "Given the following documents from {timestamp} containing the word '{word}', "
            "identify the key themes or distinct discussion points that were prevalent during that time. "
            "Do NOT describe each bullet in detail. Be concise. Each bullet should be a short phrase or sentence "
            "capturing a unique, non-overlapping theme. Avoid any elaboration, examples, or justification.\n\n"
            "Return no more than 5–7 bullets.\n\n"
            "{context_texts}\n\nSummary:"
        )

        chain = prompt_template | llm
        summary = chain.invoke({
            "word": word,
            "timestamp": timestamp,
            "context_texts": context_texts
        }).content.strip()

        return summary, mmr_docs

    except Exception as e:
        # Deliberate best-effort: the UI shows the message instead of crashing.
        return f"[Error summarizing: {e}]", []
def ask_multiturn_followup(history: list, question: str, llm, context_texts: str) -> str:
    """
    Answer a follow-up question grounded in the supplied documents.

    The earlier turns are replayed into a conversation memory, and the new
    question is sent together with the grounding instruction and the documents.

    Args:
        history (list): Previous turns as dictionaries with "role" and
            "content" keys (e.g., [{"role": "user", "content": "..."}]).
            Turns whose role is neither "user" nor "assistant" are ignored.
        question (str): The user's new follow-up question.
        llm: The initialized language model instance.
        context_texts (str): A single string containing the documents used as
            context; required.

    Returns:
        str: The model's answer, or a fixed error message if anything fails.
    """
    try:
        # Rebuild the conversation memory from the turns handed over by the UI.
        memory = ConversationBufferMemory(return_messages=True)
        chat_log = memory.chat_memory
        for turn in history:
            role = turn["role"]
            if role == "user":
                chat_log.add_user_message(turn["content"])
            elif role == "assistant":
                chat_log.add_ai_message(turn["content"])

        # Instruction that keeps the model grounded in the documents.
        system_instruction = (
            "You are an assistant answering questions strictly based on the provided sample documents below. "
            "Your memory contains the previous turns of this conversation. "
            "If the answer is not clearly available in the text, respond with: "
            "'The information is not available in the documents provided.'\n\n"
        )

        # The chain supplies the memory itself, so the input only needs the
        # instruction, the documents and the current question.
        documents_section = f"--- DOCUMENTS ---\n{context_texts.strip()}\n\n"
        question_section = f"--- QUESTION ---\n{question}"
        full_prompt = system_instruction + documents_section + question_section

        conversation = ConversationChain(llm=llm, memory=memory, verbose=False)
        answer = conversation.predict(input=full_prompt)
        return answer.strip()

    except Exception as e:
        # Log the exception; the caller only receives a generic message.
        print(f"[ERROR] in ask_multiturn_followup: {e}")
        return "[Error during multi-turn follow-up. Please check the logs.]"
# Context-window sizes keyed by lowercase provider name. Each entry is an
# ordered sequence of (model-name fragment, token limit); the first fragment
# found inside the model name wins.
_MODEL_TOKEN_LIMITS = {
    "openai": (
        ("gpt-4.1-nano", 1047576),
        ("gpt-4o-mini", 128000),
    ),
    "anthropic": (
        ("claude-3-opus", 200000),
        ("claude-3-sonnet", 200000),
    ),
    "gemini": (
        ("gemini-2.0-flash-lite", 1048576),
        ("gemini-1.5-flash", 1048576),
    ),
    "mistral": (
        ("mistral-small", 32000),
        ("mistral-medium", 32000),
    ),
}

# Limit used when the provider or the model is not recognised.
_DEFAULT_TOKEN_LIMIT = 8000


def get_token_limit_for_model(model_name, provider):
    """Return the context-window size (in tokens) assumed for a model.

    The provider is matched case-insensitively, so both the lowercase form
    (``"openai"``) and the capitalised form used by ``count_tokens``
    (``"OpenAI"``) resolve to the same table. The model is matched by
    substring, so versioned names such as ``"gemini-1.5-flash-002"`` work.

    Args:
        model_name: Model identifier, or ``None``.
        provider: Provider name in any letter case, or ``None``.

    Returns:
        int: The token limit of the first matching entry, or 8000 when the
        provider/model is unknown or either argument is not a string.
    """
    # estimate_k_max_from_word_stats defaults both arguments to None; treat
    # that (and any other non-string) as "unknown" instead of raising.
    if not isinstance(provider, str) or not isinstance(model_name, str):
        return _DEFAULT_TOKEN_LIMIT

    for fragment, limit in _MODEL_TOKEN_LIMITS.get(provider.strip().lower(), ()):
        if fragment in model_name:
            return limit
    return _DEFAULT_TOKEN_LIMIT
def estimate_max_k_fast(docs, margin_ratio=0.1, max_tokens=8000, model_name="gpt-3.5-turbo"):
    """Quickly estimate how many documents fit into a token budget.

    Only the first 20 documents are tokenised (with tiktoken) to estimate the
    average document length; the budget is ``max_tokens`` minus a safety
    margin of ``margin_ratio * max_tokens``.

    Args:
        docs: Sequence of document strings.
        margin_ratio: Fraction of ``max_tokens`` held back as a margin.
        max_tokens: Total token budget.
        model_name: Model whose tiktoken encoding is used. Unknown names fall
            back to ``cl100k_base``, as ``count_tokens_openai`` does.

    Returns:
        int: Estimated number of documents, never more than ``len(docs)``.
        Returns 0 when ``docs`` is empty or the sampled documents contain no
        tokens (the same convention as ``estimate_max_k``).
    """
    # Guard first: an empty corpus would otherwise divide by min(20, 0) == 0.
    if not docs:
        return 0

    try:
        enc = tiktoken.encoding_for_model(model_name)
    except KeyError:
        enc = tiktoken.get_encoding("cl100k_base")  # fallback

    sample = docs[:20]
    avg_len = sum(len(enc.encode(doc)) for doc in sample) / len(sample)
    if avg_len == 0:
        # Every sampled document is empty; there is nothing to size against.
        return 0

    margin = int(max_tokens * margin_ratio)
    available = max_tokens - margin
    # A margin larger than the budget must not produce a negative count.
    return max(0, min(len(docs), int(available // avg_len)))
def pairwise_euclidean_dist(self, x, y):
    """Squared Euclidean distance between every row of ``x`` and of ``y``.

    Uses the expansion ``|x - y|^2 = |x|^2 + |y|^2 - 2 * x.y``. ``y`` must be
    2-D because it is transposed with ``.t()``; ``x`` may carry leading batch
    dimensions, which broadcast through the result.
    """
    x_sq_norm = torch.sum(x ** 2, axis=-1, keepdim=True)
    y_sq_norm = torch.sum(y ** 2, axis=-1)
    cross_term = torch.matmul(x, y.t())
    return x_sq_norm + y_sq_norm - 2 * cross_term
class ETC(nn.Module):
    """Contrastive loss over per-time-slice topic embeddings.

    Within one time slice the topics are pushed apart from each other
    (weighted by ``weight_neg``); each topic is pulled towards the same topic
    of the previous slice (weighted by ``weight_pos``).
    """

    def __init__(self, num_times, temperature, weight_neg, weight_pos):
        super().__init__()
        self.num_times = num_times
        self.weight_neg = weight_neg
        self.weight_pos = weight_pos
        self.temperature = temperature

    def forward(self, topic_embeddings):
        """Return the weighted sum of the negative and positive terms.

        Args:
            topic_embeddings: Indexable by time slice; each entry is a 2-D
                tensor with one embedding per row.

        Returns:
            The scalar loss. The positive term needs at least two time
            slices and is skipped (contributes 0) otherwise.
        """
        loss_neg = 0.
        loss_pos = 0.

        for t in range(self.num_times):
            loss_neg += self.compute_loss(topic_embeddings[t], topic_embeddings[t], self.temperature, self_contrast=True)

        for t in range(1, self.num_times):
            # The previous slice is detached so it only serves as a target.
            loss_pos += self.compute_loss(topic_embeddings[t], topic_embeddings[t - 1].detach(), self.temperature, self_contrast=False, only_pos=True)

        if self.num_times > 0:
            loss_neg *= (self.weight_neg / self.num_times)
        # With a single time slice there are no (t, t - 1) pairs; dividing by
        # num_times - 1 == 0 would raise ZeroDivisionError.
        if self.num_times > 1:
            loss_pos *= (self.weight_pos / (self.num_times - 1))

        return loss_neg + loss_pos

    def compute_loss(self, anchor_feature, contrast_feature, temperature, self_contrast=False, only_pos=False, all_neg=False):
        """Compute one contrastive term between two sets of row vectors.

        Rows are L2-normalised, so logits are cosine similarities divided by
        ``temperature`` (shifted by the row maximum for numerical stability).

        Exactly one mode must be selected; ``self_contrast`` takes precedence
        over ``only_pos``, which takes precedence over ``all_neg``:

        * ``self_contrast=True``: every off-diagonal pair is a negative.
        * ``only_pos=True``: only the diagonal pairs are used, as positives.
        * ``all_neg=True``: every pair is a negative.

        Raises:
            ValueError: If none of the three mode flags is set.
        """
        anchor_dot_contrast = torch.div(
            torch.matmul(F.normalize(anchor_feature, dim=1), F.normalize(contrast_feature, dim=1).T),
            temperature
        )

        logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True)
        logits = anchor_dot_contrast - logits_max.detach()

        if self_contrast is not False:
            # self contrast: push away from each other in the same time slice.
            neg_mask = 1 - torch.eye(logits.shape[0], device=logits.device)
            if logits.shape[0] < 2:
                # A single row has no off-diagonal pair; the mean below would
                # divide by zero and poison the total loss with -inf.
                return logits.new_zeros(())
            exp_logits = torch.exp(logits) * neg_mask
            log_prob = -torch.log(exp_logits.sum(1) + 1e-12)
            return -log_prob.sum() / neg_mask.sum()

        if only_pos is not False:
            # only pos
            pos_mask = torch.eye(logits.shape[0], device=logits.device)
            return -(logits * pos_mask).sum() / pos_mask.sum()

        if all_neg is True:
            log_prob = -torch.log(torch.exp(logits).sum(1) + 1e-12)
            return -log_prob.sum() / (logits.shape[0] * logits.shape[1])

        # Previously this fell through to an unbound local variable.
        raise ValueError("compute_loss requires one of self_contrast, only_pos or all_neg to be set.")
weight_UWE, neg_topk): + super().__init__() + + self.ETC = ETC + self.weight_UWE = weight_UWE + self.num_times = num_times + self.temperature = temperature + self.neg_topk = neg_topk + + def forward(self, time_wordcount, beta, topic_embeddings, word_embeddings): + assert(self.num_times == time_wordcount.shape[0]) + + topk_indices = self.get_topk_indices(beta) + + loss_UWE = 0. + cnt_valid_times = 0. + for t in range(self.num_times): + neg_idx = torch.where(time_wordcount[t] == 0)[0] + + time_topk_indices = topk_indices[t] + neg_idx = list(set(neg_idx.cpu().tolist()).intersection(set(time_topk_indices.cpu().tolist()))) + neg_idx = torch.tensor(neg_idx).long().to(time_wordcount.device) + + if len(neg_idx) == 0: + continue + + time_neg_WE = word_embeddings[neg_idx] + + # topic_embeddings[t]: K x D + # word_embeddings[neg_idx]: |V_{neg}| x D + loss_UWE += self.ETC.compute_loss(topic_embeddings[t], time_neg_WE, temperature=self.temperature, all_neg=True) + cnt_valid_times += 1 + + if cnt_valid_times > 0: + loss_UWE *= (self.weight_UWE / cnt_valid_times) + + return loss_UWE + + def get_topk_indices(self, beta): + # topk_indices: T x K x neg_topk + topk_indices = torch.topk(beta, k=self.neg_topk, dim=-1).indices + topk_indices = torch.flatten(topk_indices, start_dim=1) + return topk_indices diff --git a/backend/models/CFDTM/__init__.py b/backend/models/CFDTM/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/backend/models/CFDTM/__pycache__/CFDTM.cpython-39.pyc b/backend/models/CFDTM/__pycache__/CFDTM.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf1b7309ac980d553f4139b5135edfd5c6e4843f Binary files /dev/null and b/backend/models/CFDTM/__pycache__/CFDTM.cpython-39.pyc differ diff --git a/backend/models/CFDTM/__pycache__/ETC.cpython-39.pyc b/backend/models/CFDTM/__pycache__/ETC.cpython-39.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..eb2c8b35b98314d8f97bef53afcce509d005638d Binary files /dev/null and b/backend/models/CFDTM/__pycache__/ETC.cpython-39.pyc differ diff --git a/backend/models/CFDTM/__pycache__/Encoder.cpython-39.pyc b/backend/models/CFDTM/__pycache__/Encoder.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c99da65b2b8d0fc2d04079fc2da48a55ac13adb Binary files /dev/null and b/backend/models/CFDTM/__pycache__/Encoder.cpython-39.pyc differ diff --git a/backend/models/CFDTM/__pycache__/UWE.cpython-39.pyc b/backend/models/CFDTM/__pycache__/UWE.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aec5ec4377c2267065c0061f47fe07426df368bc Binary files /dev/null and b/backend/models/CFDTM/__pycache__/UWE.cpython-39.pyc differ diff --git a/backend/models/CFDTM/__pycache__/__init__.cpython-39.pyc b/backend/models/CFDTM/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e3c079a46d26c4300245cf6a9e638bae4729558 Binary files /dev/null and b/backend/models/CFDTM/__pycache__/__init__.cpython-39.pyc differ diff --git a/backend/models/DBERTopic_trainer.py b/backend/models/DBERTopic_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..5e1ae71820c5c0b18817bf04f87243908484a9e5 --- /dev/null +++ b/backend/models/DBERTopic_trainer.py @@ -0,0 +1,99 @@ +import numpy as np +from bertopic import BERTopic +from backend.datasets.utils import _utils +from backend.datasets.utils.logger import Logger + +logger = Logger("WARNING") + + +class DBERTopicTrainer: + def __init__(self, + dataset, + num_topics=20, + num_top_words=15, + nr_bins=20, + global_tuning=True, + evolution_tuning=True, + datetime_format=None, + verbose=False): + + self.dataset = dataset + self.docs = dataset.raw_documents + self.num_topics=num_topics + # self.timestamps = dataset.train_times + self.vocab = dataset.vocab + self.num_top_words = num_top_words + # 
def get_beta(self):
    """Build a T x K x V topic-word matrix from ``topics_over_time_df``.

    For every (timestamp, topic) row, each entry of the row's
    comma-separated ``"Words"`` string that also occurs in ``self.vocab``
    adds 1 to the matching cell. Each topic row is then normalised to sum to
    (almost) 1; rows without any known word stay all-zero because of the
    ``1e-10`` added to the denominator.

    Requires ``train`` to have been called first, since it sets ``T``, ``K``,
    ``V``, ``vocab``, ``unique_topics``, ``unique_timestamps`` and
    ``topics_over_time_df``.

    Returns:
        np.ndarray: Array of shape ``(T, K, V)``.
    """
    logger.info("Generating β matrix...")

    beta = np.zeros((self.T, self.K, self.V))
    topic_to_index = {topic: idx for idx, topic in enumerate(self.unique_topics)}

    # One dictionary lookup per word instead of two full scans of the vocab
    # array (`in` followed by `np.where`). setdefault keeps the first index,
    # which is what `np.where(...)[0][0]` returned.
    word_to_index = {}
    for idx, word in enumerate(self.vocab):
        word_to_index.setdefault(word, idx)

    # Extract topic representations at each time
    frame = self.topics_over_time_df
    for t_idx, timestamp in enumerate(self.unique_timestamps):
        selection = frame[frame["Timestamp"] == timestamp]
        for topic, words in zip(selection["Topic"], selection["Words"]):
            k = topic_to_index.get(topic)
            if k is None:
                continue
            for word in words.split(", "):
                v = word_to_index.get(word)
                if v is not None:
                    beta[t_idx, k, v] += 1.0

    # Normalize each β_tk to be a probability distribution
    beta = beta / (beta.sum(axis=2, keepdims=True) + 1e-10)
    return beta
class DETM(nn.Module):
    """
    The Dynamic Embedded Topic Model. 2019

    Adji B. Dieng, Francisco J. R. Ruiz, David M. Blei

    Args:
        vocab_size: Number of vocabulary words (V).
        num_times: Number of time slices (T).
        train_size: Number of training documents; rescales per-batch terms
            of the loss to the full corpus.
        train_time_wordfreq: Per-time-slice word frequencies fed to the LSTM
            that infers eta. Stored as given, with no device move or copy;
            judging by ``q_eta_map`` it is presumably a float tensor of
            shape ``(T, V)`` -- confirm against the caller.
        num_topics: Number of topics (K).
        train_WE: If True, word embeddings are learned; otherwise the fixed
            matrix ``pretrained_WE`` is used.
        pretrained_WE: Array or tensor of shape ``(V, rho_size)``; required
            when ``train_WE`` is False.
        en_units: Hidden width of the theta encoder.
        eta_hidden_size: Hidden width of the eta LSTM.
        rho_size: Embedding dimension (L).
        enc_drop: Dropout applied to the theta encoder output.
        eta_nlayers: Number of LSTM layers.
        eta_dropout: Dropout between LSTM layers.
        delta: Variance of the random-walk priors on alpha and eta.
        theta_act: Name of the encoder activation (see ``get_activation``).
        device: Device on which internally created tensors are placed.
    """
    def __init__(self, vocab_size, num_times, train_size, train_time_wordfreq,
                 num_topics=50, train_WE=True, pretrained_WE=None, en_units=800,
                 eta_hidden_size=200, rho_size=300, enc_drop=0.0, eta_nlayers=3,
                 eta_dropout=0.0, delta=0.005, theta_act='relu', device='cpu'):
        super().__init__()

        ## define hyperparameters
        self.num_topics = num_topics
        self.num_times = num_times
        self.vocab_size = vocab_size
        self.eta_hidden_size = eta_hidden_size
        self.rho_size = rho_size
        self.enc_drop = enc_drop
        self.eta_nlayers = eta_nlayers
        self.t_drop = nn.Dropout(enc_drop)
        self.eta_dropout = eta_dropout
        self.delta = delta
        self.train_WE = train_WE
        self.train_size = train_size
        self.rnn_inp = train_time_wordfreq
        self.device = device

        self.theta_act = self.get_activation(theta_act)

        ## define the word embedding matrix \rho
        if self.train_WE:
            self.rho = nn.Linear(self.rho_size, self.vocab_size, bias=False)
        else:
            # The previous code called nn.Embedding(pretrained_WE.size()),
            # which always raised. get_beta only needs a fixed V x L tensor.
            if pretrained_WE is None:
                raise ValueError("pretrained_WE must be provided when train_WE is False.")
            rho = torch.as_tensor(pretrained_WE).detach().clone().float()
            if tuple(rho.shape) != (self.vocab_size, self.rho_size):
                raise ValueError(
                    f"pretrained_WE must have shape ({self.vocab_size}, {self.rho_size}), got {tuple(rho.shape)}."
                )
            self.rho = rho.to(self.device)

        ## define the variational parameters for the topic embeddings over time (alpha) ... alpha is K x T x L
        self.mu_q_alpha = nn.Parameter(torch.randn(self.num_topics, self.num_times, self.rho_size))
        self.logsigma_q_alpha = nn.Parameter(torch.randn(self.num_topics, self.num_times, self.rho_size))

        ## define variational distribution for \theta_{1:D} via amortization... theta is K x D
        self.q_theta = nn.Sequential(
            nn.Linear(self.vocab_size + self.num_topics, en_units),
            self.theta_act,
            nn.Linear(en_units, en_units),
            self.theta_act,
        )
        self.mu_q_theta = nn.Linear(en_units, self.num_topics, bias=True)
        self.logsigma_q_theta = nn.Linear(en_units, self.num_topics, bias=True)

        ## define variational distribution for \eta via amortization... eta is K x T
        self.q_eta_map = nn.Linear(self.vocab_size, self.eta_hidden_size)
        self.q_eta = nn.LSTM(self.eta_hidden_size, self.eta_hidden_size, self.eta_nlayers, dropout=self.eta_dropout)
        self.mu_q_eta = nn.Linear(self.eta_hidden_size + self.num_topics, self.num_topics, bias=True)
        self.logsigma_q_eta = nn.Linear(self.eta_hidden_size + self.num_topics, self.num_topics, bias=True)

        self.decoder_bn = nn.BatchNorm1d(vocab_size)
        self.decoder_bn.weight.requires_grad = False

    def get_activation(self, act):
        """Return the activation module named ``act`` (tanh if unknown)."""
        activations = {
            'tanh': nn.Tanh(),
            'relu': nn.ReLU(),
            'softplus': nn.Softplus(),
            'rrelu': nn.RReLU(),
            'leakyrelu': nn.LeakyReLU(),
            'elu': nn.ELU(),
            'selu': nn.SELU(),
            'glu': nn.GLU(),
        }

        if act in activations:
            act = activations[act]
        else:
            print('Defaulting to tanh activations...')
            act = nn.Tanh()
        return act

    def reparameterize(self, mu, logvar):
        """Returns a sample from a Gaussian distribution via reparameterization.

        In evaluation mode the mean is returned unchanged.
        """
        if self.training:
            std = torch.exp(0.5 * logvar)
            eps = torch.randn_like(std)
            return eps.mul_(std).add_(mu)
        else:
            return mu

    def get_kl(self, q_mu, q_logsigma, p_mu=None, p_logsigma=None):
        """Returns KL( N(q_mu, q_logsigma) || N(p_mu, p_logsigma) ).

        When no prior is given, a standard normal prior is used. The result
        is summed over the last dimension.
        """
        if p_mu is not None and p_logsigma is not None:
            sigma_q_sq = torch.exp(q_logsigma)
            sigma_p_sq = torch.exp(p_logsigma)
            kl = ( sigma_q_sq + (q_mu - p_mu)**2 ) / ( sigma_p_sq + 1e-6 )
            kl = kl - 1 + p_logsigma - q_logsigma
            kl = 0.5 * torch.sum(kl, dim=-1)
        else:
            kl = -0.5 * torch.sum(1 + q_logsigma - q_mu.pow(2) - q_logsigma.exp(), dim=-1)
        return kl

    def get_alpha(self): ## mean field
        """Sample the topic embeddings for every time slice.

        Returns:
            tuple: ``(alphas, kl_alpha)`` where ``alphas`` has shape
            T x K x L and ``kl_alpha`` is the scalar KL summed over all
            slices and topics. The prior of slice ``t`` is centred on the
            sample of slice ``t - 1`` with variance ``delta``.
        """
        alphas = torch.zeros(self.num_times, self.num_topics, self.rho_size).to(self.device)
        kl_alpha = []

        alphas[0] = self.reparameterize(self.mu_q_alpha[:, 0, :], self.logsigma_q_alpha[:, 0, :])

        # TODO: why logsigma_p_0 is zero?
        p_mu_0 = torch.zeros(self.num_topics, self.rho_size).to(self.device)
        logsigma_p_0 = torch.zeros(self.num_topics, self.rho_size).to(self.device)
        kl_0 = self.get_kl(self.mu_q_alpha[:, 0, :], self.logsigma_q_alpha[:, 0, :], p_mu_0, logsigma_p_0)
        kl_alpha.append(kl_0)
        for t in range(1, self.num_times):
            alphas[t] = self.reparameterize(self.mu_q_alpha[:, t, :], self.logsigma_q_alpha[:, t, :])

            p_mu_t = alphas[t - 1]
            logsigma_p_t = torch.log(self.delta * torch.ones(self.num_topics, self.rho_size).to(self.device))
            kl_t = self.get_kl(self.mu_q_alpha[:, t, :], self.logsigma_q_alpha[:, t, :], p_mu_t, logsigma_p_t)
            kl_alpha.append(kl_t)
        kl_alpha = torch.stack(kl_alpha).sum()
        return alphas, kl_alpha

    def get_eta(self, rnn_inp): ## structured amortized inference
        """Infer the per-time-slice topic-proportion means.

        Args:
            rnn_inp: Input to ``q_eta_map``, one row per time slice.

        Returns:
            tuple: ``(etas, kl_eta)`` where ``etas`` has shape T x K and
            ``kl_eta`` is the scalar KL summed over slices.
        """
        inp = self.q_eta_map(rnn_inp).unsqueeze(1)
        hidden = self.init_hidden()
        output, _ = self.q_eta(inp, hidden)
        # Drop only the batch axis. A bare squeeze() would also remove the
        # time axis when num_times == 1 and break the indexing below.
        output = output.squeeze(1)

        etas = torch.zeros(self.num_times, self.num_topics).to(self.device)
        kl_eta = []

        inp_0 = torch.cat([output[0], torch.zeros(self.num_topics,).to(self.device)], dim=0)
        mu_0 = self.mu_q_eta(inp_0)
        logsigma_0 = self.logsigma_q_eta(inp_0)
        etas[0] = self.reparameterize(mu_0, logsigma_0)

        p_mu_0 = torch.zeros(self.num_topics,).to(self.device)
        logsigma_p_0 = torch.zeros(self.num_topics,).to(self.device)
        kl_0 = self.get_kl(mu_0, logsigma_0, p_mu_0, logsigma_p_0)
        kl_eta.append(kl_0)

        for t in range(1, self.num_times):
            inp_t = torch.cat([output[t], etas[t-1]], dim=0)
            mu_t = self.mu_q_eta(inp_t)
            logsigma_t = self.logsigma_q_eta(inp_t)
            etas[t] = self.reparameterize(mu_t, logsigma_t)

            p_mu_t = etas[t-1]
            logsigma_p_t = torch.log(self.delta * torch.ones(self.num_topics,).to(self.device))
            kl_t = self.get_kl(mu_t, logsigma_t, p_mu_t, logsigma_p_t)
            kl_eta.append(kl_t)
        kl_eta = torch.stack(kl_eta).sum()

        return etas, kl_eta

    def get_theta(self, bows, times, eta=None): ## amortized inference
        """Returns the topic proportions.

        Args:
            bows: Bag-of-words batch of shape D x V.
            times: Time-slice index of every document (used to index eta).
            eta: Optional T x K tensor; inferred from ``self.rnn_inp`` when
                omitted.

        Returns:
            ``(theta, kl_theta)`` in training mode, ``theta`` otherwise.
        """
        totals = bows.sum(1, keepdim=True)
        # An empty document would turn into NaN and poison the whole batch.
        normalized_bows = bows / torch.where(totals > 0, totals, torch.ones_like(totals))

        if eta is None:
            eta, _ = self.get_eta(self.rnn_inp)

        eta_td = eta[times]
        inp = torch.cat([normalized_bows, eta_td], dim=1)
        q_theta = self.q_theta(inp)
        if self.enc_drop > 0:
            q_theta = self.t_drop(q_theta)
        mu_theta = self.mu_q_theta(q_theta)
        logsigma_theta = self.logsigma_q_theta(q_theta)
        z = self.reparameterize(mu_theta, logsigma_theta)
        theta = F.softmax(z, dim=-1)
        kl_theta = self.get_kl(mu_theta, logsigma_theta, eta_td, torch.zeros(self.num_topics).to(self.device))

        if self.training:
            return theta, kl_theta
        else:
            return theta

    @property
    def word_embeddings(self):
        """The V x L word-embedding matrix (learned or pretrained)."""
        if self.train_WE:
            return self.rho.weight
        return self.rho

    @property
    def topic_embeddings(self):
        """Topic embeddings of shape T x K x L, as produced by ``get_alpha``."""
        alpha, _ = self.get_alpha()
        return alpha

    def get_beta(self, alpha=None):
        """Returns the topic matrix beta of shape T x K x V.

        Args:
            alpha: Optional T x K x L topic embeddings; sampled with
                ``get_alpha`` when omitted.
        """

        if alpha is None:
            alpha, _ = self.get_alpha()

        if self.train_WE:
            logit = self.rho(alpha.view(alpha.size(0) * alpha.size(1), self.rho_size))
        else:
            tmp = alpha.view(alpha.size(0) * alpha.size(1), self.rho_size)
            logit = torch.mm(tmp, self.rho.permute(1, 0))
        logit = logit.view(alpha.size(0), alpha.size(1), -1)

        beta = F.softmax(logit, dim=-1)

        return beta

    def get_NLL(self, theta, beta, bows):
        """Per-document negative log-likelihood.

        Args:
            theta: D x K topic proportions.
            beta: D x K x V topic-word distributions, already selected for
                the time slice of each document.
            bows: D x V bag-of-words counts.
        """
        theta = theta.unsqueeze(1)
        loglik = torch.bmm(theta, beta).squeeze(1)
        loglik = torch.log(loglik + 1e-12)
        nll = -loglik * bows
        nll = nll.sum(-1)
        return nll

    def forward(self, bows, times):
        """Compute the training loss for one batch.

        Returns:
            dict: ``loss`` (sum of all terms) plus the individual terms
            ``nll``, ``kl_eta``, ``kl_theta`` and ``kl_alpha``. The NLL and
            the theta KL are rescaled by ``train_size / batch_size``.
        """
        bsz = bows.size(0)
        coeff = self.train_size / bsz
        eta, kl_eta = self.get_eta(self.rnn_inp)
        theta, kl_theta = self.get_theta(bows, times, eta)
        kl_theta = kl_theta.sum() * coeff

        alpha, kl_alpha = self.get_alpha()
        beta = self.get_beta(alpha)

        beta = beta[times]
        nll = self.get_NLL(theta, beta, bows)
        nll = nll.sum() * coeff

        loss = nll + kl_eta + kl_theta + kl_alpha

        rst_dict = {
            'loss': loss,
            'nll': nll,
            'kl_eta': kl_eta,
            'kl_theta': kl_theta,
            'kl_alpha': kl_alpha,
        }

        return rst_dict

    def init_hidden(self):
        """Initializes the first hidden state of the RNN used as inference network for \\eta.
        """
        weight = next(self.parameters())
        nlayers = self.eta_nlayers
        nhid = self.eta_hidden_size
        return (weight.new_zeros(nlayers, 1, nhid), weight.new_zeros(nlayers, 1, nhid))
def test(self, bow, num_workers=20):
    """Infer topic proportions for ``bow`` with a pool of worker processes.

    The documents are split into contiguous chunks (one per worker, in the
    original order); every worker applies ``self.model[doc]`` to its chunk
    through the module-level ``work`` function, and the per-chunk results
    are concatenated back in order.

    Args:
        bow: Dense document-term matrix with one document per row.
        num_workers: Upper bound on the number of processes. Fewer are
            started when there are fewer documents, so that no process is
            spawned (and no model pickled) for an empty chunk.

    Returns:
        np.ndarray: One inferred topic vector per document.
    """
    corpus = gensim.matutils.Dense2Corpus(bow, documents_columns=False)
    docs = list(corpus)

    num_workers = max(1, min(num_workers, len(docs)))
    split_idx_list = np.array_split(np.arange(len(docs)), num_workers)
    args_list = [[self.model, [docs[i] for i in idx]] for idx in split_idx_list]

    starttime = datetime.datetime.now()

    # The context manager tears the pool down even when a worker raises;
    # close()/join() keep the orderly shutdown on the success path.
    with Pool(processes=num_workers) as pool:
        results = pool.map(work, args_list)
        pool.close()
        pool.join()

    theta_list = list()
    for rst in results:
        theta_list.extend(rst)

    endtime = datetime.datetime.now()

    print("DTM test time: {}s".format((endtime - starttime).seconds))

    return np.asarray(theta_list)
+ return theta[np.argsort(self.doc_order_idx)] + + def get_beta(self): + beta = list() + # K x V x T + for item in self.model.topic_chains: + # V x T + beta.append(item.e_log_prob) + + # T x K x V + beta = np.transpose(np.asarray(beta), (2, 0, 1)) + # use softmax + beta = np.exp(beta) + beta = beta / beta.sum(-1, keepdims=True) + return beta + + def get_top_words(self, num_top_words=None): + if num_top_words is None: + num_top_words = self.num_top_words + beta = self.get_beta() + top_words_list = list() + for time in range(beta.shape[0]): + top_words = _utils.get_top_words(beta[time], self.dataset.vocab, num_top_words, self.verbose) + top_words_list.append(top_words) + return top_words_list + + def export_theta(self): + train_theta = self.get_theta() + test_theta = self.test(self.dataset.test_bow) + return train_theta, test_theta diff --git a/backend/models/dynamic_trainer.py b/backend/models/dynamic_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..a9d79f3c7b18e61249fa9f296412cf02bec4c51b --- /dev/null +++ b/backend/models/dynamic_trainer.py @@ -0,0 +1,177 @@ +import numpy as np +from tqdm import tqdm +from collections import defaultdict + +import torch +from torch.optim.lr_scheduler import StepLR +from backend.datasets.utils import _utils +from backend.datasets.utils.logger import Logger + +logger = Logger("WARNING") + +class DynamicTrainer: + def __init__(self, + model, + dataset, + num_top_words=15, + epochs=200, + learning_rate=0.002, + batch_size=200, + lr_scheduler=None, + lr_step_size=125, + log_interval=5, + verbose=False + ): + + self.model = model + self.dataset = dataset + self.num_top_words = num_top_words + self.epochs = epochs + self.learning_rate = learning_rate + self.batch_size = batch_size + self.lr_scheduler = lr_scheduler + self.lr_step_size = lr_step_size + self.log_interval = log_interval + + self.verbose = verbose + if verbose: + logger.set_level("DEBUG") + else: + logger.set_level("WARNING") + + def 
make_optimizer(self,): + args_dict = { + 'params': self.model.parameters(), + 'lr': self.learning_rate, + } + + optimizer = torch.optim.Adam(**args_dict) + return optimizer + + def make_lr_scheduler(self, optimizer): + lr_scheduler = StepLR(optimizer, step_size=self.lr_step_size, gamma=0.5, verbose=False) + return lr_scheduler + + def train(self): + optimizer = self.make_optimizer() + + if self.lr_scheduler: + logger.info("using lr_scheduler") + lr_scheduler = self.make_lr_scheduler(optimizer) + + data_size = len(self.dataset.train_dataloader.dataset) + + for epoch in tqdm(range(1, self.epochs + 1)): + self.model.train() + loss_rst_dict = defaultdict(float) + + for batch_data in self.dataset.train_dataloader: + + rst_dict = self.model(batch_data['bow'], batch_data['times']) + batch_loss = rst_dict['loss'] + + optimizer.zero_grad() + batch_loss.backward() + optimizer.step() + + for key in rst_dict: + loss_rst_dict[key] += rst_dict[key] * len(batch_data) + + if self.lr_scheduler: + lr_scheduler.step() + + if epoch % self.log_interval == 0: + output_log = f'Epoch: {epoch:03d}' + for key in loss_rst_dict: + output_log += f' {key}: {loss_rst_dict[key] / data_size :.3f}' + + logger.info(output_log) + + top_words = self.get_top_words() + train_theta = self.test(self.dataset.train_bow, self.dataset.train_times) + + return top_words, train_theta + + def test(self, bow, times): + data_size = bow.shape[0] + theta = list() + all_idx = torch.split(torch.arange(data_size), self.batch_size) + + with torch.no_grad(): + self.model.eval() + for idx in all_idx: + batch_theta = self.model.get_theta(bow[idx], times[idx]) + theta.extend(batch_theta.cpu().tolist()) + + theta = np.asarray(theta) + return theta + + def get_beta(self): + self.model.eval() + beta = self.model.get_beta().detach().cpu().numpy() + return beta + + def get_top_words(self, num_top_words=None): + if num_top_words is None: + num_top_words = self.num_top_words + + beta = self.get_beta() + top_words_list = list() + 
for time in range(beta.shape[0]): + if self.verbose: + print(f"======= Time: {time} =======") + top_words = _utils.get_top_words(beta[time], self.dataset.vocab, num_top_words, self.verbose) + top_words_list.append(top_words) + return top_words_list + + def export_theta(self): + train_theta = self.test(self.dataset.train_bow, self.dataset.train_times) + test_theta = self.test(self.dataset.test_bow, self.dataset.test_times) + + return train_theta, test_theta + + def get_top_words_at_time(self, topic_id, time, top_n): + beta = self.get_beta() # shape: [T, K, V] + topic_beta = beta[time, topic_id, :] + top_indices = topic_beta.argsort()[-top_n:][::-1] + return [self.dataset.vocab[i] for i in top_indices] + + + def get_topic_words_over_time(self, topic_id, top_n): + """ + Returns top_n words for the given topic_id over all time steps. + Output: List[List[str]], each inner list is the top_n words at a time step. + """ + beta = self.get_beta() # shape: [T, K, V] + T = beta.shape[0] + return [ + self.get_top_words_at_time(topic_id=topic_id, time=t, top_n=top_n) + for t in range(T) + ] + + def get_all_topics_at_time(self, time, top_n): + """ + Returns top_n words for each topic at the given time step. + Output: List[List[str]], each inner list is the top_n words for a topic. + """ + beta = self.get_beta() # shape: [T, K, V] + K = beta.shape[1] + return [ + self.get_top_words_at_time(topic_id=k, time=time, top_n=top_n) + for k in range(K) + ] + + def get_all_topics_over_time(self, top_n=10): + """ + Returns the top_n words for all topics over all time steps. 
+ Output shape: List[List[List[str]]] = T x K x top_n + """ + beta = self.get_beta() # shape: [T, K, V] + T, K, _ = beta.shape + return [ + [ + self.get_top_words_at_time(topic_id=k, time=t, top_n=top_n) + for k in range(K) + ] + for t in range(T) + ] diff --git a/data/ACL_Anthology/CFDTM/beta.npy b/data/ACL_Anthology/CFDTM/beta.npy new file mode 100644 index 0000000000000000000000000000000000000000..0eedd28c5e295c33837df476fb51415355a3484c --- /dev/null +++ b/data/ACL_Anthology/CFDTM/beta.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34984bfb432a10733161a9dfed834a9ef4f366a28a6cb2ecd6e8351997f1599a +size 16645248 diff --git a/data/ACL_Anthology/DETM/beta.npy b/data/ACL_Anthology/DETM/beta.npy new file mode 100644 index 0000000000000000000000000000000000000000..bbc64913e280f13462d27902aef6a117ba8c5dc3 --- /dev/null +++ b/data/ACL_Anthology/DETM/beta.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c6eefa9b6aaea4c694736d09ad9e517446f09929c01889e26633300e5eff166 +size 41612928 diff --git a/data/ACL_Anthology/DTM/beta.npy b/data/ACL_Anthology/DTM/beta.npy new file mode 100644 index 0000000000000000000000000000000000000000..89156817aeee6b641ba74d08bf58db77e59a9135 --- /dev/null +++ b/data/ACL_Anthology/DTM/beta.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14c296a2e3fb49f9d0b66262907d64f7d181408768e43138d57c262ea6a11318 +size 33290368 diff --git a/data/ACL_Anthology/DTM/topic_label_cache.json b/data/ACL_Anthology/DTM/topic_label_cache.json new file mode 100644 index 0000000000000000000000000000000000000000..c08a1fd4423df2d7058524a81e67c0b2d968e4d4 --- /dev/null +++ b/data/ACL_Anthology/DTM/topic_label_cache.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea9f3c508ede82967cdf02050d7383d58dd9d269a7f661ae1462a95cbac3331e +size 2089 diff --git a/data/ACL_Anthology/docs.jsonl b/data/ACL_Anthology/docs.jsonl new file mode 100644 index 
0000000000000000000000000000000000000000..dcc108a1e337b4dca9d94dc61e9c0f9658982202 --- /dev/null +++ b/data/ACL_Anthology/docs.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a004dd095b9a4f29fdccb5144d50d3dacc7985af443a8de434005b7b8401f9b7 +size 67395059 diff --git a/data/ACL_Anthology/inverted_index.json b/data/ACL_Anthology/inverted_index.json new file mode 100644 index 0000000000000000000000000000000000000000..1aca665c98bbaf2f64165d7edbc2ca88dd3b2751 --- /dev/null +++ b/data/ACL_Anthology/inverted_index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e7ee888abb2fd025b11415a7ead6780d41c5f890cc25ba453615906f10b8d7 +size 30865281 diff --git a/data/ACL_Anthology/processed/lemma_to_forms.json b/data/ACL_Anthology/processed/lemma_to_forms.json new file mode 100644 index 0000000000000000000000000000000000000000..56c738c733ddc3a198bc44af0559eeaf30779147 --- /dev/null +++ b/data/ACL_Anthology/processed/lemma_to_forms.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ea8855f9ced2ca3d785ce5926ced29b35e0779cd6b3166edfd5c5a5c1beccb +size 4370995 diff --git a/data/ACL_Anthology/processed/length_stats.json b/data/ACL_Anthology/processed/length_stats.json new file mode 100644 index 0000000000000000000000000000000000000000..04330f4567ea3c9c982b29a1117b91f04ea9b753 --- /dev/null +++ b/data/ACL_Anthology/processed/length_stats.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cc985e5a1ce565ca4179d343ade1526daab463520f6317122953da83d368306 +size 133 diff --git a/data/ACL_Anthology/processed/time2id.txt b/data/ACL_Anthology/processed/time2id.txt new file mode 100644 index 0000000000000000000000000000000000000000..791630808d4a3ddf72aec0d8b142877031124d29 --- /dev/null +++ b/data/ACL_Anthology/processed/time2id.txt @@ -0,0 +1,18 @@ +{ + "2010": 0, + "2011": 1, + "2012": 2, + "2013": 3, + "2014": 4, + "2015": 5, + "2016": 6, + "2017": 7, + "2018": 8, + "2019": 9, + 
"2020": 10, + "2021": 11, + "2022": 12, + "2023": 13, + "2024": 14, + "2025": 15 +} \ No newline at end of file diff --git a/data/ACL_Anthology/processed/vocab.txt b/data/ACL_Anthology/processed/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..ea58cd372b21400f4001d1177b27dbc709105aed --- /dev/null +++ b/data/ACL_Anthology/processed/vocab.txt @@ -0,0 +1,13004 @@ +abbreviation +abbreviation_acronym +abbreviation_expansion +abductive +abductive_reasoning +ability +ablation +ablation_study +able +abord +absa +absence +absent +absent_keyphrase +absolute +absolute_gain +absolute_improvement +absolute_position +absolute_relative +abstain +abstract +abstract_anaphora +abstract_away +abstract_categorial +abstract_concrete +abstract_meaning +abstract_syntax +abstraction +abstractive +abstractive_extractive +abstractive_summarisation +abstractive_summarization +abstractive_summarizer +abstractive_summary +abundance +abundant +abundant_unlabeled +abuse +abusive +abusive_abusive +abusive_comment +abusive_content +abusive_detection +abusive_tamil +academia +academia_industry +academic +academic_discipline +academic_industrial +academic_integrity +academic_publication +academic_writing +acc +acc_aux +accelerate +accelerate_convergence +accelerate_inference +accelerate_progress +acceleration +accent +accent_sur +accept +accept_reject +acceptability +acceptability_judgement +acceptability_judgment +acceptability_rating +acceptable +acceptance +acceptance_rate +access +accessibility +accessible +accessible_interoperable +accommodate +accompany +accomplish +accord +accord_official +accordance +accordingly +account +accumulate +accumulation +accuracy +accurate +accurately +accurately_reflect +ace +ace_ace +ace_aux +ace_ere +ace_genia +ace_ram +ace_tac +ache +ache_avec +ache_campagne +ache_compr +ache_consiste +ache_difficile +ache_esambigu +ache_est +ache_etection +ache_etiquetage +ache_nous +ache_qui +ache_reconnaissance +ache_traduction +ache_traitement 
+aches +aches_compr +aches_traitement +achievable +achieve +achieve_competitive +achieve_impressive +achieve_macro +achieve_remarkable +achievement +achievement_award +acknowledge +acl +acl_achievement +acl_anthology +acl_bionlp +acl_conference +acl_emnlp +acl_hope +acl_ijcnlp +acl_workshop +acoustic +acoustic_articulatory +acoustic_cue +acoustic_phonetic +acoustic_prosodic +acoustique +acoustique_des +acquire +acquisition +acronym +acronym_disambiguation +across +across_board +across_globe +act +action +actionable +actionable_insight +actionable_item +actionable_recommendation +activate +activate_neuron +activation +activation_function +activation_patch +activation_quantization +activation_sparsity +active +active_learning +active_passive +actively +actively_participate +activity +actor +actor_critic +actual +actually +acyclic +adapt +adaptability +adaptable +adaptation +adaptation_lora +adapter +adapter_lora +adaption +adaptive +adaptive_pretraining +adaptive_weighting +adaptively +adaptively_adjust +adaptor +adaptor_grammar +add +add_extra +addition +additional +additionally +additive +additive_compositionality +address +address_aforementioned +address_issue +address_limitation +address_shortcoming +addressee +addressee_recognition +ade +adept +adeptly +adequacy +adequacy_fluency +adequate +adequately +adhere +adherence +adjacency +adjacency_matrix +adjacent +adjective +adjective_adverb +adjective_noun +adjoin +adjoin_grammar +adjunct +adjust +adjustment +administer +administration +administrative +admit +adopt +adoption +adult +adult_child +advance +advanced +advancement +advantage +advantage_disadvantage +advantageous +advent +adverb +adverb_adjective +adverbial +adverbial_clause +adversarial +adversarial_attack +adversarial_example +adversarial_perturbation +adversarial_suffix +adversarially +adversary +adverse +adverse_drug +adverse_effect +adverse_reaction +adversely +adversely_affect +advertisement +advertising +advice +advice_seek +advocate +aes +affect 
+affected +affective +affective_computing +affective_lexicon +affective_polarity +affiliation +affinity +affirm +affix +afford +affordable +afin +afin_eliorer +afin_evaluer +afin_extraire +afin_leur +afin_mieux +afin_obtenir +aforementioned +aforementioned_issue +africa +africa_asia +african +african_american +african_asian +afterwards +age +age_acquisition +age_gender +age_group +agency +agenda +agent +agent_collaboration +agglutinative +agglutinative_morphology +aggregate +aggregated +aggregation +aggregator +aggression +aggression_cyberbullying +aggression_identification +aggressive +aggressive_aggressive +agit +agnostic +agnostic_backdoor +agnostic_meta +ago +agree +agree_disagree +agree_upon +agreement +agreement_cohen +agreement_disagreement +agreement_iaa +agreement_kappa +agreement_krippendorff +ahead +aid +aide +aide_mod +aide_une +aim +ainsi +ainsi_que +ainsi_une +air +air_force +air_traffic +air_travel +ais +ais_anglais +ais_annot +ais_langue +ais_nous +ais_parl +ais_partir +aise +aise_nous +aise_parl +aka +akin +albeit +albert +albert_roberta +alexa +alexa_google +alexa_prize +algebra +algebraic +algerian +algerian_arabic +algerian_dialect +algorithm +algorithme +algorithmic +algorithms +align +align_closely +aligned +alignement +alignement_automatique +aligner +alignment +alignment_tax +alike +alleviate +alleviate_aforementioned +alleviate_burden +alleviate_catastrophic +alleviate_issue +alleviate_overfitte +alleviate_problem +allocate +allocation +allocation_lda +allow +allow_seamless +almost +almost_always +almost_entirely +almost_every +almost_exclusively +almost_impossible +almost_lossless +almost_perfect +almost_perfectly +alone +alone_insufficient +alone_sufficient +along +along_axis +along_line +alongside +alor +alor_que +alpaca +alpha +alphabet +already +also +alta +alta_share +alter +alteration +alternate +alternation +alternative +alternative_lexicalization +alternatively +although +although_existing +altogether +always +alzheimer 
+alzheimer_disease +amazon +amazon_alexa +amazon_mechanical +amazon_product +amazon_review +amazon_yelp +ambiguity +ambiguous +ambiguous_pronoun +ambiguous_unanswerable +ameliorate +amenable +america +american +american_accent +american_british +american_english +american_national +american_sign +americasnlp +americasnlp_share +amharic +ami +among +among_participant +among_thing +amongst +amount +ample +ample_room +amplify +amr +amr_parser +amr_parsing +amrs +analogical +analogical_reasoning +analogous +analogy +analys +analys_ees +analyse +analyse_acoustique +analyse_automatique +analyse_emantique +analyse_morphologique +analyse_statistique +analyse_syntaxique +analyser +analyser_les +analyseur +analyseur_ependance +analyseur_syntaxique +analysis +analysis_absa +analyst +analytic +analytical +analyze +analyzer +anaphora +anaphora_ellipsis +anaphora_resolution +anaphoric +anaphoric_pronoun +anaphoric_reference +anchor +ancient +ancient_book +ancient_china +ancient_chinese +ancient_egyptian +ancient_greek +ancient_hebrew +android +anger +anger_disgust +anger_fear +anger_neutral +anger_sadness +anglais +anglais_fran +angle +animal +animal_disease +animate +animation +ann +ann_ees +annot +annot_manuellement +annot_nous +annot_pour +annot_sen +annotate +annotated +annotation +annotation_guideline +annotation_manuelle +annotation_scheme +annotator +annotator_agreement +annotator_disagree +annotator_disagreement +annual +annual_conference +annual_financial +annual_meeting +annual_report +anomaly +anomaly_detection +anomaly_detector +anonymity +anonymization +anonymize +anonymous +anonymous_open +another +answer +answer_cqa +answer_vqa +answerability +answerable +answerable_unanswerable +answering +ant +antecedent +antecedent_anaphora +antecedent_consequence +anthology +anti +anti_asian +anti_stereotypical +anticipate +antonym +antonymy +anxiety +anxiety_depression +anxiety_disorder +anxiety_symptom +anyone +apache +apache_license +apart +ape +apertium +apertium_rdf +api 
+api_call +apis +app +apparent +appeal +appealing +appear +appearance +appel +append +apple +apple_apple +applicability +applicable +application +application_smm +applied +appliqu +appliqu_aux +appliqu_ees +appliqu_sur +apply +apport +apport_des +appraisal +appraisal_dimension +appraisal_theory +apprenant +apprenant_fran +apprentissage +apprentissage_auto +apprentissage_automatique +apprentissage_par +apprentissage_profond +apprentissage_supervis +approach +approche +approche_bas +approche_fond +approche_neuronale +approche_obtient +approche_par +approche_pour +approche_propos +approche_qui +approche_supervis +approches +approches_ont +appropriate +appropriately +appropriateness +approx +approximate +approximate_near +approximate_posterior +approximately +approximately_hour +approximately_million +approximation +appuie +appuie_sur +appuyant +appuyant_sur +apr +apr_avoir +apr_entra +apr_une +arab +arab_country +arab_region +arab_world +arabe +arabert +arabic +arabic_diacritization +arabic_dialect +arabic_egyptian +arabic_msa +araieval +araieval_share +arbitrarily +arbitrary +arbitrary_length +arc +arc_factored +architectural +architectural_change +architectural_choice +architectural_modification +architecture +archive +area +area_curve +arena +arguably +argue +argument +argument_adjunct +argument_mining +argument_persuasiveness +argumentation +argumentation_mining +argumentative +argumentative_essay +argumentative_unit +argumentative_writing +arise +arithmetic +arithmetic_commonsense +arithmetic_operation +arithmetic_reasoning +arm +around +around_globe +around_world +arousal +arousal_dimension +arousal_dominance +arrange +arrangement +array +arrive +arrive_final +art +art_sota +artefact +article +article_ecrit +article_esente +article_nous +article_scientifique +articulate +articulation +articulatoire +articulatory +artifact +artificial +artificial_intelligence +artificially +artificially_generate +artificially_inflate +arxiv +arxiv_org +arxiv_pubme +ary +ary_fact 
+ary_relation +ascertain +asia +asian +aside +ask +ask_clarification +ask_whether +asl +asl_sign +aspect +aspect_sentiment +aspectual +aspectual_class +asr +asr_transcript +asr_tts +assamese +assamese_bengali +assamese_manipuri +assemble +assembly +assembly_minute +assert +assertion +assess +assess_funniness +assessment +asset +assign +assign_icd +assignment +assist +assistance +assistant +assistant_alexa +assistive +assistive_robot +assistive_technology +associ +associ_des +associ_ees +associ_une +associate +associated +association +association_norm +associative +assume +assume_existence +assumption +assurance +ast +aste +asymmetric +asymmetry +asynchronous +asynchronous_conversation +ate +atis +atis_snip +atomic +atomic_fact +atomic_unit +attach +attachment +attachment_ambiguity +attachment_score +attack +attack_defense +attack_success +attacker +attain +attempt +attend +attention +attention_head +attention_lately +attention_mechanism +attention_pay +attentional +attentive +attentive_listening +attentive_pooling +attest +attitude +attitude_toward +attitude_towards +attract +attract_considerable +attract_increase +attract_lot +attract_much +attractive +attractive_alternative +attribute +attribute_value +attribution +atypical +auc +auc_roc +audience +audio +audio_file +audio_recording +audio_video +audio_visual +audiovisual +audit +auditory +augment +augmentation +augmented +augmented_generation +augmented_reality +auroc +aussi +aussi_bien +aussi_que +australian +authentic +authenticity +author +author_affiliation +author_profiling +authoritative +authority +authorship +authorship_attribution +authorship_obfuscation +authorship_verification +autism +autism_spectrum +auto +auto_completion +auto_encoder +auto_encoding +auto_regressive +auto_supervis +autoencoder +autoencoder_vae +automata +automate +automate_essay +automate_scoring +automate_theorem +automated +automated_essay +automatic +automatic_misogyny +automatically +automation +automatique +automatique_des 
+automatique_langage +automatique_langue +automatique_parole +automatique_statistique +automatique_texte +automatiquement +automatiquement_des +automatiquement_les +automatiquement_partir +autonomous +autonomous_agent +autonomously +autoregressive +autoregressive_decoding +autoregressive_nar +autre +autre_part +autre_que +autre_sur +autres +autres_langue +aux +aux_aches +aux_donn +auxiliary +auxiliary_loss +availability +available +available_download +available_https +avant +avant_apr +avatar +avatar_animation +avec +avec_des +avec_les +avec_leur +avec_sans +avec_une +avenue +avenue_future +average +average_chrf +average_las +average_pearson +average_precision +average_spearman +averaging +avoid +avoid_catastrophic +avoid_forget +avoid_overfitte +avoir +avon +avon_egalement +avon_evalu +avon_evelopp +avon_identifi +avon_mis +avon_utilis +award +aware +aware_minimization +awareness +away +away_surface +axis +ayant +ayant_pour +babelnet +babelnet_synset +baby +babylm +babylm_challenge +back +back_channel +back_forth +back_propagate +back_propagation +back_translate +back_translation +back_transliteration +backbone +backdoor +backdoor_adjustment +backdoor_attack +backdoor_defense +backdoor_trigger +backdoor_watermark +backend +background +backpropagation +backtranslation +backward +backward_chain +backward_pass +bad +bad_scaling +bag +bag_gram +bag_word +balance +balance_trade +balanced +balancing +bandit +bandit_feedback +bangla +bank +banking +bar +bar_exam +barely +barrier +bart +bart_pegasus +bas +bas_ees +bas_sur +based +baseline +basic +basic_idea +basic_unit +basically +basis +basis_connaissance +basis_kbs +basque +basque_catalan +basque_spanish +batch +batch_size +batch_wise +battery +baye +baye_classifier +baye_logistic +baye_risk +bayes +bayes_classifier +bayesian +bayesian_optimization +bea +bea_share +beam +beam_search +beam_size +bear +beat +become +become_apparent +become_dominant +become_facto +become_imperative +become_increasingly 
+become_indispensable +become_integral +become_mainstream +become_paramount +become_popular +become_prevalent +become_ubiquitous +bed +begin +beginner +beginning +behave +behave_differently +behave_like +behave_similarly +behavior +behavioral +behavioral_testing +behavioral_therapy +behaviour +behavioural +behind +beir +beir_benchmark +belief +belief_desire +belief_propagation +belief_tracker +belief_tracking +believe +belong +bench +bench_mark +benchmark +benchmarke +benchmarking +beneficial +benefit +bengali +bengali_gujarati +bengali_hindi +bengali_marathi +benign +berkeley +berkeley_framenet +berkeley_parser +bert +bert_albert +bert_devlin +bert_mbert +bert_roberta +bert_sbert +bert_xlnet +bertscore +bertscore_bleurt +bertscore_comet +besides +besoin +beyond +beyond_mere +biaffine +biaffine_attention +biaffine_parser +bias +bias_mitigation +biased +bible +bidirectional +bidirectional_encoder +bidirectional_gate +bidirectional_gru +bidirectional_long +bidirectional_lstm +bidirectional_lstms +bidirectional_recurrent +bien +bien_que +big +big_bench +big_bird +big_five +bigram +bigram_trigram +bilinear +bilingual +bilingual_dictionary +bilingual_lexica +bilingual_lexicon +bilingual_terminology +bilingue +bilingue_partir +bill +billion +billion_billion +billion_parameter +bilstm +bilstm_cnn +bilstm_crf +bilstms +bimodal +binary +binary_classification +binary_classifier +binary_multiclass +bind +bing +bio +bio_electra +bio_medical +bio_tag +bioasq +bioasq_challenge +biobert +biographical +biography +biological +biology +biom +biom_edical +biom_edicale +biom_edicaux +biomedical +biomedical_literature +biomedicine +bionlp +bionlp_share +bionlp_workshop +bipartite +bipartite_graph +bipartite_matching +bird +bird_fly +birth +birth_defect +bit +bit_bit +bit_float +bit_integer +bit_per +bit_quantization +bit_quantize +bit_width +bitext +bitext_mining +black +black_box +black_white +blank +blend +bleu +bleu_bertscore +bleu_chrf +bleu_cider +bleu_comet +bleu_meteor 
+bleu_nist +bleu_point +bleu_rouge +bleu_sari +bleu_score +bleu_ter +bleurt +bleurt_comet +bli +blind +blind_spot +blind_test +block +block_diagram +block_wise +blog +blog_post +bloom +bloom_taxonomy +bloomz +blp +blp_workshop +blue +board +body +body_literature +body_movement +bolster +bon +bon_esultat +bonne +bonne_qualit +book +book_hour +book_review +booking +booking_com +boolean +boolean_logic +boost +boosting +bootstrap +bootstrappe +bootstrapping +border +borrow +borrow_idea +borrowing +bot +bottleneck +bottleneck_adapter +bottleneck_principle +bottom +bottom_top +bound +bound_box +boundary +bounding +bounding_box +box +box_attack +bpe +bpe_dropout +bpe_merge +bpe_subword +bpe_tokenization +brain +brain_activity +brain_encoding +brain_imaging +brain_recording +brain_region +brain_signal +branch +brand +brand_name +brand_product +brazilian +brazilian_european +brazilian_indigenous +brazilian_portuguese +breadth +breadth_depth +break +break_barrier +breakdown +breakthrough +bridge +bridge_anaphora +bridge_gap +bridging +brief +brief_description +brief_hospital +brief_introduction +brief_overview +briefly +briefly_describe +briefly_discuss +bring +bring_forth +bring_together +british +british_american +british_national +british_sign +brittle +brittleness +broad +broad_applicability +broad_array +broad_audience +broad_coverage +broad_range +broad_spectrum +broadcast +broadcast_news +broaden +broaden_scope +broadly +broadly_applicable +brown +brown_cluster +brown_clustering +browse +browse_news +browse_page +browser +browser_extension +budget +budget_constraint +bug +bug_fix +build +build_upon +builder +building +building_block +bulgarian +bulgarian_czech +bulgarian_macedonian +bulgarian_national +bulgarian_north +bulgarian_romanian +bulgarian_ukrainian +bundle +burden +business +buy +buy_product +bypass +bypass_need +bypass_safety +byte +byte_pair +cache +cache_compression +cadre +cadre_apprentissage +cadre_projet +cadre_une +calcul +calcul_similarit +calculate 
+calculation +calculus +calibrate +calibrate_confidence +calibration +call +call_center +call_centre +cambridge +cambridge_university +camembert +camembert_bio +camera +campagne +campagne_deft +campagne_evaluation +campaign +campaign_organize +canada +cancer +cancer_patient +candidate +canonical +canonical_correlation +canonical_form +cantonese +cantonese_mandarin +cantonese_wordnet +capability +capable +capacit +capacit_des +capacity +capitalization +capitalize +capsule +capsule_network +caption +captioning +capture +capture_nuance +car +caract +caract_ere +caract_eristiques +card +care +careful +careful_consideration +careful_examination +carefully +carefully_choose +carefully_craft +carefully_curate +carefully_design +carefully_engineer +carefully_select +carlo +carlo_tree +carry +cas +cas_clinique +cas_des +cascade +case +cast +cast_doubt +cast_light +casual +casual_conversation +cat +cat_tool +catalan +catalan_galician +catalan_spanish +catalog +catalogue +catastrophic +catastrophic_forgetting +catch +categorial +categorial_grammar +categorical +categorical_metadata +categorisation +categorise +categorization +categorization_offense +categorize +categorize_offensive +category +cater +causal +causal_effect +causal_intervention +causal_mediation +causal_relationship +causal_timebank +causality +causality_identification +causally +causation +cause +cause_clause +cause_death +cause_effect +caution +ccg +ccg_derivation +ccg_parse +ccg_parser +ccg_supertagge +ccl +ccl_eval +cefr +cefr_level +cela +cela_nous +cell +celle +celle_qui +celui +celui_est +center +center_around +center_tsc +central +central_bank +central_role +central_theme +centrality +centralized +centre +centre_appel +centric +centroid +century +century_latin +cependant +cependant_les +cer +cer_wer +certain +certaine +certainty +certify +certify_robustness +ces +ces_deux +cet +cet_article +cette +cette_ache +cette_approche +cette_derni +cette_emonstration +cette_ethode +cette_etude +cette_fin 
+cette_hypoth +cette_mesure +cette_probl +cette_ressource +ceux +ceux_obtenu +cged +cha +cha_ine +chain +chain_thought +challenge +challenging +challenging_testbe +chance +change +channel +channel_wise +chapter +chapter_association +chaque +chaque_mot +char +char_gram +character +character_gram +character_ngram +characterise +characteristic +characterization +characterize +charge +charge_prediction +chart +chat +chat_bot +chat_orient +chatbot +chatbot_arena +chatgpt +chatgpt_claude +chatgpt_gemini +chatgpt_gpt +chatgpt_turbo +chatgpt_vicuna +cheap +check +check_csc +check_worthy +checker +checking +checking_correction +checklist +checkpoint +checkpoint_averaging +chemical +chemical_compound +chemical_disease +chemical_patent +chemical_protein +chemical_reaction +chemistry +chen +chest +chest_ray +chez +chez_des +chez_les +child +child_acquisition +child_adult +child_age +child_autism +child_direct +child_parent +childhood +childhood_essay +children +children_book +children_story +china +china_mobile +chinese +chinese_poetry +chinese_spelling +chit_chat +choice +choix +choix_multiple +choose +choose_appropriate +choose_right +chrf +chrf_comet +chrf_score +chrf_ter +chronological +chronological_order +chronological_split +chunk +chunking +cible +cider +circumstance +circumvent +citation +citation_count +citation_linkage +citation_proximity +citation_recommendation +cite +citizen +citizen_science +city +city_country +civil +civil_law +civil_procedure +claim +claim_premise +claim_veracity +claim_verification +clarification +clarification_question +clarification_request +clarify +clarin +clarin_infrastructure +clarity +class +class_imbalance +classic +classical +classical_chinese +classical_latin +classical_poetry +classification +classified +classifier +classifieur +classify +classique +classroom +classroom_discussion +classroom_teaching +claude +claude_gemini +claude_llama +claude_opus +claude_sonnet +clausal +clause +clean +clean_noisy +cleaning +clear +clear_cut 
+clear_picture +clear_winner +clearly +clearly_define +click +click_log +click_rate +clickbait +clickbait_post +clickbait_spoiler +clickbait_title +client +client_server +client_side +client_therapist +climate +climate_change +clinic +clinical +clinical_coding +clinical_note +clinical_psychology +clinical_record +clinical_tempeval +clinical_triage +clinical_trial +clinically +clinically_meaningful +clinically_relevant +clinician +clinician_patient +clinique +clinique_fran +clip +clir +close +close_gap +close_inspection +close_look +closed +closed_book +closed_loop +closed_source +closed_track +closely +closely_align +closely_mimic +closely_mirror +closely_relate +closely_related +closely_resemble +closely_tie +closeness +cloud +cloud_computing +cloud_platform +cloud_service +cloze +cloze_style +cloze_test +clpsych +clpsych_share +cls +cls_token +clue +cluster +clustering +cmu +cmu_cmu +cmu_mosei +cnn +cnn_bilstm +cnn_daily +cnn_dailymail +cnn_gru +cnn_lstm +cnn_nyt +cnn_rnn +cnn_xsum +cnns +coarse +coarse_fine +coarse_grain +coarse_granularity +coco +coco_caption +coco_flickr +codalab +code +code_checkpoint +code_mix +code_mixing +code_publicly +code_snippet +code_switch +code_switching +codebase +codebase_publicly +codebook +coder +codex +coding +coefficient +cognate +cognate_borrowing +cognate_derivative +cognition +cognitive +cognitive_appraisal +cognitive_behavioral +cognitive_disability +cognitive_distortion +cognitive_impairment +cognitive_load +cognitive_neuroscience +cognitive_overload +cognitive_plausibility +cognitive_psychology +cognitive_science +cognitively +cognitively_demand +cognitively_inspire +cognitively_motivated +cognitively_plausible +coh +coh_erence +cohen +cohen_kappa +coherence +coherence_cohesion +coherent +coherent_contextually +coherent_fluent +coherent_informative +cohesion +cohesion_device +cohesive +cohort +coin +cold +cold_start +cole +cole_workshop +collaborate +collaboration +collaborative +collaborative_filtering 
+collaborative_interlingual +collaboratively +collapse +collect +collection +collective +collectively +college +college_student +collocation +colloquial +color +color_shape +column +column_row +com +com_emnlp +com_git +com_lab +com_microsoft +com_naacl +com_watch +combat +combat_disinformation +combat_hate +combat_misinformation +combat_online +combinaison +combination +combination_thereof +combinatorial +combinatorial_explosion +combinatorial_optimization +combinatory +combinatory_categorial +combine +combine_strength +combined +come +come_cost +come_expense +come_price +comet +comet_bleurt +comet_comet +comma +comma_icon +command +command_line +comme +comme_probl +comme_une +comment +comment_moderation +comment_youtube +commentary +commerce +commerce_company +commerce_platform +commerce_product +commerce_site +commerce_store +commerce_website +commercial +commercially +commercially_available +commission +commit +commit_message +commitment +common +common_crawl +common_practice +common_sense +common_subsequence +commonality +commonality_difference +commonly +commonly_accept +commonsense +commonsense_reasoning +commonsense_validation +commonsenseqa +communaut +communaut_scientifique +communicate +communication +communication_deaf +communicative +communicative_efficiency +communicative_function +communicative_intention +community +community_member +comp +compact +companion +company +company_brand +company_stock +compar +compar_ees +comparability +comparable +comparable_corpora +comparably +comparaison +comparaison_avec +comparaison_des +comparaison_entre +comparative +comparative_analysis +comparatively +comparatively_little +comparatively_small +compare +compare_favorably +comparison +comparon +comparon_deux +comparon_les +compatibility +compatible +compelling +compensate +compensate_lack +compete +competence +competency +competent +competition +competition_host +competitive +competitively +competitiveness +competitor +compilation +compile +compl +compl_ement 
+compl_ete +complaint +complement +complementarity +complementary +complementary_strength +complete +complete_picture +completely +completely_ignore +completely_unsupervised +completeness +completion +completion_kgc +complex +complexit +complexity +complexity_contour +compliance +compliant +complicate +complicated +comply +component +compos +compose +composite +composition +compositional +compositional_distributional +compositional_generalisation +compositional_generalization +compositionality +compositionality_emergent +compositionally +compositionally_generalize +compound +compound_constituent +compound_noun +compound_splitting +compounding +compr +compr_ehension +comprehend +comprehensible +comprehension +comprehension_mrc +comprehensive +comprehensive_overview +comprehensively +comprehensively_assess +comprehensively_evaluate +comprehensiveness +comprendre +comprendre_les +compress +compress_cache +compressed +compression +compression_rate +compression_ratio +comprise +comprise_approximately +comprising +compromise +compte +compte_des +compte_les +computation +computation_budget +computation_cost +computation_overhead +computational +computational_argumentation +computational_burden +computational_cost +computational_demand +computational_efficiency +computational_expense +computational_linguist +computational_linguistics +computational_notebook +computational_overhead +computationally +computationally_cheap +computationally_costly +computationally_demanding +computationally_efficient +computationally_expensive +computationally_heavy +computationally_inexpensive +computationally_intensive +computationally_prohibitive +compute +compute_budget +computed +computer +computer_aid +computer_assist +computer_interaction +computer_mediate +computer_science +computer_scientist +computer_vision +computing +con +con_pour +concatenate +concatenation +conceal +conceive +concentrate +concept +concept_drift +conception +conceptnet +conceptual +conceptual_metaphor 
+conceptualization +conceptualization_instantiation +conceptualize +conceptually +conceptually_simple +concern +concern_regard +concerned +concise +concise_summary +conciseness +conclude +conclude_discussion +conclude_tutorial +conclusion +conclusion_draw +conclusion_premise +concordance +concrete +concrete_abstract +concrete_concept +concrete_noun +concretely +concreteness +concreteness_rating +concurrent +concurrently +condense +condescend +condescend_detection +condescend_pcl +condition +conditional +conditional_independence +conditional_probability +conditional_random +conditional_vae +conditional_variational +conditioning +conduct +conduct_comprehensive +conduct_extensive +conduct_thorough +conference +conference_call +conference_journal +conference_lrec +conference_proceeding +confidence +confidence_calibration +confidence_estimation +confidence_estimator +confidence_interval +confidence_threshold +confident +configurable +configuration +configuration_file +configure +confine +confirm +confirm_effectiveness +confirm_superiority +confirmation +confirmation_bias +conflate +conflict +conflicting +conform +confound +confound_factor +confound_variable +confounder +confront +confuse +confusing +confusing_charge +confusion +confusion_matrix +conjecture +conjunction +conll +conll_bea +conll_conll +conll_format +conll_ontonote +conll_share +conll_sigmorphon +connaissance +connaissance_partir +connect +connect_dot +connect_europe +connected +connection +connectionist +connectionist_temporal +connective +connectivity +connotation +connotation_frame +conquer +consecutive +consensus +consensus_among +consequence +consequently +conservative +consid +consid_comme +consid_eration +consid_erer +consider +considerable +considerable_amount +considerable_interest +considerable_margin +considerable_room +considerably +consideration +consist +consiste +consiste_une +consistency +consistency_regularization +consistent +consistent_improvement +consistently +consistently_outperform 
+consolidate +consolidation +consonant +consonant_vowel +consonne +consonne_voyelle +consortium +consortium_ldc +constant +constantly +constantly_evolve +constantly_grow +constantly_update +constituency +constituency_parse +constituency_parser +constituency_parsing +constituency_tree +constituency_treebank +constituent +constituent_parsing +constituent_tree +constitute +constitution +constrain +constrain_decode +constrain_decoding +constrain_device +constrain_environment +constrain_unconstrained +constrained +constrained_beam +constrained_condition +constrained_unconstrained +constraint +constraint_impose +constraint_satisfaction +constraint_violation +construct +construction +constructive +constructive_comment +constructive_feedback +construire +construire_des +construire_une +construit +construit_partir +consult +consultation +consultation_note +consume +consume_expensive +consumer +consumer_grade +consumer_health +consumer_protection +consuming +consumption +contact +contact_center +contact_induce +contain +contamination +contemporary +contemporary_fiction +contemporary_romanian +contemporary_write +content +content_moderation +content_moderator +content_preservation +contenu +contest +contest_tackle +context +context_dependent +context_window +contexte +contexte_des +contexts +contextual +contextual_bandit +contextualise +contextualised +contextualization +contextualize +contextualize_embedding +contextualized +contextualized_embedding +contextualizing +contextually +contextually_appropriate +contextually_relevant +contiguous +continual +continual_learning +continual_pretraining +continually +continually_update +continuation +continue +continue_grow +continue_pretraining +continued +continued_pre +continuity +continuous +continuous_bag +continuous_discrete +continuous_relaxation +continuous_signing +continuously +continuously_evolve +continuously_update +continuum +contour +contr +contr_ole +contract +contradict +contradiction +contradictory +contrainte 
+contrary +contrary_expectation +contrary_previous +contrast +contrastive +contrastive_decoding +contrastive_learning +contrastive_loss +contribute +contribute_ongoing +contribution +contribution_threefold +contribution_twofold +contributor +control +control_trial +controllability +controllable +controller +controversial +controversial_topic +convenience +convenient +conveniently +convention +conventional +conventional_orthography +conventional_wisdom +conventionally +converge +converge_fast +convergence +convergence_speed +conversation +conversation_disentanglement +conversation_erc +conversation_thread +conversational +conversational_agent +conversational_assistant +conversational_partner +conversational_recommendation +conversational_recommender +converse +conversely +conversion +convert +converter +convex +convex_optimization +convey +convey_meaning +convincing +convincing_argument +convolution +convolution_kernel +convolution_network +convolutional +convolutional_network +convolutional_neural +convolutional_recurrent +cooking +cooking_recipe +cooperate +cooperation +cooperative +cooperative_competitive +cooperative_game +coordinate +coordination +coordination_boundary +cope +copy +copy_mechanism +copying +copying_mechanism +copyright +copyright_material +cor +cor_erence +cor_erences +cord +core +core_component +core_cpu +core_idea +coreference +coreference_chain +coreference_resolution +coreference_resolver +cornerstone +coronavirus +corpora +corporate +corporate_sustainability +corpus +corpus_annot +corpus_parall +corr +corr_acoustique +corr_elation +correct +correct_erroneous +correct_incorrect +correct_mistake +correction +correction_csc +correction_gec +corrective +corrective_feedback +correctly +correctly_classify +correctly_interpret +correctness +correctness_completeness +corrector +correlate +correlate_poorly +correlate_positively +correlate_strongly +correlation +correlation_coefficient +correspond +correspondence +corresponding +correspondingly 
+corroborate +corrupt +corrupted +corruption +cosine +cosine_distance +cosine_similarity +cost +cost_prohibitive +cost_saving +costly +costly_retraining +cot +cot_prompt +cot_prompting +could +could_potentially +council +council_meeting +counseling +counseling_conversation +counselor +count +count_liwc +counter +counter_argument +counter_intuitive +counter_intuitively +counter_narrative +counter_stereotype +counteract +counterfactual +counterfactual_statement +counterfactual_thinking +counterpart +counterspeech +counting +country +country_origin +couple +coupling +cours +course +course_discharge +court +court_decision +court_hearing +court_judgement +court_judgment +court_justice +court_view +cover +cover_wide +coverage +covid +covid_misinformation +covid_outbreak +covid_pandemic +covid_symptom +covid_vaccination +covid_vaccine +cpu +cpu_gpu +cqa +cqa_forum +crac +crac_share +craft +crawl +create +creation +creative +creative_common +creative_writing +creativity +creator +credibility +credible +credit +credit_assignment +credit_card +creole +creole_creole +crf +crime +crime_victim +criminal +criminal_court +crisis +crisis_management +crisis_situation +crit +crit_ere +criterion +critic +critical +critical_thinking +critically +critically_endanger +critically_examine +criticism +criticize +critique +critique_refine +croatian +croatian_serbian +cross +cross_disciplinary +cross_entropy +cross_lingual +cross_lingually +cross_modal +cross_validation +crosslingual +crosslingual_transfer +crowd +crowd_source +crowd_worker +crowdsource +crowdsource_worker +crowdsourced +crowdsourced_worker +crowdsourcing +crowdsourcing_platform +crowdworker +crs +crucial +crucial_role +crucially +csc +ctb +ctc +ctc_attention +ctc_loss +cube +cube_pruning +cue +cue_scope +cuet +cuet_dravidianlangtech +cuet_nlp +cultural +cultural_awareness +cultural_background +cultural_contexts +cultural_difference +cultural_heritage +cultural_norm +cultural_nuance +culturally +culturally_aware 
+culturally_relevant +culturally_sensitive +culture +cumbersome +cumulative +cumulative_gain +cuni +cuni_submission +curate +curation +curiosity +curiosity_drive +curious +curious_case +current +currently +currently_dominant +curriculum +curriculum_learn +curse +curse_multilinguality +curve +curve_auc +custom +customer +customer_care +customer_experience +customer_feedback +customer_review +customer_satisfaction +customer_service +customer_support +customizable +customization +customize +customized +cut +cut_edge +cutting +cutting_edge +cws +cxr +cyber +cyber_aggression +cyber_security +cyber_threat +cyberbullye +cyberbullye_detection +cyberbullying +cyberbullying_trac +cybersecurity +cycle +cycle_consistency +cycle_consistent +czech +czech_german +czech_polish +czech_ukrainian +dag +dag_automata +daily +daily_basis +daily_life +dailymail +damage +dan +dan_cet +dan_domaine +dan_les +dan_leur +dan_litt +dan_papi +dan_parole +dan_une +danger +dangerous +danish +danish_greek +danish_norwegian +dans +dans_cadre +dans_cas +dans_cet +dans_cette +dans_contexte +dans_des +dans_domaine +dans_eseau +dans_espace +dans_leur +dans_litt +dans_nombreuse +dans_nombreux +dans_notre +dans_papi +dans_premier +dans_projet +dans_quelle +dans_texte +dans_travail +dark +dark_web +dashboard +data +data_augmentation +data_collection +data_consortium +data_contamination +data_drive +data_hungry +data_protection +data_regime +data_scarcity +data_sparsity +database +database_schema +datasets +datastore +datum +datum_augmentation +davinci +day +day_day +day_trading +dbpedia +dbpedia_ontology +dbpedia_wikidata +dcu +dcu_submission +deaf +deaf_community +deaf_hard +deaf_hear +deaf_people +deaf_signer +deal +dearth +death +debate +debate_portal +deberta +debiase +debiasing +debug +debugging +decade +decade_ago +decay +deceive +decent +deception +deception_detection +deceptive +deceptive_opinion +deceptive_truthful +decide +decide_whether +decipher +decipherment +decision +decision_boundary 
+decision_maker +decision_making +declarative +decline +decode +decode_speed +decoder +decoder_fid +decoding +decompose +decomposition +decomposition_svd +decouple +decrease +decrease_perplexity +dedicate +dedicated +deduce +deduction +deduction_game +deduction_proof +deductive +deductive_reasoning +deem +deem_necessary +deep +deep_dive +deep_learning +deepen +deepen_understanding +deeply +default +defect +defend +defend_adversarial +defend_attack +defend_backdoor +defend_jailbreak +defense +defense_mechanism +deficiency +deficient +deficit +deficit_disorder +define +definite +definite_description +definition +deft +deft_notre +deft_nous +degenerate +degeneration +degeneration_problem +degradation +degrade +degree +degree_certainty +degree_compositionality +degree_freedom +delay +delete +deletion +deletion_insertion +deletion_substitution +deliberate +deliberately +deliberation +delineate +deliver +delivery +delta +delve +delve_deeply +demand +demanding +dementia +dementia_patient +demo +demo_video +democracy +democratic +democratize +democratize_access +demographic +demographic_axis +demographic_factor +demographic_group +demonstrate +demonstrate_effectiveness +demonstrate_efficacy +demonstrate_exceptional +demonstrate_superior +demonstrate_superiority +demonstration +denoise +denoise_auto +denoise_autoencoder +denoise_diffusion +denoising +denoising_autoencoder +denotation +denotation_connotation +denote +dense +dense_passage +dense_retrieval +dense_retriever +dense_sparse +densely +densely_connect +density +density_estimation +depart +department +depend +depend_availability +depend_heavily +dependence +dependencie +dependency +dependency_parse +dependency_parser +dependency_parsing +dependency_tree +dependency_treebank +dependent +depict +depiction +deploy +deploy_production +deployment +depressed +depressed_individual +depressed_moderately +depressed_mood +depressed_severely +depression +depression_anxiety +depression_diagnosis +depression_severity 
+depression_symptom +depth +depth_analysis +depth_bound +depth_examination +depth_investigation +depth_width +depuis +derivation +derivation_tree +derivational +derivational_family +derivational_inflectional +derivational_morphology +derivative +derivative_free +derive +derni +derni_ere +des +des_aches +des_apprenant +des_approches +des_caract +des_cas +des_cha +des_connaissance +des_consonne +des_crit +des_diff +des_discour +des_documents +des_domaines +des_donn +des_dur +des_egles +des_ements +des_enom +des_enonc +des_entit +des_eponse +des_equences +des_erreur +des_erreurs +des_eseaux +des_esultats +des_esum +des_ethodes +des_exemple +des_exp +des_indice +des_informations +des_issus +des_jeux +des_langue +des_langues +des_locuteur +des_locuteurs +des_mesures +des_mod +des_mot +des_mots +des_nom +des_outils +des_param +des_performances +des_personne +des_plongement +des_probl +des_propri +des_questions +des_relations +des_repr +des_requ +des_ressource +des_strat +des_syst +des_texte +des_travaux +des_tweets +des_valeur +des_vid +des_voix +des_voyelle +descent +descent_sgd +describe +describe_naist +describe_participation +describe_submission +description +descriptive +descriptive_caption +descriptive_statistic +descriptor +deserve +deserve_attention +design +designate +designer +designing +desirable +desirable_property +desire +despite +despite_advancement +despite_impressive +despite_popularity +despite_remarkable +despite_simplicity +despite_success +despite_widespread +detail +detailed +detailed_ablation +detailed_analysis +detailed_description +detect +detect_abusive +detect_misogynistic +detection +detection_dravidian +detection_equality +detector +detector_corrector +deteriorate +deterioration +determination +determine +determine_rumour +determine_veracity +determine_whether +determined +determiner +deterministic +detoxification +detrimental +detrimental_effect +deux +deux_ache +deux_aches +deux_approche +deux_approches +deux_autres +deux_ethodes +deux_exp 
+deux_langue +deux_langues +deux_locuteurs +deux_mod +deux_syst +dev +dev_set +dev_test +devanagari +devanagari_script +develop +developed +developer +development +development_cycle +developmental +developmental_stage +developmental_trajectory +deviate +deviate_norm +deviation +device +device_mobile +devise +devlin +devote +devoted +diachronic +diachronic_change +diacritic +diacritic_restoration +diacritization +diagnose +diagnose_depression +diagnosis +diagnosis_cged +diagnosis_treatment +diagnostic +diagram +dialect +dialect_continuum +dialect_egyptian +dialect_identification +dialect_msa +dialectal +dialectal_arabic +dialectal_variant +dialectal_variation +dialog +dialog_act +dialog_flow +dialog_history +dialog_manager +dialog_tod +dialogue +dialogue_act +dialogue_breakdown +dialogue_history +dialogue_manager +dialogue_tod +diarization +dictionary +dictionary_definition +dictionary_entry +dictionary_lookup +dictionnaire +diet +diff +diff_erence +diff_erences +diff_erent +diff_erentes +diff_erents +diff_explainer +differ +differ_substantially +difference +different +differentiable +differentiable_relaxation +differential +differential_diagnosis +differential_equation +differential_privacy +differentially +differentially_private +differentiate +differentiation +differently +difficile +difficult +difficult_impossible +difficult_interpret +difficulty +diffusion +digital +digital_age +digital_archive +digital_assistant +digital_edition +digital_equality +digital_era +digital_humanity +digital_landscape +digital_lexicography +digital_library +digitization +digitize +dilemma +dimension +dimensional +dimensional_subspace +dimensional_vector +dimensionality +dimensionality_reduction +diminish +diminish_return +dire +direct +direct_acyclic +direct_assessment +direct_indirect +direct_preference +direct_quotation +direction +direction_future +directional +directional_lstm +directional_lstms +directly +directly_indirectly +dirichlet +dirichlet_process +dis +dis_agreement 
+dis_similarity +disability +disadvantage +disagree +disagreement +disambiguate +disambiguation +disaster +disaster_management +discard +discern +discharge +discharge_instruction +discharge_letter +discharge_note +discharge_summary +disciplinary +disciplinary_collaboration +discipline +disclose +disclosure +disconnect +discontinuous +discontinuous_constituency +discontinuous_constituent +discontinuous_ner +discour +discour_langue +discourage +discourse +discourse_connective +discourse_marker +discourse_unit +discover +discovery +discrepancy +discrete +discrete_continuous +discrete_diffusion +discrete_latent +discrete_unit +discriminant +discriminant_analysis +discriminate +discriminate_similar +discriminating +discriminating_similar +discrimination +discriminative +discriminative_attribute +discriminative_power +discriminative_reranking +discriminator +discriminator_generator +discriminatory +discriminatory_power +discursive +discuss +discuss_implication +discussion +discussion_forum +discussion_thread +disease +disease_diagnosis +disease_gene +disease_icd +disease_outbreak +disease_progression +disease_surveillance +disease_symptom +disentangle +disentanglement +disfluency +disfluency_detection +disfluency_removal +disfluent +disfluent_fluent +disgust +disgust_anger +disgust_fear +disinformation +disinformation_campaign +disjoint +disorder +disorder_autism +disorder_depression +disorder_schizophrenia +disparate +disparity +display +disponible +disponible_pour +dispute +dispute_resolution +disregard +disrupt +dissect +disseminate +dissemination +dissemination_misinformation +dissimilar +dissimilarity +distance +distance_asymmetry +distance_minimization +distant +distant_supervision +distantly +distantly_supervise +distantly_supervised +distil +distilbert +distilbert_roberta +distilbert_xlnet +distill +distillation +distilled +distilled_version +distinct +distinction +distinctive +distinctive_characteristic +distinctive_feature +distinguish +distinguish_confusing 
+distinguishable +distort +distortion +distract +distractor +distress +distribute +distribution +distribution_ood +distribution_shift +distributional +distributional_hypothesis +distributional_semantic +distributional_thesauri +distributional_thesaurus +dive +dive_deeply +diverge +divergence +divergent +divergent_thinking +diverse +diverse_array +diversification +diversified +diversify +diversity +diversity_inclusion +divide +divide_conquer +division +dnn +dnn_aes +dnn_hmm +doc +doc_doc +doc_vec +docre +doctor +doctor_nurse +doctor_patient +document +documentary +documentary_linguist +documentation +documentation_endanger +documentation_revitalization +documents +dog +domain +domain_adaptation +domain_adaption +domaine +domaine_biom +domaine_clinique +domaine_ecialit +domaine_edical +domaine_recherche +domaine_traitement +domaines +domaines_ecialis +domaines_ecialit +dominance +dominant +dominant_paradigm +dominate +donc +donn +donn_ees +door +dot +dot_ees +dot_product +double +double_blind +doubt +doubt_reliability +download +downstream +downstream_tasks +dozen +dpo +dpr +draft +drama +dramatic +dramatic_improvement +dramatically +dramatically_reduce +drastic +drastically +drastically_reduce +dravidian +dravidian_dravidianlangtech +dravidian_eacl +dravidian_family +dravidian_tamil +dravidianlangtech +dravidianlangtech_abusive +dravidianlangtech_acl +dravidianlangtech_eacl +dravidianlangtech_fake +dravidianlangtech_naacl +dravidianlangtech_ranlp +draw +draw_conclusion +draw_connection +draw_inspiration +draw_upon +drawback +dream +drift +drift_away +drive +driver +drop +drop_dramatically +drop_pronoun +drop_replacement +dropout +drug +drug_adverse +drug_discovery +drug_drug +drug_interaction +drug_reaction +dsl +dsl_share +dst +dst_multiwoz +dstc +dstc_dstc +dstc_track +dual +dual_channel +dual_decomposition +dual_encoder +dual_stream +dub +duc +duc_duc +duc_tac +due +due_absence +due_inability +due_lack +due_paucity +due_quadratic +due_reliance +due_scarcity 
+due_sheer +due_unavailability +duplicate +duplication +dur +dur_des +dur_ees +duration +dutch +dutch_subtitle +dyadic +dyadic_conversation +dyadic_interaction +dynamic +dynamic_oracle +dynamic_programming +dynamic_routing +dynamically +dynamically_adjust +dynamically_allocate +dynamically_decide +dynamically_select +dynamically_update +dynamique +eacl +eacl_hope +eacl_offensive +eae +ealis +ealis_par +ealis_sur +ealisation +ealisation_des +early +early_childhood +early_exit +early_middle +early_modern +early_rumor +early_stage +early_stop +early_warning +earning +earning_call +earning_conference +ease +easily +easily_accessible +easily_adaptable +easily_extend +easily_extendable +easily_extensible +easily_overfit +easily_understandable +east +east_asia +east_asian +east_slavic +eastern +eastern_european +easy +easy_access +easy_hard +easy_implement +easy_read +eat +eat_disorder +eation +ecemment +ecessaire +ecessaire_pour +echo +ecialis +ecialis_pour +ecialit +ecificit +ecifique +ecifique_aux +ecifique_domaine +ecis +ecis_ement +ecision +ecnu +ecnu_semeval +ecological +ecological_validity +economic +economic_indicator +economy +ecosystem +ecrit +ecrit_par +ecrit_participation +ecrivon +ecrivon_une +edge +edge_device +edi +edi_acl +edi_aux +edi_eacl +edi_homophobia +edi_ranlp +edical +edical_dans +edical_fran +edicale +edicaux +ediction +edinburgh +edinburgh_submission +edire +edire_automatiquement +edit +edit_distance +edit_headline +edit_operation +edited +editing +editing_effort +edition +edition_efi +editor +editorial +edo +edu +educate +education +educational +educational_material +educator +eeg +eeg_signal +ees +ees_afin +ees_annot +ees_apprentissage +ees_aux +ees_biom +ees_comme +ees_dan +ees_dans +ees_disponible +ees_entra +ees_erence +ees_lor +ees_par +ees_pour +ees_sont +ees_structur +ees_sur +ees_textuelle +effect +effective +effectively +effectively_manage +effectiveness +effectiveness_generality +effectiveness_universality +effectu +effectu_ees 
+effectu_sur +effet +effet_sur +efficace +efficace_pour +efficace_que +efficacy +efficiency +efficient +efficient_finetuning +efficiently +effort +effort_dedicate +effort_devote +efi +efi_fouille +efinition +efinition_des +egalement +egalement_les +egalement_que +egalement_une +egie +egies +egles +egles_pour +egyptian +egyptian_arabic +egyptian_gulf +egyptian_levantine +egyptian_moroccan +ehension +ehension_automatique +ehension_langage +ehension_parole +ehr +ehr_note +ehrs +eight +either +elaborate +elaboration +elation +elation_entre +elderly +elderly_people +ele +ele_entra +ele_langage +ele_langue +ele_qui +election +electra +electronic +electronic_dictionary +electronic_health +electronic_medical +elegant +element +element_wise +elementary +elementary_discourse +elementary_school +elementary_science +elementary_unit +eles +eles_acoustique +eles_apprentissage +eles_entra +eles_langage +eles_langue +eles_neuronaux +eles_sont +elev +elevate +eleven +elicit +elicitation +eliminate +eliminate_need +eliminate_redundant +elimination +elior +elior_les +elioration +elioration_des +elioration_significative +eliorer +eliorer_les +elisation +elle +elle_est +elle_permet +elle_peut +elle_sont +ellipsis +ellipsis_resolution +elmo +elmo_bert +else +elucidate +elusive +email +email_thread +emantique +emantique_entre +emantique_multilingue +emantique_par +emantique_pour +emantique_qui +emantiques +emantiques_dans +emantiques_entre +emantiques_pour +ematique +ematiques +embe +embed +embed_kge +embed_space +embedding +embodied +embodied_agent +embody +embody_agent +embrace +eme +eme_esolution +eme_esum +eme_etat +eme_etection +eme_evelopp +eme_qui +eme_recherche +eme_reconnaissance +eme_synth +eme_temp +eme_traduction +ement +ements +emerge +emerge_powerful +emerge_promising +emergence +emergency +emergency_department +emergency_response +emergent +emergent_ability +emergent_communication +emergent_modularity +emes +emission +emnlp +emoji +emoji_emoticon +emoji_hashtag 
+emoji_prediction +emonstration +emoticon +emoticon_emoji +emotion +emotion_anger +emotion_cause +emotion_flip +emotion_intensity +emotional +emotional_connotation +emotional_intelligence +emotional_intensity +emotional_reaction +emotional_support +emotional_trajectory +emotionally +emotionally_intelligent +empathetic +empathetic_chatbot +empathetic_dialogue +empathetic_response +empathy +empathy_distress +empathy_emotion +empathy_personality +emphasis +emphasis_selection +emphasise +emphasize +emphasize_importance +emphasize_necessity +emphasize_need +emphasize_significance +empirical +empirical_evidence +empirical_investigation +empirically +empirically_validate +employ +employment +empower +empty +empty_category +empty_string +emulate +enable +enable_seamless +encapsulate +encode +encoder +encoder_decoder +encoding +encoding_bpe +encompass +encore +encore_peu +encounter +encourage +encouraging +encyclopedia +encyclopedic +end +end_end +endanger +endanger_documentation +endanger_indigenous +endangered +endangered_documentation +endeavor +endeavour +ended +ending +endow +ene +enement +enement_partir +energy +energy_consumption +enes +enfin +enfin_nous +enforce +enforce_consistency +engage +engagement +engaging +engine +engineer +engineering +english +english_german +english_manipuri +enhance +enhanced +enhancement +enjoy +enlarge +enom +enom_ene +enom_enes +enonc +enormous +enormous_amount +enough +enrich +enriched +enrichissement +enrichment +ensemble +ensembling +ensuite +ensuite_nous +ensuite_utilis +ensure +ensure_equitable +ensure_integrity +ensure_reliability +ensure_reproducibility +ensure_safe +entail +entail_contradict +entail_hypothesis +entailment +entailment_contradiction +entailment_rte +enter +enter_abstract +enterprise +entertainment +entire +entirely +entit +entit_dans +entit_nomm +entity +entity_link +entity_recognition +entity_recognizer +entity_typing +entr +entr_ees +entra +entra_inement +entra_sur +entre +entre_autre +entre_deux +entre_entit 
+entre_les +entre_terme +entropy +entropy_loss +entropy_minimization +entropy_regularization +entry +entry_barrier +enumerate +enumerate_possible +enumeration +environment +environmental +environmental_feedback +environmental_impact +environmental_sound +envision +ependance +ependance_syntaxique +episode +episodic +episodic_logic +episodic_memory +epistemic +epistemic_marker +epistemic_uncertainty +epoch +eponse +equal +equal_importance +equality +equality_diversity +equally +equally_important +equation +equence +equences +equip +equipe +equitable +equitable_access +equitable_technology +equity +equivalence +equivalent +era +eralement +eration +eration_automatique +eration_texte +erature +erc +ere +ere_etape +ere_exp +erement +erement_dan +erence +erence_entre +erence_pour +erences +erences_entre +erent +erent_que +erentes +erentes_ethodes +erents +erents_mod +erents_niveaux +erer +erer_des +eressons +eressons_aux +erience +erience_visant +eriences +eriences_avec +eriences_men +eriences_montrent +eriences_sont +eriences_sur +eristiques +eristiques_prosodique +ernie +erreur +erreurs +erreurs_traduction +erroneous +error +error_accumulation +error_correction +error_diagnosis +error_prone +error_propagation +error_rate +esambigu +esambigu_isation +escalate +ese +ese_parole +ese_que +eseau +eseau_lexical +eseau_lexico +eseau_neurone +eseaux +eseaux_neuronaux +eseaux_neurone +esent +esent_dan +esent_dans +esentation +esentation_campagne +esentation_des +esentation_emantique +esentations +esentations_continue +esentations_ees +esente +esente_dan +esente_etude +esente_syst +esente_travail +esente_une +esentent +esentent_des +esentent_une +esenton +esenton_les +esenton_une +esentons +esentons_dan +esentons_egalement +esentons_ici +esentons_outil +esentons_syst +esg +esg_impact +esl +esl_learner +esolution +esolution_cor +esolution_des +espace +especially +essay +essay_authenticity +essay_fluency +essay_grade +essay_rhetoric +essay_scoring +essence +essential 
+essential_ingredient +essentially +est +est_appliqu +est_bas +est_celui +est_compos +est_consid +est_dire +est_disponible +est_donc +est_ealis +est_ecessaire +est_effectu +est_elle +est_ensuite +est_etudi +est_evalu +est_fond +est_particuli +est_pas +est_peu +est_plus +est_probl +est_souvent +est_une +est_utile +est_utilis +establish +establish_connection +established +establishment +estimate +estimation +estimation_mle +estimator +estonian +estonian_finnish +estonian_latvian +estonian_wordnet +esultat +esultat_obtenus +esultats +esultats_etat +esultats_exp +esultats_indiquent +esultats_montrent +esultats_obtenus +esultats_ont +esultats_que +esultats_sont +esultats_sugg +esum +esum_automatique +esum_fran +etant +etant_donn +etape +etat +etat_art +etc +ete +ete_utilisateur +etecter +etecter_les +etection +etection_automatique +etection_des +etection_enement +etection_entit +eterminer +ethic +ethical +ethical_concern +ethical_consideration +ethical_implication +ethical_issue +ethnic +ethnic_group +ethnic_minority +ethnicity +ethnicity_gender +ethnicity_religion +ethode +ethode_est +ethode_permet +ethode_permettant +ethode_pour +ethode_propos +ethode_supervis +ethode_sur +ethodes +ethodes_apprentissage +ethodes_etat +ethodes_existante +ethodes_neuronale +ethodes_pour +ethodes_propos +ethodes_statistique +ethodes_supervis +ethodes_traitement +ethodologie +ethodologie_pour +etique +etique_les +etiquetage +etiquetage_equence +etiquetage_morpho +etiquetage_party +etre +etre_acoustique +etre_appliqu +etre_consid +etre_ees +etre_prosodique +etre_utile +etre_utilis +etrique +etude +etude_acoustique +etude_cas +etude_comparative +etude_des +etude_est +etude_exp +etude_int +etude_montre +etude_nous +etude_sur +etudi +etudi_les +etudion +euclidean +euclidean_distance +euclidean_hyperbolic +euclidean_space +europarl +europarl_corpus +europe +european +european_commission +european_country +european_court +european_grid +european_parliament +european_portuguese +european_union 
+evade +evade_detection +eval +evalu +evalu_par +evalu_sur +evaluate +evaluated +evaluation +evaluation_campaign +evaluation_understudy +evaluative +evaluator +evaluer +evaluer_les +evaluer_leur +evaluon +evaluon_notre +evelopp +evelopp_dans +evelopp_par +evelopp_pour +eveloppement +eveloppement_syst +eveloppement_une +even +even_pronounced +even_surpass +even_though +evenly +evenly_distribute +event +event_ade +event_causality +event_trigger +event_unfold +eventually +ever +ever_evolve +ever_grow +ever_increase +every +every_day +everyday +everyday_activity +everyday_life +everyday_thing +everyone +everything +evidence +evidence_medicine +evident +evoke +evolution +evolutionary +evolve +evolve_landscape +exacerbate +exact +exact_age +exact_match +exact_matching +exactly +exam +exam_assembly +examination +examine +example +exceed +excel +excellent +except +exception +exceptional +exceptional_capability +exceptional_performance +exceptional_proficiency +excerpt +excessive +excessively +exchange +exciting +exciting_opportunity +exclude +exclusion +exclusive +exclusively +executable +executable_logical +executable_program +executable_sql +execute +execution +execution_trace +exemplar +exemple +exemplify +exercise +exhaustive +exhaustive_experimentation +exhaustively +exhibit +exhibit_exceptional +exhibit_impressive +exhibit_outstanding +exhibit_remarkable +exhibit_superior +exist +existante +existe +existe_pas +existence +existent +existing +exit +exp +exp_erience +exp_eriences +exp_sur +expand +expand_scope +expansion +expect +expectation +expectation_maximization +expected +expected_calibration +expedite +expense +expensive +expensive_impractical +experience +experience_replay +experienced +experiment +experimental +experimental_setup +experimentally +experimentation +expert +expert_moe +expertise +explain +explain_variance +explainability +explainable +explainable_artificial +explainable_recommendation +explainable_xai +explainer +explanation +explanation_lime 
+explanation_regeneration +explanatory +explanatory_note +explanatory_power +explicit +explicit_connective +explicit_implicit +explicitly +exploit +exploitation +exploration +exploration_exploitation +exploratory +explore +explore_possibility +explorer +explosion +exponential +exponential_growth +exponentially +export +export_format +expose +expose_vulnerability +exposure +exposure_bias +express +express_opinion +expression +expression_mwe +expression_temporelle +expressive +expressive_power +expressiveness +expressivity +extend +extendable +extended +extended_version +extensibility +extensible +extensible_toolkit +extension +extensive +extensive_experiment +extensive_experimentation +extensively +extent +external +external_datastore +external_knowledge +extra +extra_sentential +extract +extraction +extraction_aste +extraction_ate +extraction_eae +extraction_oie +extraction_openie +extractive +extractive_abstractive +extractive_summarisation +extractive_summarization +extractive_summarizer +extractor +extraire +extraire_automatiquement +extraire_des +extrait +extrait_des +extraite +extrapolate +extrapolation +extreme +extremely +extremely_challenging +extremely_low +extremely_scarce +extremely_weakly +extrinsic +extrinsic_evaluation +extrinsic_intrinsic +extrinsically +eye +eye_gaze +eye_movement +eye_track +eye_tracker +eye_tracking +face +face_face +face_hub +facebook +facebook_comment +facebook_post +facebook_twitter +facet +faceted +facial +facial_expression +facial_motion +facilitate +facilitate_future +facility +fact +fact_check +fact_checker +fact_checking +fact_verification +facto +facto_standard +factoid +factoid_list +factoid_question +factor +factor_affect +factor_contribute +factor_influence +factored +factorization +factorize +factual +factual_consistency +factual_correctness +factual_inaccuracy +factual_inconsistency +factuality +factuality_faithfulness +factuality_generated +factuality_reporting +factually +factually_accurate +factually_consistent 
+factually_correct +factually_inaccurate +factually_inconsistent +factually_incorrect +faible +faible_ressource +fail +fail_adequately +fail_capture +fail_generalize +failure +failure_mode +fair +fair_comparison +faire +faire_des +faire_nous +fairly +fairness +fait +fait_objet +fait_que +faithful +faithful_explanation +faithful_plausible +faithfully +faithfully_reflect +faithfulness +fake +fake_news +falcon +fall +fall_back +fall_behind +fall_outside +fall_short +fallacy +false +false_claim +false_friend +false_misleading +false_negative +false_positive +false_premise +familiar +familiarity +family +famous +fan +far +far_complicate +far_exacerbate +fare +farsi +fashion +fast +fast_accurate +fast_cheap +fast_convergence +fast_pace +fasttext +fasttext_elmo +favor +favor_neutral +favorable +favorably +favour +fbk +fbk_iwslt +fbk_participation +fear +fear_disgust +fear_joy +fear_sadness +feasibility +feasible +feature +feature_engineering +feature_extractor +federal +federate +federate_learning +federated +federated_learning +feed +feed_forward +feedback +feedback_comment +feedback_loop +feedback_rlhf +feedforward +feedforward_neural +feel +feeling +female +female_male +feminine +feminine_masculine +fetch +fever +fever_share +fiction +fiction_fiction +fictional +fictional_character +fictional_narrative +fid +fidelity +field +field_crf +fieldwork +fifth +fifth_edition +fifth_place +fight +fight_covid +figurative +figurative_figurative +figurative_literal +figure +figure_caption +figure_table +file +file_format +filipino +fill +fill_blank +fill_gap +fill_pause +filler +filler_gap +filler_insertion +filling +film +film_subtitle +filter +filter_irrelevant +filter_noisy +filtering +fin +final +finally +finance +finance_healthcare +finance_law +financial +financial_analyst +financial_disclosure +financial_earning +financial_forecasting +financial_institution +financial_market +financial_microblog +financial_misinformation +financial_narrative +financial_sector 
+financial_transaction +fincausal +fincausal_share +find +finding +finding_highlight +finding_impression +finding_indicate +finding_reveal +finding_suggest +finding_underscore +fine +fine_grain +fine_tune +fine_tuned +fine_tuning +finer +finer_grain +finer_grained +finetune +finetuning +finish +finite +finite_automata +finite_state +finnish +finnish_hungarian +finnish_swedish +finnlp +finnlp_workshop +fire +firm +first +firstly +fisher +fit +fitting +five +five_personality +fix +fix_bug +fix_length +fix_size +fix_window +fixed +flag +flair +flan +flat +flat_ner +flat_nest +flat_nested +flavor +flaw +flawed +flexibility +flexible +flexible_enough +flexibly +flickr +flickr_coco +flip +float +float_point +flop +flores +flow +fluctuation +fluency +fluency_adequacy +fluency_coherence +fluent +fluent_coherent +fluent_informative +fly +fmri +focal +focal_loss +focal_point +focus +focused +focusse +fois +fold +fold_cross +follow +follower +following +following_url +fonction +fonction_des +fonction_leur +fond +fond_ees +fond_sur +font +food +food_drug +fool +footprint +force +force_aligner +force_labour +forecast +forecasting +foreign +foreign_learner +forensic +forensic_voice +forest +forest_classifier +forest_regressor +forest_string +forget +forget_gate +forget_old +forget_previously +forgetting +forgetting_continual +form +formal +formal_definition +formal_informal +formalism +formality +formality_control +formality_style +formalization +formalize +formally +formally_define +formant +format +formation +formatting +forme +former +formidable +formidable_challenge +formula +formulate +formulation +forth +fortunately +forum +forum_blog +forum_reddit +forward +forward_backward +forward_look +forward_pass +foster +foster_future +foster_research +fouille +fouille_opinion +fouille_texte +found +foundation +foundation_future +foundational +four +fourth +fourth_conference +fourth_edition +fourth_place +fourth_workshop +fraction +fragment +frame +frame_element +frame_evoke 
+frame_induction +framenet +framenet_frame +framenet_project +framenet_verbnet +framework +framing +framing_persuasion +fran +fran_ais +fran_aise +france +france_paris +free +free_grammar +free_verse +freebase +freedom +freely +freely_accessible +freely_available +freely_tell +freeze +french +french_italian +frequency +frequency_idf +frequency_inverse +frequent +frequently +frequently_appear +frequently_encounter +frequently_occur +fresh +fresh_perspective +friend +friend_family +friendly +front +front_end +frontier +frozen +frustratingly +frustratingly_easy +frustratingly_simple +fst +fuel +fulfil +fulfill +full +fully +fully_connect +fully_differentiable +fully_exploit +fully_supervise +function +functional +functional_correctness +functional_distributional +functional_equivalence +functional_expression +functional_imaging +functional_pressure +functional_specialization +functionality +functionally +functionally_similar +fund +fund_european +fund_project +fundamental +fundamentally +funding +funniness +funniness_edit +funny +furthermore +fuse +fusion +future +future_direction +future_research +fuzzy +fuzzy_logic +fuzzy_match +fuzzy_matching +fuzzy_string +gain +gain_immense +gain_momentum +gain_popularity +gain_prominence +gain_traction +galician +galician_portuguese +game +game_commentary +game_player +game_purpose +game_theoretic +game_theory +gan +gap +garner +garner_considerable +garner_significant +gat +gate +gate_mechanism +gate_recurrent +gather +gathering +gating +gating_mechanism +gauge +gaussian +gaussian_distribution +gaussian_mixture +gaussian_noise +gaussian_process +gaze +gaze_behaviour +gazetteer +gcn +gcns +gear +gear_towards +gec +gemini +gemini_claude +gemini_pro +gemma +gemma_llama +gemma_mistral +gen +genai +genai_detection +gender +gender_age +gender_bias +gender_equality +gender_ethnicity +gender_fair +gender_inclusive +gender_inequality +gender_marking +gender_nationality +gender_neutral +gender_race +gender_racial +gender_religion 
+gender_stereotype +gendere +gendere_ambiguous +gendere_pronoun +gene +gene_disease +gene_mutation +gene_protein +gene_regulation +general +general_purpose +generalisability +generalisation +generalise +generalise_unseen +generalist +generality +generalizability +generalizable +generalization +generalization_ability +generalization_capability +generalize +generalize_compositionally +generalize_poorly +generalize_unseen +generalized +generalized_quantifier +generally +generate +generated +generating +generation +generation_nlg +generation_rag +generative +generative_adversarial +generative_discriminative +generative_replay +generator +generator_discriminator +generic +genetic +genetic_algorithm +genia +genre +genre_framing +genre_textuel +genuine +genuinely +geo +geo_cultural +geo_locate +geographic +geographic_coordinate +geographic_location +geographic_region +geographical +geographical_location +geographical_origin +geographical_region +geographically +geographically_diverse +geography +geolocation +geometric +geometric_operation +geometric_property +geometric_transformation +geometry +german +german_italian +german_particle +german_upper +germanet +germanic +germany +germeval +germeval_share +gesture +get +gigaword +gigaword_corpus +git +github +github_com +github_repository +give +give_rise +giza +glass +glass_box +global +global_local +global_pandemic +globale +globally +globally_coherent +globally_normalize +globally_optimal +globe +gloss +gloss_free +glossary +glossed +glossing +glove +glove_fasttext +glue +glue_benchmark +glue_squad +glue_superglue +glyph +glyph_phonetic +gmm +gmm_hmm +gnn +gnns +goal +goal_orient +gold +gold_standard +golden +golden_standard +good +good_performing +good_practice +google +google_assistant +google_cloud +google_gemini +google_gram +google_home +google_ngram +google_scholar +google_translate +govern +governance +governance_esg +government +government_agency +government_opposition +government_organization +gpt +gpt_claude 
+gpt_davinci +gpt_gemini +gpt_llama +gpt_mini +gpt_mixtral +gpt_neo +gpt_palm +gpt_turbo +gpu +gpu_cpu +gpu_hour +gpu_memory +grade +grade_essay +gradient +gradient_accumulation +gradient_boost +gradient_boosting +gradient_descent +gradient_reversal +gradient_saliency +grading +gradual +gradually +gradually_become +gradually_increase +grain +grained +gram +grammaire +grammaire_cat +grammaire_pour +grammar +grammar_ccg +grammar_checker +grammar_formalism +grammar_induction +grammatical +grammatical_correctness +grammatical_error +grammatical_gender +grammatical_ungrammatical +grammaticality +grammaticality_judgment +grammatically +grammatically_correct +grammatically_incorrect +grand +grand_mod +grand_nombre +grand_public +grande +grande_quantit +grande_taille +grant +granular +granularity +graph +graph_completion +graph_convolution +graph_convolutional +graph_dag +graph_tkg +graphe +graphe_pour +grapheme +grapheme_phoneme +graphic +graphical +graphical_interface +grasp +great +great_deal +great_potential +great_progress +great_promise +great_significance +great_success +greatly +greatly_benefit +greatly_reduce +greedy +greedy_algorithm +greedy_decode +greedy_decoding +greedy_search +greedy_transition +greek +greek_latin +greek_turkish +green +green_red +grid +grid_puzzle +grid_search +grid_tagging +ground +ground_truth +grounded +grounding +groundwork +groundwork_future +group +group_member +groupe +grouping +grow +grow_body +grow_concern +grow_demand +grow_exponentially +grow_interest +grow_linearly +grow_popularity +grow_prevalence +grow_rapidly +grow_volume +growth +gru +gru_cnn +gsm +gsm_math +guarantee +guardrail +guess +guess_game +guessing +guessing_game +gui +gui_agent +guidance +guide +guideline +gujarati +gujarati_hindi +gulf +gulf_arabic +gulf_egyptian +gulf_levantine +gun +gun_control +gun_violence +habit +half +half_century +half_million +half_true +hallucinate +hallucination +hallucination_mitigation +hallucination_omission +hamper +hamper_lack +han 
+hand +hand_craft +hand_engineer +hand_gesture +handcraft +handcraft_feature +handcrafted +handcrafted_feature +handful +handle +handle_lengthy +handling +handwritten +happen +happen_next +happiness +happiness_sadness +happy +happy_sad +harassment +hard +hard_hear +hard_hearing +hard_monotonic +hard_negative +hardly +hardware +harm +harmful +harmful_content +harmful_meme +harmful_stereotype +harmfulness +harmonize +harness +harness_power +harvest +harvesting +hash +hashing +hashtag +hate +hate_speech +hateful +hateful_content +hateful_meme +hateful_offensive +hatred +hausa +head +head_modifier +head_movement +head_tail +header +heading +headline +health +health_advice +health_care +health_condition +health_counseling +health_disorder +health_forum +health_monitoring +health_professional +health_record +health_smm +health_status +health_surveillance +healthcare +healthcare_finance +healthcare_professional +healthcare_provider +healthcare_worker +healthy +healthy_control +hear +hearing +hearing_impair +heart +heart_disease +heart_failure +heart_rate +heart_sound +heavily +heavily_depend +heavily_reliant +heavily_rely +heavy +heavy_computation +heavy_reliance +hebrew +hebrew_bible +hebrew_manuscript +hedge +hedge_scope +heighten +help +helpful +helpfulness +helpfulness_prediction +helsinki +helsinki_finite +helsinki_nlp +helsinki_submission +hence +herein +heritage +heterogeneity +heterogeneous +heterogeneous_graph +heuristic +heuristic_rule +heuristically +hidden +hidden_layer +hidden_markov +hidden_state +hide +hide_layer +hide_markov +hide_state +hierarchical +hierarchical_clustering +hierarchical_dirichlet +hierarchically +hierarchically_organize +hierarchy +hierarchy_aware +high +high_dimensional +high_fidelity +high_precision +high_quality +high_school +high_stake +high_throughput +higher +highest +highest_ranking +highest_scoring +highlight +highlight_importance +highlight_necessity +highlight_potential +highlight_significance +highlight_urgent +highly 
+highly_accurate +highly_agglutinative +highly_configurable +highly_correlate +highly_dependent +highly_desirable +highly_imbalanced +highly_inflect +highly_productive +highly_sensitive +highly_skewed +highly_subjective +hinder +hinder_applicability +hindi +hindi_bengali +hindi_kannada +hindi_marathi +hindi_nepali +hindi_punjabi +hindi_telugu +hindi_urdu +hinge +hinge_upon +hinglish +hint +hire +hire_decision +historian +historic +historical +historical_newspaper +historically +history +history_culture +hit +hit_rate +hits +hlt +hmm +hmm_crf +hoc +hoc_abbreviation +hold +hold_annual +hold_conjunction +hold_immense +hold_promise +hold_true +holder +holder_target +holistic +holistic_view +home +home_automation +homogeneous +homogeneous_heterogeneous +homograph +homograph_disambiguation +homophobia +homophobia_transphobia +homophone +homophone_homograph +hop +hop_reasoning +hope +hope_spur +hopefully +horizon +hospital +hospital_course +hospital_discharge +hospital_stay +host +hot +hot_encoding +hot_topic +hot_vector +hotel +hotel_review +hotpotqa +hour +hour_audio +hour_minute +hour_recording +hour_transcribed +house +however +hpcc +hpcc_semeval +hpsg +hpsg_grammar +hpsg_treebank +html +html_file +html_page +http +https +https_anonymous +https_edu +https_github +https_huggingface +https_org +https_www +huawei +huawei_translation +hub +hug +hug_face +huge +huge_amount +huge_volume +huggingface +huggingface_datasets +huggingface_hub +huggingface_repository +humain +human +human_cognition +human_judgement +human_judgment +human_loop +human_parity +human_readable +humaneval +humanity +humanity_scholar +humor +humor_commonality +humor_offense +humorous +humorous_humorous +humour +humour_sarcasm +hundred +hundred_billion +hundred_million +hundred_thousand +hungarian +hungry +hurdle +hurt +hybrid +hyper +hyper_parameter +hyper_relational +hyperbolic +hyperbolic_geometry +hyperbolic_space +hypergraph +hyperlink +hypernetwork +hypernym +hypernym_discovery +hypernym_hyponym 
+hypernyms +hypernymy +hypernymy_detection +hypernymy_hyponymy +hyperparameter +hyperparameter_optimization +hyperparameter_tuning +hyperpartisan +hyperpartisan_news +hyponym +hyponym_hypernym +hyponymy +hyponymy_hypernymy +hypoth +hypoth_ese +hypothesis +hypothesis_testing +hypothesise +hypothesize +hypothetical +iaa +ibm +icd +icd_code +icd_coding +icelandic +icelandic_irish +ici +ici_une +icl +icon +icon_share +idea +idea_behind +ideal +ideally +ideation +identical +identifi +identifi_les +identifiable +identifiable_information +identification +identification_dravidian +identification_lid +identification_nadi +identifie +identifier +identifier_les +identify +identity +identity_group +ideological +ideology +idf +idiom +idiom_metaphor +idiomatic +idiomatic_expression +idiomatic_literal +idiomaticity +idiomaticity_detection +idiosyncrasy +idiosyncratic +iemocap +ignore +iit +iit_semeval +iit_submission +ijcnlp +ill +ill_define +ill_form +ill_suited +illness +illuminate +illustrate +illustrate_utility +illustration +illustrative +illustrative_example +ilp +ilp_formulation +image +image_caption +image_captioning +imagery +imagination +imagine +imaging +imaging_report +imbalance +imbalanced +imdb +imdb_movie +imdb_yelp +imitate +imitation +immediate +immediately +immense +immense_potential +immensely +immigrant +immigrant_woman +impact +impactful +impair +impairment +impede +impede_progress +imperative +imperfect +impl +impl_ement +implement +implementation +implementation_detail +implication +implicature +implicit +implicit_discourse +implicit_explicit +implicit_underspecified +implicitly +implicitly_abusive +implicitly_assume +implicitly_encode +imply +import +import_export +importance +important +important_role +importante +importante_pour +importantly +impose +impose_constraint +impose_restriction +impossible +impractical +impractical_deploy +impression +impression_section +impressive +impressive_capability +impressive_progress +improve +improved +improvement 
+inability +inaccessible +inaccuracy +inaccurate +inadequacy +inadequate +inadequately +inadvertently +inappropriate +incapable +incentive +incentivize +inception +incident +incite +incite_violence +include +inclusion +inclusion_edi +inclusion_exclusion +inclusive +inclusivity +incoherent +incoming +incompatible +incomplete +incomplete_utterance +incompleteness +incongruity +incongruity_theory +inconsistency +inconsistent +incorporate +incorporation +incorrect +incorrectly +increase +increase_interest +increase_popularity +increase_visibility +increasingly +increasingly_apparent +increasingly_important +increasingly_interested +increasingly_popular +increasingly_prevalent +increasingly_vital +incremental +incrementally +incur +incur_additional +incur_high +incur_substantial +ind +ind_intent +ind_ood +indeed +independence +independence_assumption +independent +independently +independently_ignore +index +indexation +indexing +india +indian +indian_regional +indian_sign +indic +indic_indic +indicate +indication +indicative +indicator +indicbert +indice +indigenous +indigenous_america +indigenous_american +indigenous_canada +indigenous_community +indigenous_south +indigenous_speak +indiquent +indiquent_que +indirect +indirect_supervision +indirectly +indispensable +indistinguishable +individual +individual_neuron +individually +indo +indo_aryan +indo_european +indonesian +indonesian_tagalog +induce +induced +induction +induction_bli +inductive +inductive_bias +industrial +industrial_application +industry +industry_academia +ine +ine_traitement +ineffective +inefficiency +inefficient +inement +inequality +inevitable +inevitably +inevitably_suffer +inexpensive +inf +inf_erence +inf_erer +infeasible +infer +infer_miss +infer_missing +inference +inference_acceleration +inference_nli +inference_speed +inference_speedup +inferential +inferior +infille +infilling +infinite +inflate +inflect +inflect_form +inflection +inflection_derivation +inflection_sigmorphon 
+inflection_table +inflectional +inflectional_affix +inflectional_derivational +inflectional_morphology +inflectional_paradigm +influence +influential +influential_factor +inform +informal +informal_nature +informal_persian +information +information_overload +information_theoretic +informational +informations +informative +informative_covid +informativeness +informed +informed_decision +infrastructure +infrastructure_clarin +infrequent +infuse +ing +ingredient +inherent +inherent_ambiguity +inherent_limitation +inherently +inherit +inhibit +initial +initial_seed +initialization +initialize +initially +initiate +initiative +initiative_tei +inject +inject_backdoor +injection +injection_attack +inner +inner_product +inner_working +innovation +innovative +innovatively +input +inquiry +insensitive +insert +insertion +insertion_deletion +insertion_substitution +inside +inside_outside +insight +insightful +inspect +inspection +inspiration +inspire +inspire_observation +instability +instagram +instagram_post +instance +instant +instant_message +instantiate +instantiation +instead +institute +institute_technology +institution +institutional +instruct +instructgpt +instruction +instruction_follow +instruction_following +instruction_tuning +instructional +instructional_video +instructor +instrument +instrumental +insufficient +insufficiently +insult +int +int_eressons +int_pour +intact +integer +integer_linear +integer_programming +integral +integral_component +integral_part +integrate +integrated +integrated_gradient +integration +integrity +intellectual +intellectual_disability +intellectual_property +intelligence +intelligence_xai +intelligent +intelligent_agent +intelligent_assistant +intelligent_personal +intelligent_tutoring +intelligent_virtual +intelligibility +intelligible +intend +intend_sarcasm +intended +intended_audience +intended_meaning +intended_sarcasm +intense +intensity +intensive +intensive_care +intent +intent_discovery +intent_slot +intention 
+intentional +intentionally +inter +inter_annotator +inter_coder +inter_connected +inter_intra +inter_rater +inter_sentential +interact +interaction +interactive +interactive_fiction +interactive_predictive +interactive_visualization +interactively +interactivity +interchange +interchange_format +interconnect +interdependence +interdependency +interdisciplinary +interdisciplinary_collaboration +interdisciplinary_research +interest +interested +interesting +interesting_finding +interesting_insight +interesting_observation +interestingly +interface +interface_gui +interface_syntaxe +interfere +interference +interference_among +interference_irrelevant +interleave +interleaved +interlinear +interlinear_glossed +interlinear_glossing +interlingual +interlingual_homograph +interlingual_index +interlink +interlocutor +intermediary +intermediate +intermediate_layer +intermediate_step +internal +internal_working +internalize +internally +international +international_conference +international_organization +international_phonetic +international_workshop +internet +internet_forum +internet_meme +internet_slang +interoperability +interoperable +interpersonal +interpersonal_communication +interpersonal_relationship +interplay +interpolate +interpolation +interpolation_extrapolation +interpr +interpr_des +interpret +interpretability +interpretability_controllability +interpretable +interpretation +interpreter +interpreting +interrogative +intersection +intersection_computer +intersection_union +intersectional +intersectional_bias +interval +intervene +intervention +intervention_outcome +interview +intimacy +intimacy_analysis +intimacy_tweet +intonation +intonation_unit +intra +intra_inter +intra_modal +intra_sentential +intractable +intricacy +intricate +intricate_nature +intriguing +intrinsic +intrinsic_extrinsic +intrinsically +intrinsically_extrinsically +introduce +introduction +introduction_special +introductory +introductory_programming +intuition +intuitive +intuitively 
+invalid +invaluable +invariance +invariant +invent +inventory +inverse +inverse_document +inverse_perplexity +inverse_reinforcement +inverse_scaling +inversion +inversion_attack +inversion_transduction +invert +invert_index +invest +investigate +investigate_extent +investigate_whether +investigation +investment +investment_decision +investor +invite +invite_talk +invoke +involve +involved +involvement +irish +ironic +ironic_tweet +irony +irony_detection +irony_sarcasm +irregular +irregularity +irrelevant +irrelevant_misleading +irrespective +isation +isation_entit +isation_lexicale +island +iso +iso_space +iso_standard +iso_timeml +isolate +isolated +isolated_sign +isolation +issue +issus +italian +italian_administrative +italian_portuguese +italian_spanish +italy +item +iterate +iteration +iterative +iterative_back +iterative_refinement +iteratively +iteratively_refine +iteratively_update +ive +ive_baye +iwslt +iwslt_dialectal +iwslt_offline +iwslt_simultaneous +iwslt_ted +jailbreak +jailbreak_attack +jailbreake +jailbreake_attack +japan +japanese +japanese_functional +jargon +java +java_library +java_python +jeu +jeu_donn +jeux +jeux_donn +job +job_advertisement +job_description +job_interview +job_market +job_posting +job_seeker +job_title +join +join_force +joint +joint_ctc +jointly +jointly_optimize +joke +journal +journal_article +journal_wsj +journalism +journalist +journalistic +journey +joy +joy_anger +joy_sadness +json +json_file +json_format +judge +judgement +judgment +judicial +judicial_decision +jump +jump_start +justice +justification +justify +justify_decision +kaldi +kaldi_asr +kaldi_toolkit +kannada +kannada_english +kannada_gujarati +kannada_malayalam +kannada_tamil +kannada_tulu +kappa +kappa_agreement +kappa_coefficient +kappa_score +kappa_value +kazakh +kazakh_russian +kbp +kbqa +kbs +keep +keep_pace +keep_track +kernel +key +key_component +key_factor +key_idea +key_ingredient +key_innovation +keyboard +keynote +keynote_lecture +keyphrase 
+keyphrase_extraction +keystroke +keystroke_log +keystroke_saving +keyword +keyword_spot +kgc +kge +kgqa +kind +kit +kit_iwslt +kit_lecture +knn +knn_box +know +know_priori +knowledge +knowledge_basis +knowledge_distillation +knowledgeable +known +known_unknown +korean +korean_framenet +krippendorff +krippendorff_alpha +kullback +kullback_leibler +lab +lab_protocol +lab_submission +label +label_smoothing +labeler +labeling +labelling +labor +labor_cost +labor_intensive +labor_market +laboratory +laborious +labour +labour_intensive +labse +lack +lack_adequate +lack_interpretability +lack_standardized +lack_sufficient +lack_thereof +lack_transparency +lag +lag_behind +lan +lan_guage +landmark +landscape +lang +lang_vec +langage +langage_entra +langage_naturel +languages +langue +langue_arabe +langue_cible +langue_ecialit +langue_entra +langue_ere +langue_fran +langue_peu +langue_tal +langues +laptop +laptop_restaurant +large +large_margin +large_scale +largely +largely_ignore +largely_neglect +largely_overlook +largely_unexplored +largely_unknown +las +las_score +las_uas +laser +laser_labse +last +last_decade +last_layer +last_least +last_minute +lastly +late +late_advancement +late_century +late_fusion +lately +latency +latency_regime +latency_requirement +latency_throughput +latency_trade +latent +latent_dirichlet +latent_space +latent_variable +later +later_layer +lateral +lateral_thinking +latin +latin_alphabet +latin_america +latin_american +latin_script +latin_treebank +latter +lattice +latvian +latvian_lithuanian +laughter +launch +law +law_regulation +lawyer +lay +lay_foundation +lay_groundwork +lay_summarisation +lay_summary +layer +layer_perceptron +layer_wise +layered +layout +layout_aware +lda +lda_topic +ldc +ldc_catalog +lead +lead_improved +lead_inaccurate +lead_poor +lead_severe +lead_suboptimal +lead_subpar +lead_unfair +leader +leader_board +leaderboard +leak +leak_private +leakage +lean +leap +leap_forward +learn +learnability +learnable 
+learnable_parameter +learner +learner_essay +learning +learning_icl +learning_mtl +learning_ssl +least +least_partially +leave +leave_ample +leave_behind +leave_right +leave_room +lecture +lee +left +left_branch +left_lean +left_right +legacy +legal +legal_contract +legal_ethical +legal_judgement +legal_judgment +legal_practitioner +legal_professional +legal_violation +legislation +legislative +legitimate +lemma +lemma_inflect +lemmas +lemmatisation +lemmatization +lemmatization_morphosyntactic +lemmatization_pos +lemmatize +lemon +lend +length +length_extrapolation +length_minimization +length_stay +lengthy +lens +les +les_aches +les_apprenant +les_approches +les_autre +les_capacit +les_caract +les_consonne +les_contrainte +les_deux +les_diff +les_domaines +les_donn +les_ecificit +les_effet +les_ements +les_emes +les_erreur +les_erreurs +les_eseaux +les_esultat +les_esultats +les_ethodes +les_exp +les_grammaire +les_graphe +les_jeux +les_locuteur +les_locuteurs +les_meilleur +les_mesures +les_mod +les_mot +les_mots +les_niveaux +les_param +les_performances +les_personne +les_plongement +les_premier +les_propri +les_relations +les_repr +les_sont +les_syst +les_terme +les_texte +les_travaux +les_trois +les_utilisateur +les_voyelle +less +less_frequent +less_half +less_likely +less_studied +lesson +lesson_learn +let +let_alone +let_think +letter +leur +leur_capacit +leur_caract +leur_compr +leur_leur +leur_niveau +leur_pertinence +leur_qualit +leur_utilisation +levantine +levantine_arabic +level +level_abstraction +levenshtein +levenshtein_distance +levenshtein_transformer +leverage +leveraged +lexeme +lexica +lexical +lexical_borrowing +lexical_cohesion +lexical_item +lexical_overlap +lexical_richness +lexical_simplification +lexical_substitution +lexicale +lexicalization +lexicalize +lexicalize_reordering +lexically +lexically_constrain +lexically_syntactically +lexicaux +lexico +lexico_emantique +lexico_semantic +lexico_syntactic +lexico_syntaxiques 
+lexicographer +lexicographic +lexicography +lexicon +lexicon_induction +lexique +lexique_bilingue +lexique_morphologique +liberal +liberal_conservative +library +library_reproducible +libre +licence +license +licensing +lid +lie +lie_foundation +lie_groundwork +lie_heart +lie_summarisation +lien +lien_avec +lien_entre +lieu +life +life_cycle +life_science +lifecycle +lifelong +lifelong_editing +lifelong_learning +lift +light +light_verb +light_weight +lightweight +lightweight_adapter +ligne +like +likelihood +likelihood_estimation +likelihood_ratio +likely +likert +likert_scale +likewise +lime +limit +limit_applicability +limit_practicality +limit_scalability +limitation +limited +limited_amount +limited_availability +limsi +lin +line +linear +linear_discriminant +linear_interpolation +linear_programming +linear_projection +linear_regression +linear_subspace +linear_svm +linear_transformation +linearization +linearize +linearly +linearly_interpolate +lingua +lingua_libre +lingual +lingual_linking +lingual_transfer +linguist +linguistic +linguistic_acceptability +linguistic_inquiry +linguistic_phenomena +linguistic_phenomenon +linguistic_typology +linguistically +linguistically_applicable +linguistically_inform +linguistically_motivate +linguistically_motivated +linguistically_sound +linguistics +linguistics_literature +linguistique +linguistique_pour +link +linkage +linking +lip +lip_movement +lip_read +lip_reading +list +listen +listener +listener_gaze +listening +listening_comprehension +literacy +literal +literal_copying +literal_figurative +literal_idiomatic +literal_literal +literal_meaning +literal_metaphorical +literal_usage +literary +literary_criticism +literary_fiction +literature +literature_review +lithuanian +litt +litt_erature +little +little_attention +little_know +liu +live +live_broadcast +live_chat +live_commentary +live_demo +live_experience +live_stream +live_streaming +liwc +llama +llama_alpaca +llama_bloom +llama_chat +llama_falcon 
+llama_gemma +llama_instruct +llama_llama +llama_mistral +llama_mixtral +llama_opt +llama_qwen +llama_vicuna +llava +llava_llava +llm +llms +lmms +lms +load +local +local_assembly +local_global +local_neighborhood +local_optimum +localisation +locality +locality_sensitive +localization +localization_industry +localize +locally +locally_aggregate +locally_linear +locally_normalize +locate +locate_acl +locate_eacl +locate_emnlp +locate_naacl +location +location_indicative +location_mention +location_organisation +location_organization +locuteur +locuteur_qui +locuteurs +locuteurs_fran +log +log_bilinear +log_likelihood +log_linear +log_odd +log_probability +logic +logic_rule +logical +logical_fallacy +logical_form +logical_formula +logical_reasoning +logically +logically_consistent +logically_entail +logically_sound +logistic +logistic_regression +logit +logit_lens +long +long_distance +long_horizon +long_range +long_short +long_stand +long_tail +longformer +longformer_encoder +longitudinal +longstanding +longstanding_challenge +look +look_ahead +lookup +lookup_table +loop +loose +loosely +loosely_couple +lor +lor_une +lora +lora_adapter +lorsqu +lorsqu_elle +lorsqu_sont +lorsque +lorsque_les +lose +lose_middle +loss +loss_function +loss_landscape +lossless +lossless_acceleration +lot +lot_attention +lot_interest +love +low +low_dimensional +low_latency +low_resource +lower +lower_barrier +lower_bind +lrec +lrec_conference +lrl +lrls +lrs +lsp +lstm +lstm_bilstm +lstm_cell +lstm_cnn +lstm_crf +lstm_gru +lstms +lvlm +lyric +lyric_imitation +lyric_motion +lyric_song +macedonian +macedonian_arabic +machine +machine_readable +machine_reading +machine_translation +macro +macro_average +macro_micro +macro_score +mae +magnitude +magnitude_fast +magnitude_pruning +mail +main +main_contribution +main_drawback +main_goal +main_idea +main_motivation +main_obstacle +mainly +mainly_focus +mainstream +maintain +maintenance +mais +mais_aussi +mais_egalement +mais_pas +major 
+major_bottleneck +major_challenge +major_drawback +major_minor +major_obstacle +majority +majority_vote +majority_voting +make +make_impractical +make_informed +make_sure +make_unsuitable +maker +making +malayalam +malayalam_kannada +malayalam_marathi +malayalam_tamil +malayalam_telugu +male +male_female +malicious +malicious_actor +maltese +man +man_woman +manage +manageable +management +manager +mandarin +mandarin_cantonese +mandarin_chinese +mandarin_tone +mani +mani_ere +manifest +manifestation +manifold +manifold_mixup +manipulate +manipulation +manipuri +manipuri_manipuri +manner +manual +manual_annotation +manual_coding +manual_curation +manual_inspection +manual_labor +manually +manually_annotate +manually_craft +manually_curate +manually_engineer +manually_evaluated +manually_transcribe +manually_transcribed +manuelle +manuellement +manuscript +many +map +map_onto +mapping +mapping_plwordnet +marathi +marathi_hindi +marathi_marathi +marathi_nepali +marathi_punjabi +marathi_sanskrit +marbert +march +marco +marco_passage +marco_trec +margin +margin_loss +marginal +marginal_likelihood +marginal_probability +marginal_relevance +marginalize +marginalized +marginalized_group +marginally +mark +marked +marked_improvement +markedly +marker +market +marketing +marketing_theme +marking +markov +markov_chain +markov_decision +markov_hmm +markov_logic +markov_random +markup +markup_tag +masculine +masculine_feminine +masculine_gender +mask +mask_mlm +mask_modeling +mask_modelling +masked +masking +masking_rate +masking_ratio +masking_strategy +mass +massive +massive_amount +massive_multitask +massive_unlabeled +massively +massively_multilingual +master +match +matching +material +material_science +math +math_equation +mathematic +mathematical +mathematical_expression +mathematical_formulation +mathematical_notation +mathematical_proof +mathematical_reasoning +mathematically +mathematically_equivalent +matrix +matrix_factorization +matrix_multiplication +matter 
+mature +max +max_institute +max_margin +max_pool +max_pooling +maximal +maximal_loss +maximal_marginal +maximally +maximization +maximize +maximize_likelihood +maximize_mutual +maximize_utilization +maximum +maximum_entropy +maximum_likelihood +maximum_subgraph +may +may_suffice +mbart +mbart_nllb +mbert +mbert_xlm +mbr +mbr_decode +mcq +mcqs +mct +mds +mean +mean_max +mean_median +mean_preservation +mean_reciprocal +mean_square +meaning +meaning_bank +meaningful +meaningfully +means +meanwhile +measurable +measure +measurement +mechanical +mechanical_turk +mechanism +mechanistic +mechanistic_interpretability +media +median +mediate +mediate_communication +mediation +mediation_analysis +medical +medical_consultation +medical_imaging +medical_jargon +medical_licensing +medical_record +medication +medicine +medicine_law +medieval +medieval_french +medieval_latin +medieval_manuscript +mediqa +mediqa_chat +mediqa_corr +medium +medium_monitoring +medium_outlet +medium_platform +medium_sized +meet +meet_exceed +meet_requirement +meet_transcript +meeting +meeting_minute +meeting_summarization +meeting_transcript +meilleur +meilleur_esultat +meilleur_esultats +meilleur_syst +meilleure +mel +mel_frequency +mel_spectrogram +member +member_parliament +membership +membership_inference +meme +meme_meme +meme_troll +memorization +memorize +memorized +memory +memory_bank +memory_bilstm +memory_consumption +memory_footprint +memory_lstm +memory_overhead +memory_replay +memory_requirement +memory_saving +memory_usage +men +men_ees +men_sur +ment +mental +mental_disorder +mental_distress +mental_health +mental_illness +mention +mention_adverse +mention_medication +mere +merely +merge +merge_operation +merging +merit +mesh +mesh_indexing +mesh_term +message +message_board +message_exchange +message_pass +message_polarity +message_send +mesure +mesure_similarit +mesures +mesures_similarit +meta +meta_learner +metadata +metaphor +metaphor_detection +metaphor_novelty +metaphoric 
+metaphoric_expression +metaphorical +metaphorical_expression +metaphorical_literal +meteor +meteor_bertscore +meteor_rouge +meteor_score +meteor_ter +methodological +methodological_consideration +methodology +meticulous +meticulously +meticulously_annotate +meticulously_craft +meticulously_curate +metric +metric_mqm +mettre +mettre_evidence +micro +micro_average +micro_blog +micro_macro +micro_score +microblog +microblog_message +microblog_post +microblogge +microblogge_platform +microsoft +microsoft_translator +mid +middle +middle_age +middle_east +middle_layer +middle_school +mieux +mieux_comprendre +mieux_les +might +migration +migration_hate +mild +mild_cognitive +milestone +million +million_billion +million_people +mimic +mimic_cxr +min +min_max +mind +mind_map +mind_tom +mine +mine_bitext +mini +mini_batch +minimal +minimal_modification +minimal_recursion +minimal_supervision +minimalist +minimalist_grammar +minimally +minimally_perturb +minimally_supervise +minimise +minimization +minimize +minimize_distance +minimize_interference +minimum +minimum_baye +minimum_description +minimum_risk +mining +mining_health +minor +minor_greek +minor_modification +minority +minority_class +minority_group +minute +mirror +mis +mis_disinformation +misalign +misalignment +misclassification +misconception +mise +mise_fisher +mise_place +misinformation +misinformation_disinformation +misinformation_spread +mislead +misleading +mismatch +misogynistic +misogynistic_aggression +misogynistic_content +misogynistic_meme +misogynous +misogynous_meme +misogyny +misogyny_identification +misogyny_meme +misogyny_sexism +miss +miss_modality +missing +missing_link +missing_modality +mission +mission_critical +misspell +misspelling +mistake +mistral +mistral_falcon +mistral_instruct +mistral_llama +mistral_mixtral +mistral_qwen +mistranslation +misunderstanding +misuse +mit +mit_license +mitigate +mitigate_catastrophic +mitigate_hallucination +mitigate_harm +mitigate_spurious +mitigation 
+mitigation_strategy +mix +mix_hinglish +mix_kannada +mix_tamil +mixed +mixed_initiative +mixing +mixtral +mixtral_instruct +mixtral_llama +mixture +mixture_expert +mixture_gmm +mixup +mle +mllm +mlm +mlp +mmlu +mmt +mnli +mnli_snli +mnmt +mobile +mobile_app +mobile_device +mobile_phone +mod +mod_ele +mod_eles +mod_elisation +modal +modality +mode +mode_collapse +mode_connectivity +modeling +modeling_mlm +modelling +models +models_llms +moderate +moderate_severe +moderate_sized +moderately +moderately_sized +moderation +moderator +modern +modern_greek +modern_hebrew +modern_society +modern_standard +modest +modest_gain +modification +modifier +modify +modify_version +modular +modularity +modularity_extensibility +modularize +modularize_extensible +modularize_mnmt +modulate +modulation +modulation_spectrum +module +moe +moin +molecular +molecular_property +molecule +molecule_captioning +moment +momentum +momentum_contrastive +money +monitor +monitoring +mono +mono_lingual +mono_multilingual +monolingual +monologue +monotonic +monotonic_alignment +monotonicity +monte +monte_carlo +month +montr +montr_que +montre +montre_egalement +montre_que +montrent +montrent_que +montrent_une +montron +montron_comment +montron_que +mood +moral +moral_foundation +moral_judgment +moral_norm +moral_rhetoric +moral_value +morality +moreover +moroccan +moroccan_arabic +morph +morph_analyzer +morph_msa +morpheme +morpheme_boundary +morpheme_segmentation +morpho +morpho_syntactic +morpho_syntactically +morpho_syntaxique +morphological +morphological_analyser +morphological_analyzer +morphological_inflection +morphological_reinflection +morphological_richness +morphological_segmentation +morphological_tagging +morphological_typology +morphologically +morphologically_complex +morphologically_rich +morphologically_syntactically +morphologique +morphology +morphology_syntax +morphosyntactic +morphosyntactic_tagging +mosei +moses +moses_statistical +mostly +mot +mot_cible +mot_dan +mot_dans 
+mot_mot +motion +motion_appearance +motion_capture +motivate +motivate_intuition +motivate_observation +motivated +motivation +motivation_behind +motivational +motivational_interview +mots +mots_partir +mots_puis +move +move_away +move_beyond +move_forward +move_towards +movement +mover +mover_distance +movie +movie_plot +movie_review +movie_script +movie_subtitle +movie_ticket +moyen +moyen_une +moyenne +mqm +mrc +mrl +mrp +mrr +mrr_hits +mrr_ndcg +mrs +msa +msa_dialectal +mscoco +msr +mtl +much +much_room +multi +multi_dimensional +multi_faceted +multi_grained +multi_hop +multi_layered +multi_modal +multi_party +multi_tasking +multi_turn +multiclass +multiclass_classification +multiconer +multidimensional +multidimensional_quality +multidisciplinary +multidomain +multidomain_multilingual +multifaceted +multifaceted_nature +multilabel +multilabel_classification +multilayer +multilayer_perceptron +multilingual +multilingualism +multilinguality +multilingue +multimedia +multimedia_automatic +multimedia_content +multimodal +multimodal_lmms +multimodality +multinomial +multinomial_naive +multiparty +multiparty_conversation +multiparty_dialogue +multiple +multiple_choice +multiplication +multitask +multitask_learning +multitaske +multitude +multivariate +multivariate_gaussian +multiword +multiword_expression +multiwoz +multiwoz_multiwoz +multiwoz_sgd +muril +muril_xlm +music +musical +must +must_satisfy +must_tst +mutation +mutual +mutual_information +mutual_intelligibility +mutual_promotion +mutually +mutually_beneficial +mutually_exclusive +mutually_intelligible +mutually_reinforce +mwe +mwp +mwp_solver +mwps +myriad +mystery +naacl +nadi +nadi_share +naist +naist_nict +naive +naive_baye +naive_bayes +naively +name +name_entity +namely +naming +nar +narration +narrative +narrative_schemas +narrow +narrow_gap +narrow_scope +nat +nation +national +national_archive +national_institute +national_international +national_library +national_university +nationality 
+nationality_bias +native +native_accent +native_identification +native_native +native_script +native_signer +native_speaker +natural +natural_processing +naturalistic +naturally +naturally_arise +naturally_occur +naturalness +nature +naturel +naturel_dans +navigate +navigation +navigation_instruction +ndcg +near +near_duplicate +near_identity +near_neighbor +near_neighbour +near_perfect +near_synonym +near_synonymous +near_synonyms +nearby +nearly +nearly_double +nearly_half +nearly_identical +necessarily +necessary +necessitate +necessity +need +negate +negated +negated_statement +negation +negation_cue +negation_scope +negation_speculation +negative +negative_attitude +negative_consequence +negative_interference +negative_neutral +negative_sample +negative_sampling +negatively +negatively_affect +negatively_correlate +negatively_impact +neglect +negligible +negligible_computational +negligible_impact +negotiation +negotiation_dialogue +negotiation_outcome +neighbor +neighbor_search +neighborhood +neighboring +neighboring_node +neighbour +neighbour_knn +neither +neo +neo_latin +neologism +nepali +ner +nest +nest_name +nest_ner +nested +nested_ner +net +network +network_cnns +network_dnn +network_gan +network_gat +network_gcn +network_gcns +network_rnn +network_rnns +neural +neural_network +neuro +neuro_symbolic +neuron +neuron_activation +neuronale +neuronale_pour +neuronaux +neuronaux_pour +neurone +neurone_pour +neurone_profond +neuroscience +neutral +neutral_negative +neutral_rewriting +never +never_see +nevertheless +new +new_york +newly +newly_collect +newly_compile +newly_create +newly_emerge +news +news_aggregator +news_article +news_editorial +news_headline +news_outlet +news_recommendation +newspaper +newspaper_article +newswire +next +next_iteration +next_token +ngram +niche +nict +nict_participation +nict_submission +nine +ninth +ninth_conference +nist +nist_chinese +niveau +niveau_des +niveaux +nlg +nli +nllb +nlp +nlu +nlu_devanagari +nlu_nlg +nmt 
+node +node_edge +noise +noise_injection +noisy +noisy_channel +nom +nom_propre +nombre +nombre_mots +nombreuse +nombreuse_ache +nombreuse_application +nombreux +nombreux_travaux +nominal +nominal_antecedent +nominal_compound +nominal_modifier +nominal_predicate +nomm +nomm_ees +none +nonetheless +nonlinear +nonparametric +nonparametric_bayesian +nonsensical +nontrivial +nonverbal +nonverbal_behavior +nonverbal_communication +norm +norm_violation +normal +normalisation +normalise +normalization +normalization_adverse +normalize +normalize_flow +normalize_mutual +normalizing +normalizing_flow +normally +normative +north +north_african +north_america +north_american +north_ami +north_east +north_germanic +north_korean +north_levantine +north_macedonian +northern +northern_ami +norwegian +norwegian_dialect +norwegian_swedish +nos +nos_donn +nos_esultats +nos_exp +nos_mod +nos_travaux +notable +notable_exception +notable_success +notably +notamment +notamment_pour +notation +note +note_discharge +notebook +noteworthy +nothing +notice +noticeable +noticeable_margin +noticeably +notion +notorious +notoriously +notoriously_difficult +notre +notre_approche +notre_connaissance +notre_equipe +notre_ethode +notre_etude +notre_hypoth +notre_mod +notre_objectif +notre_participation +notre_projet +notre_recherche +notre_syst +notre_travail +noun +noun_adjective +noun_compound +noun_ellipsis +noun_noun +noun_phrase +noun_verb +nous +nous_appuyant +nous_aussi +nous_avon +nous_comparon +nous_consid +nous_ecrivon +nous_egalement +nous_ensuite +nous_esenton +nous_esentons +nous_etudion +nous_evaluon +nous_hypoth +nous_int +nous_montron +nous_notre +nous_nous +nous_permet +nous_permettent +nous_permis +nous_proc +nous_proposon +nous_utilison +nouveau +nouveau_corpus +nouveau_jeu +nouveau_mod +nouveaux +nouvelle +nouvelle_ache +nouvelle_approche +nouvelle_ethode +nouvelle_ethodes +novel +novelty +novice +nowadays +nrc +nuance +nuanced +nuanced_arabic +nucleus +nucleus_sampling 
+nucleus_top +null +null_subject +num +number +number_trainable +numeral +numeral_aware +numeric +numeric_attribute +numeric_value +numerical +numerical_reasoning +numerical_value +numerous +nurse +nut +nut_share +nyt +obfuscate +obfuscation +object +object_detector +object_hallucination +object_naming +object_scene +objectif +objectif_cette +objectif_est +objectif_projet +objectif_travail +objective +objective_function +objectively +objet +obscure +observ +observ_ees +observable +observation +observational +observe +observed +observer +obstacle +obtain +obtenir +obtenir_des +obtenu +obtenu_par +obtenue +obtenue_par +obtenus +obtenus_avec +obtenus_montrent +obtenus_par +obtenus_sont +obtient +obtient_des +obtient_une +obvious +obviously +occasionally +occupation +occupy +occur +occur_frequently +occurrence +occurrence_count +occurrence_statistic +occurring +ocr +ocr_correction +odd +odd_ratio +odqa +offense +offenseval +offenseval_competition +offensive +offensive_identification +offensiveness +offer +offer_insight +offer_promising +offer_valuable +office +official +official_blind +official_leaderboard +official_ranking +official_submission +officially +officially_rank +officially_release +offline +offline_harm +offline_online +offline_reinforcement +offset +often +often_fail +often_fall +often_neglect +often_overlook +often_struggle +oie +old +old_adult +old_irish +ole +ole_dan +omission +omit +one +one_hot +ongoing +ongoing_debate +ongoing_effort +ongoing_project +online +online_abuse +online_course +online_debate +online_discussion +online_forum +online_harassment +online_platform +online_sexism +online_shopping +onset +ont +ont_ealis +ont_evalu +ont_mis +ont_montr +ont_pas +ont_permis +ont_propos +ont_utilis +onto +ontolex +ontolex_lemon +ontolex_morph +ontological +ontology +ontology_population +ontonote +ood +ood_detection +ood_detector +ood_intent +oov +oov_rate +oov_word +opaque +opaque_nature +open +open_door +open_ended +open_license +open_source 
+open_sourced +openai +openai_chatgpt +openai_gpt +openai_whisper +openie +openly +openly_accessible +openly_available +operate +operation +operational +operational_efficiency +operationalize +operator +opinion +opinion_diversification +opinion_holder +opinion_mining +opinion_spam +opinionate +opportunity +oppose +opposite +opposite_direction +opposition +opt +opt_bloom +opt_llama +optical +optical_character +optical_flow +optimal +optimal_transport +optimality +optimality_theory +optimally +optimisation +optimise +optimistic +optimization +optimization_dpo +optimization_ppo +optimize +optimizer +optimum +option +option_symbol +optional +opus +opus_cat +oracle +oral +oral_history +oral_presentation +oral_proficiency +oral_reading +order +order_logic +order_magnitude +ordering +ordinal +ordinal_classification +ordinal_regression +ordinary +ordinary_differential +org +organisation +organise +organise_part +organization +organization_location +organize +organize_dravidianlangtech +organize_part +organizer +orient +orient_dialog +orient_dialogue +orientation +orientation_location +origin +original +originale +originale_pour +originally +originally_design +originally_develop +originally_write +originate +orthogonal +orthogonal_matrix +orthogonal_transformation +orthographic +orthographic_phonetic +orthographic_phonological +orthographic_syllable +orthographic_transcription +orthographic_variation +orthography +otherwise +outbreak +outcome +outdated +outil +outil_permettant +outil_pour +outils +outlet +outli +outli_dimension +outlier +outlier_dimension +outline +outlook +outperform +output +outside +outside_scope +outstanding +outstanding_performance +overall +overcome +overcome_barrier +overcome_catastrophic +overcome_drawback +overcome_limitation +overcome_obstacle +overcome_shortcoming +overconfidence +overestimate +overfit +overfitte +overfitting +overhead +overlap +overlapping +overload +overlook +overly +overly_optimistic +overly_simplistic +oversample +oversight 
+overt +overview +overview_alta +overview_ccl +overview_clpsych +overwhelming +owe +owl +owl_ontology +pace +pack +package +page +pagerank +pair +paire +paire_langue +paire_phrase +paired +pairing +pairwise +pairwise_comparison +pairwise_preference +pairwise_rank +palm +pan +pandemic +panel +papi +papi_ecrit +papi_esente +papi_nous +par +par_apprentissage +par_des +par_deux +par_equipe +par_exemple +par_les +par_locuteur +par_rapport +para +paradigm +paradigm_shift +paradigmatic +paradigmatic_relation +paragraph +paralinguistic +parall +parall_ele +parallel +parallel_bible +parallel_corpora +parallelism +param +param_etre +param_etrique +parameter +parameter_count +parameter_efficient +parameter_sharing +parameterization +parameterize +parameterized +parametric +parametric_bayesian +parametric_knowledge +parametric_memory +parametric_parametric +paramount +paramount_importance +paraphrase +paraphrasing +parent +parent_child +pareto +pareto_front +pareto_frontier +pareto_optimal +paris +parity +parl +parliament +parliamentary +parliamentary_debate +parliamentary_proceeding +parmi +parmi_les +parole +parole_avec +parole_chez +parole_est +parole_hearing +parole_spontan +parole_ver +parse +parse_tree +parseme +parseme_share +parser +parsing +parsing_mrp +part +part_speech +partial +partial_diacritization +partially +partially_observable +participant +participant_ask +participate +participate_subtask +participate_team +participation +participation_campagne +participation_deft +participation_equipe +participation_iwslt +participation_semeval +participation_team +participation_wat +participation_wmt +particle +particle_optimization +particle_verb +particle_verbs +particular +particular_emphasis +particularly +particularly_evident +particularly_pronounce +particuli +particuli_ere +particuli_erement +particulier +particulier_nous +partie +partir +partir_des +partir_donn +partir_peu +partir_ressource +partir_texte +partir_une +partition +partly +partly_due +partner 
+partner_violence +party +party_affiliation +party_conversation +party_discour +party_meeting +pas +pas_cas +pas_compte +pas_encore +pas_etre +pas_mais +pass +passage +passage_reranking +passage_retrieval +passage_retriever +passive +passive_aggressive +passive_voice +past +past_decade +past_tense +patch +patent +patent_office +path +pathological +pathological_description +pathology +pathology_report +pathway +patient +patient_care +patient_doctor +patient_note +patient_physician +patient_record +patient_therapist +patient_visit +patronize +patronize_condescend +pattern +paucity +pause +pave +pave_way +pay +pay_attention +pay_little +pay_special +pbsmt +pca +pcfg +pcfg_induction +pcl +pdf +pdf_document +pdf_file +pdf_format +pdtb +pdtb_pdtb +pdtb_style +peak +pearson +pearson_correlation +pearson_spearman +peculiarity +pedagogical +pedagogy +peer +peer_peer +peer_review +peer_reviewing +peer_tutoring +peft +pegasus +penalize +penalty +penn +penn_discourse +penn_treebank +people +people_disability +people_worldwide +per +per_minute +perceive +percent +percentage +percentage_point +perception +perception_production +perceptive +perceptron +perceptron_mlp +perceptual +perfect +perfectly +perform +perform_comparably +perform_competitively +perform_poorly +perform_reasonably +perform_similarly +performance +performance_degradation +performances +performances_des +performant +performer +performing +perhaps +perhaps_surprisingly +period +permet +permet_eliorer +permet_obtenir +permet_une +permettant +permettent +permettre +permis +permis_des +permit +permutation +permutation_invariant +perpetuate +perpetuate_harmful +perplexity +perplexity_wikitext +persian +persian_farsi +persist +persistent +persistent_challenge +person +person_location +person_name +person_organization +person_singular +persona +persona_chat +personal +personal_assistant +personal_experience +personal_identifiable +personal_narrative +personal_pronoun +personal_trait +personality +personality_profiling 
+personality_subtype +personality_trait +personalization +personalize +personalized +personally +personally_identifiable +personne +personne_ees +perspective +perspective_taking +persuade +persuasion +persuasion_technique +persuasive +persuasive_essay +persuasive_message +persuasiveness +pertain +pertinence +pertinent +pertinente +pertinente_pour +perturb +perturbation +pervasive +pet +peu +peu_dot +peu_etudi +peu_ressource +peut +peut_etre +peut_permettre +peuvent +peuvent_etre +phase +phenomena +phenomenon +phi +philippine +philosophy +phon +phon_eme +phon_etique +phone +phoneme +phoneme_conversion +phoneme_duration +phoneme_grapheme +phoneme_mapping +phonemic +phonemic_transcription +phonetic +phonetic_phonological +phonetic_prosodic +phonetic_symbol +phonetic_transcription +phonetically +phonetically_balanced +phonetically_similar +phonological +phonological_reconstruction +phonology +phonology_morphology +phonotactic +phonotactic_complexity +photo +photo_realistic +phrasal +phrasal_verb +phrase +phrase_attachment +phrase_parall +phrase_smt +phrase_statistical +phrasing +phylogenetic +phylogenetic_inference +phylogenetic_reconstruction +phylogenetic_tree +physical +physical_activity +physical_appearance +physical_commonsense +physical_harm +physical_object +physical_world +physician +physics +physiological +physiological_signal +pick +picture +picture_description +picture_worth +piece +piece_evidence +pillar +pilot +pilot_study +pinpoint +pinyin +pinyin_input +pioneer +pipeline +pitch +pitch_accent +pitch_contour +pitfall +pivot +pivot_pivot +pivotal +pivotal_role +pixel +place +place_competition +place_leaderboard +placeholder +placement +plagiarism +plagiarism_detection +plague +plain +plan +plan_execute +planner +planning +plateforme +platform +plausibility +plausibility_faithfulness +plausibility_judgement +plausible +plausible_alternative +plausible_clarification +plausible_explanation +play +play_central +play_critical +play_crucial +play_important 
+play_pivotal +play_vital +player +player_game +playing +playing_game +plenty +plethora +plm +plongement +plongement_lexicaux +plot +plug +plug_play +plugin +plupart +plupart_des +plural +plural_mention +plus +plus_court +plus_difficile +plus_ecis +plus_efficace +plus_elev +plus_faible +plus_grand +plus_grande +plus_importante +plus_particuli +plus_performant +plus_pertinente +plus_plus +plus_proche +plus_que +plus_robuste +plus_souvent +plusieur +plusieur_aches +plusieur_ethodes +plusieur_langue +plusieur_mod +plwordnet +plwordnet_princeton +pmi +poem +poetic +poetry +poetry_prose +point +point_likert +point_view +point_vue +pointer +pointer_generator +pointer_network +pointwise +pointwise_mutual +poison +poison_attack +poison_backdoor +poison_sample +poisoning +poisoning_attack +polar +polar_coordinate +polarity +polarity_intensity +polarization +polarize +policy +policy_gradient +policy_maker +policy_making +policy_optimisation +policy_optimization +policymaker +polish +polish_russian +polite +politeness +politic +politic_sport +political +political_actor +political_affiliation +political_debate +political_discourse +political_ideology +political_multiclass +political_orientation +political_party +political_polarization +political_science +political_scientist +political_spectrum +politically +politician +polynomial +polynomial_time +polysemous +polysemous_word +polysemy +polysynthetic +pool +pool_operation +pooling +pooling_operation +poor +poor_generalization +poorly +poorly_calibrate +poorly_understand +popular +popularity +populate +population +population_intervention +port +portability +portable +portal +portion +portray +portuguese +portuguese_spanish +pos +pos_tag +pos_tagger +pos_tagging +pos_tagset +pose +pose_challenge +pose_serious +pose_severe +pose_significant +pose_threat +pose_unique +posit +position +position_encoding +positional +positional_encoding +positive +positive_negative +positive_neutral +positive_unlabeled +positively 
+positively_correlate +positively_impact +positively_negatively +possess +possibility +possible +possibly +post +post_edit +post_edited +post_editing +post_edition +post_editor +post_hoc +post_subreddit +posterior +posterior_collapse +posterior_distribution +posterior_probability +posterior_regularization +posting +postulate +potential +potential_misuse +potential_pitfall +potentially +potentially_harmful +potentially_idiomatic +potentially_misleading +potentially_useful +pour +pour_ache +pour_anglais +pour_apprentissage +pour_caract +pour_cela +pour_cette +pour_chaque +pour_compr +pour_construire +pour_eation +pour_ediction +pour_edire +pour_elior +pour_eliorer +pour_enrichissement +pour_entra +pour_eration +pour_erer +pour_esambigu +pour_esolution +pour_esum +pour_etecter +pour_etection +pour_eterminer +pour_etiquetage +pour_etude +pour_etudi +pour_evaluer +pour_extraire +pour_faire +pour_fouille +pour_fran +pour_identifi +pour_les +pour_mieux +pour_objectif +pour_objet +pour_obtenir +pour_permettre +pour_probl +pour_recherche +pour_reconnaissance +pour_rendre +pour_synth +pour_tal +pour_traduction +pour_traitement +pour_traiter +power +power_consumption +power_law +powerful +powerful_tool +ppo +practical +practical_applicability +practical_application +practical_deployment +practical_utility +practicality +practically +practically_useful +practice +practitioner +practitioner_interested +pragmatic +pragmatic_implicature +pragmatically +pragmatically_informative +prague +prague_czech +prague_dependency +pre +pre_computed +pre_defined +pre_determined +pre_existing +pre_requisite +pre_specified +pre_train +pre_trained +precede +precedent +precise +precisely +precision +precision_medicine +precision_recall +predecessor +predefine +predefine_category +predefine_template +predicate +predicate_argument +predict +predict_missing +predictability +predictable +prediction +predictive +predictive_maintenance +predictive_power +predictive_uncertainty +predictor 
+predictor_estimator +predominant +predominant_sense +predominantly +predominantly_focus +prefer +preferable +preference +preference_optimization +preferred +prefix +prefix_prefix +prefix_suffix +prefix_tuning +prejudice +preliminary +premi +premi_ere +premier +premier_esultats +premier_temps +premise +premise_conclusion +premise_entail +premise_hypothesis +prendre +prendre_compte +preparation +prepare +prepared +preposition +preposition_determiner +prepositional +prepositional_attachment +prepositional_phrase +preprocesse +preprocesse_step +preprocessing +preprocessing_step +prerequisite +presence +presence_absence +present +presentation +presentation_slide +preservation +preservation_fluency +preservation_revitalization +preserve +preserve_integrity +preserve_original +presidential +presidential_campaign +presidential_debate +presidential_election +press +press_need +press_release +pressure +presumably +presupposition +presupposition_trigger +pretrain +pretrain_finetune +pretraine +pretraine_plm +pretraining +prevail +prevalence +prevalent +prevent +prevent_catastrophic +prevent_fitting +prevent_misuse +prevent_overfitte +prevent_overfitting +prevent_spread +prevention +previous +previously +previously_publish +previously_think +previously_unknown +previously_unseen +price +price_fluctuation +price_movement +price_rise +primarily +primarily_concentrate +primarily_due +primarily_focus +primary +primary_care +primary_objective +primary_school +primary_secondary +primary_submission +prime +priming +primitive +princeton +princeton_wordnet +principal +principal_component +principale +principle +principle_govern +principled +principled_way +print +print_book +print_dictionary +prior +prior_work +priori +prioritize +priority +privacy +privacy_concern +privacy_copyright +privacy_guarantee +privacy_law +privacy_leakage +privacy_policy +privacy_preservation +privacy_preserve +privacy_protection +privacy_regulation +privacy_risk +privacy_utility +privacy_violation +private 
+prize +pro +pro_con +pro_drop +pro_pose +proactive +proactively +proactively_engage +probabilistic +probabilistic_finite +probabilistic_graphical +probability +probability_distribution +probability_mass +probable +probably +probe +probing +probl +probl_ematique +probl_ematiques +probl_eme +problem +problem_mwp +problem_solving +problematic +problematic_webpage +proc +procedural +procedure +proceed +proceeding +process +processing +processing_nlp +processing_sdp +processus +proche +produce +produce_fluent +product +product_catalog +product_review +product_service +product_title +production +production_environment +production_parole +production_ready +productive +productivity +productivity_gain +produit +produit_par +produite +produite_par +profanity +profession +profession_occupation +professional +professional_editor +professional_translator +professionally +professionally_simplify +professionally_translate +professionally_write +proficiency +proficient +profile +profiling +profit +profond +profound +profound_impact +program +program_execution +program_repair +program_synthesis +programme +programmer +programmer_interpreter +programming +programming_ilp +programming_python +progress +progression +progressive +progressively +progressively_refine +prohibit +prohibitive +prohibitively +prohibitively_costly +prohibitively_expensive +project +project_fund +projection +projection_matrix +projective +projective_dependency +projective_transition +projective_tree +projet +proliferation +proliferation_fake +prominence +prominent +promise +promising +promising_avenue +promising_direction +promising_solution +promote +promote_healthy +promotion +prompt +prompt_engineering +prompting +prone +prone_hallucination +prone_overfitte +pronominal +pronominal_anaphora +pronoun +pronoun_resolution +pronounce +pronounced +pronunciation +pronunciation_dictionary +proof +proof_concept +proof_net +propaganda +propaganda_detection +propaganda_technique +propagandistic +propagandistic_meme 
+propagate +propagation +propbank +propel +propensity +proper +proper_name +proper_noun +properly +property +proportion +proportional +proportional_analogy +propos +propos_ees +propos_par +propos_permet +propos_pour +proposal +proposed +proposer +proposer_des +proposer_une +proposition +proposition_bank +propositional +propositional_logic +proposon +proposon_deux +proposon_etudi +proposon_une +proposon_utiliser +propre +propri +proprietary +prose +prose_poetry +prosodic +prosodic_boundary +prosodic_cue +prosodic_prominence +prosodique +prosodique_fran +prosody +prospect +protect +protect_attribute +protect_copyright +protect_group +protect_health +protect_intellectual +protect_privacy +protection +protection_regulation +protein +protein_interaction +protein_protein +protest +protest_event +protest_news +proto +proto_indo +proto_role +protocol +prototype +prototypical +prototypical_contrastive +prototypical_network +prove +prove_effective +prove_successful +prove_useful +provenance +prover +provide +provide_organizer +provide_valuable +provider +provider_lsp +provision +proximal +proximal_policy +proximity +proxy +prune +pruned +pruning +pruning_quantization +pseudo +pseudo_label +pseudo_labeling +pseudo_mot +pseudo_parallel +psychiatric +psychiatric_disorder +psycho +psycho_linguistic +psycholinguistic +psychological +psychological_counseling +psychological_distress +psychological_questionnaire +psychological_theory +psychology +psychology_literature +psychometric +psychometric_predictive +psychotherapy +psychotherapy_session +ptb +ptb_ctb +public +public_administration +public_health +public_leaderboard +public_opinion +public_sector +public_speaking +public_sphere +publication +publication_venue +publicly +publicly_accessible +publicly_available +publicly_release +publish +publisher +publishing +pubme +pubme_abstract +pubme_central +pubmed +pubmed_abstract +pubmed_arxiv +pubmed_database +puis +puis_nous +pull +pull_close +pull_together +pun +pun_pun +punctuation 
+punctuation_capitalization +punctuation_insertion +punctuation_mark +punctuation_restoration +punjabi +punjabi_urdu +purchase +purchase_decision +purchase_intention +purchase_product +pure +purely +purpose +pursue +pursuit +push +push_apart +push_away +push_boundary +push_forward +push_limit +put +put_emphasis +put_forth +put_forward +put_together +puzzle +pyramid +python +python_api +python_java +python_library +python_package +python_program +python_toolkit +pytorch +pytorch_library +quac +quadratic +quadratic_complexity +quadratic_weight +qualified +qualit +qualit_des +qualit_voix +qualitative +qualitative_analysis +qualitative_quantitative +qualitatively +qualitatively_quantitatively +quality +quality_assurance +quality_estimation +quantifiable +quantification +quantifie +quantifier +quantifier_scope +quantify +quantify_extent +quantify_uncertainty +quantit +quantit_donn +quantitative +quantitative_qualitative +quantitatively +quantitatively_qualitatively +quantity +quantization +quantization_pruning +quantize +quantize_variational +quantize_weight +quasi +quasi_synchronous +quatre +que +que_approche +que_celle +que_celui +que_cette +que_ceux +que_dur +que_les +que_leur +que_notre +que_nous +que_soit +que_utilisation +quechua +quechua_spanish +quelle +quelle_mesure +quelque +query +query_expansion +query_reformulation +query_rewrite +querying +quest +question +question_answer +question_answering +question_eponse +question_mcqs +questionable +questionable_assumption +questioning +questionnaire +questions +qui +qui_compte +qui_consiste +qui_est +qui_int +qui_les +qui_leur +qui_ont +qui_permet +qui_peut +qui_peuvent +qui_plus +qui_repose +qui_repr +qui_sont +qui_vise +quick +quickly +quickly_accurately +quickly_adapt +quickly_grasp +quite +quiz +quotation +quotation_attribution +quotation_mark +quote +quote_attribution +qwen +race +race_age +race_ethnicity +race_gender +race_religion +racial +racial_bias +racial_ethnic +racial_gender +racial_religious 
+racial_stereotype +racism +racism_sexism +radical +radio +radio_broadcast +radiologist +radiology +radiology_report +rag +raise +raise_awareness +raise_bar +raise_concern +raise_doubt +raise_serious +ram +ram_ehr +random +random_chance +random_field +random_forest +random_guess +random_guessing +random_initialization +random_masking +random_permutation +random_sampling +random_seed +random_split +random_walk +randomize +randomize_control +randomized +randomized_smoothing +randomly +randomly_choose +randomly_initialize +randomly_mask +randomly_replace +randomly_sample +randomly_select +randomly_shuffle +range +rank +rank_fifth +rank_fourth +rank_leaderboard +rank_mrr +rank_ninth +rank_sixth +ranker +ranking +ranlp +ranlp_share +rapid +rapid_advancement +rapid_development +rapid_dissemination +rapid_evolution +rapid_growth +rapid_pace +rapid_progress +rapid_proliferation +rapid_rise +rapid_spread +rapidly +rapidly_advance +rapidly_evolve +rapidly_grow +rapport +rare +rare_disease +rare_unknown +rare_unseen +rarely +rate +rate_humor +rate_wer +rater +rater_agreement +rater_reliability +rather +rating +rating_likert +ratio +rational +rationale +rationale_behind +rationale_extractor +rationale_investor +rationality +rationalization +raw +ray +ray_image +ray_mimic +ray_report +rdf +rdf_owl +rdf_triple +reach +reach_consensus +reach_satisfactory +react +reaction +read +read_comprehension +read_listen +readability +readability_assessment +readability_formula +readability_index +readable +reader +reader_writer +readily +readily_accessible +readily_applicable +readily_available +reading +reading_comprehension +ready +real +real_life +real_world +realisation +realistic +realistic_scenario +reality +reality_check +realization +realize +really +really_matter +realm +reason +reason_behind +reason_flip +reasonable +reasonably +reasonably_well +reasoner +reasoning +reasoning_civil +reasoning_path +recall +recall_precision +recast +receive +receive_considerable +receive_increase 
+receive_little +receive_lot +receive_much +receive_submission +recency +recency_bias +recent +recent_advance +recent_advancement +recent_breakthrough +recent_surge +recent_witness +recently +recently_attract +recently_publish +recherche +recherche_dans +recherche_informations +recherche_sur +recherche_tal +recipe +recipe_flow +reciprocal +reciprocal_rank +recognise +recognition +recognition_asr +recognition_multiconer +recognition_ner +recognition_ocr +recognize +recognizer +recommend +recommend_item +recommendation +recommender +recommender_system +reconcile +reconnaissance +reconnaissance_automatique +reconnaissance_des +reconnaissance_entit +reconnaissance_parole +reconsider +reconstruct +reconstruct_original +reconstruct_proto +reconstruction +reconstruction_loss +reconstruction_proto +record +record_ehr +record_ehrs +record_linkage +recording +recover +recover_miss +recover_original +recovery +recruit +recruitment +rectify +recur +recurrence +recurrent +recurrent_convolutional +recurrent_neural +recurrent_unit +recursion +recursive +recursive_autoencoder +recursively +red +red_team +red_teaming +reddit +reddit_comment +reddit_post +redefine +reduce +reduce_burden +reduce_reliance +reduce_workload +reduced +reduction +reduction_perplexity +redundancy +redundant +redundant_computation +reduplication +refer +refer_expression +reference +reference_cefr +reference_free +reference_resolution +referent +referential +referential_game +referential_gaze +referring +referring_expression +refine +refined +refinement +refining +reflect +reflection +reflective +reflective_listening +reformulate +reformulation +reframe +refute +refute_claim +reg +reg_algorithm +regard +regardless +regardless_whether +regeneration +regime +region +regional +regional_accent +regional_dialect +regional_france +regional_variation +register +regression +regression_naive +regression_random +regressive +regressor +regular +regular_expression +regular_polysemy +regularity +regularization 
+regularization_dropout +regularize +regularized +regularized_dropout +regularizer +regularly +regulate +regulation +regulatory +regulatory_compliance +reinflection +reinforce +reinforcement +reinforcement_learning +reject +rejection +rejection_sampling +relate +relate_observable +related +relatedness +relation +relation_emantiques +relation_extraction +relational +relational_database +relational_fact +relational_triple +relational_tuple +relations +relations_emantiques +relations_entre +relationship +relationship_among +relative +relative_clause +relative_importance +relative_improvement +relative_merit +relative_position +relative_positional +relative_reduction +relative_wer +relatively +relatively_little +relatively_small +relatively_understudied +relatively_unexplored +relax +relax_assumption +relaxation +relaxed +release +release_https +relevance +relevancy +relevant +reliability +reliability_trustworthiness +reliability_validity +reliable +reliably +reliance +reliance_spurious +reliant +relieve +religion +religious +religious_hate +rely +rely_heavily +rely_solely +rely_superficial +remain +remain_challenging +remain_elusive +remain_formidable +remain_largely +remain_opaque +remain_questionable +remain_unanswered +remain_uncertain +remain_unchanged +remain_unclear +remain_underexplored +remain_understudied +remain_unexplored +remain_unresolved +remain_unsolved +remark +remarkable +remarkable_achievement +remarkable_advancement +remarkable_capability +remarkable_proficiency +remarkable_progress +remarkable_success +remarkably +remedy +remember +remote +removal +remove +remove_duplicate +remove_redundant +remove_unwanted +render +rendre +rendre_compte +renew +renew_interest +reorder +reordering +rep +rep_des +rep_entit +rep_erer +repair +repeat +repeatedly +repetition +repetition_penalty +repetitive +repetitive_pattern +rephrase +rephrasing +replace +replace_softmax +replacement +replay +replicate +replication +reply +reply_suggestion +report +report_ccl 
+reporting +repose +repose_sur +repository +repr +repr_esent +repr_esentation +repr_esentations +repr_esente +repr_esentent +represent +representation +representational +representational_capacity +representational_harm +representational_power +representational_similarity +representative +representativeness +reproduce +reproducibility +reproducible +reproduction +reproduction_study +repurpose +reputation +requ +requ_ete +request +require +require_careful +requirement +requisite +rerank +reranke +reranker +reranking +rescore +rescoring +rescue +research +research_area +research_council +researcher +researcher_developer +researcher_interested +researcher_policymaker +researcher_practitioner +resemble +reshape +reside +residual +residual_connection +residual_stream +resilience +resilient +resistance +resnet +resolution +resolve +resolve_ambiguity +resolve_ambiguous +resolve_conflict +resolve_pronoun +resolver +resort +resource +resource_lrls +respect +respective +respective_strength +respectively +respond +respond_appropriately +respondent +response +responsibility +responsible +responsive +responsive_utterance +ressource +ressource_disponible +ressource_existante +ressource_lexicale +ressource_linguistique +ressource_pour +rest +rest_api +restaurant +restaurant_laptop +restaurant_review +reste +reste_difficile +restoration +restore +restrict +restrict_applicability +restricted +restriction +restrictive +resultant +retain +retention +rethink +retrain +retrain_scratch +retraining +retrieval +retrieval_augment +retrieval_augmented +retrieval_clir +retrieval_dpr +retrieve +retrieve_passage +retrieve_relevant +retrieve_rerank +retrieved +retriever +retriever_dpr +retriever_ranker +retriever_reader +retriever_reranker +retrofit +retrospective +return +return_investment +reusable +reuse +reveal +reveal_interesting +reversal +reversal_curse +reverse +reverse_dictionary +reverse_engineer +reverse_engineering +review +review_helpfulness +reviewer +reviewing +revise +revision 
+revision_history +revisit +revitalization +revitalization_effort +revolutionize +revolutionize_field +revolutionize_landscape +revolve +revolve_around +reward +reward_function +reward_rlhf +reward_signal +rewrite +rewriting +rhetoric +rhetorical +rhetorical_device +rhetorical_figure +rhetorical_move +rhetorical_psychological +rhetorical_role +rhetorical_structure +rhyme +rhyme_rhythm +rhyme_scheme +rhythm +rich +rich_morphology +rich_mrl +richly +richly_annotate +richness +right +right_lean +right_leave +right_left +right_right +rigid +rigorous +rigorous_testing +rigorously +rigorously_assess +rigorously_evaluate +rise +rise_decline +rise_popularity +rise_prominence +risk +risk_assessment +risk_mbr +risk_minimization +risk_overfitte +rival +rlhf +rmse +rmse_subtask +rnn +rnns +road +road_map +roadmap +roberta +roberta_albert +roberta_deberta +roberta_distilbert +roberta_electra +roberta_liu +roberta_xlm +roberta_xlnet +robot +robot_interaction +robot_navigation +robotic +robust +robuste +robustly +robustly_optimize +robustness +roc +roc_auc +role +role_filler +role_labeler +role_labeling +role_labelling +role_play +role_playing +roman +roman_script +roman_urdu +romance +romanian +romanian_wordnet +romanize +romanize_hindi +room +room_improvement +room_room +root +root_affix +root_cause +root_mean +root_node +rotation +rouge +rouge_bertscore +rouge_meteor +rouge_rouge +rouge_score +rough +roughly +roughly_divide +round +round_trip +route +router +routine +routing +row +row_column +rst +rst_discourse +rst_parse +rst_pdtb +rst_tree +rte +rubric +rule +rule_govern +rumor +rumor_detection +rumor_veracity +rumor_verification +rumour +rumour_stance +rumour_veracity +rumour_verification +run +runtime +runtime_complexity +russia +russia_ukraine +russian +russian_ukraine +russian_ukrainian +rwth +rwth_university +rwth_weather +sacrebleu +sacrifice +sad +sadness +sadness_anger +sadness_fear +sadness_surprise +safe +safe_biomedical +safe_deployment +safe_online +safeguard 
+safety +safety_guardrail +safety_vulnerability +sale +sale_talk +salience +saliency +saliency_map +salient +sample +sampling +sampling_strategy +san +sans +sanskrit +sarcasm +sarcasm_detection +sarcasm_irony +sarcastic +sarcastic_sarcastic +sarcastic_tweet +sari +sari_score +satisfaction +satisfaction_estimation +satisfactory +satisfy +satisfy_constraint +satisfy_criterion +satisfy_curiosity +satisfy_requirement +save +save_time +saving +say +say_say +sbert +scalability +scalable +scalar +scalar_adjective +scalar_implicature +scale +scaling +scaling_law +scan +scarce +scarcity +scarcity_annotated +scatter +scatter_across +scenario +scene +scene_graph +sch +sch_winograd +schedule +schedule_sampling +scheduling +schema +schema_induction +schemas +schemata +scheme +schizophrenia +scholar +scholarly +scholarly_article +scholarly_document +scholarly_publication +school +school_child +school_exam +school_math +school_student +scibert +science +science_exam +science_fiction +science_humanity +science_journalism +scientific +scientific_article +scientific_discipline +scientific_excerpt +scientific_figure +scientific_literature +scientific_publication +scientific_writing +scientifique +scientist +scope +scope_negation +scope_resolution +score +score_higher +scorer +scoring +scoring_aes +scoring_function +scoring_rubric +scrape +scratch +screen +screening +script +scrutinize +scrutiny +sdp +sdp_workshop +sds +sea +seamless +seamless_integration +seamlessly +seamlessly_integrate +search +search_engine +search_mct +searchable +searching +second +second_acquisition +second_edition +second_esl +second_learner +second_place +secondary +secondary_school +secondly +section +section_header +section_title +sector +secure +secure_fourth +secure_place +secure_position +secure_rank +security +security_concern +security_risk +security_threat +security_vulnerability +see +see_surge +see_unseen +seed +seed_lexicon +seek +seeker +seeker_internal +seem +seemingly +segment +segmental 
+segmentation +segmentation_cws +segmentation_ematique +segmentation_punctuation +segmenter +sein +sein_des +seldom +select +select_appropriate +select_informative +select_suitable +selection +selectional +selectional_preference +selectional_restriction +selective +selective_gate +selective_masking +selective_rationalization +selectively +selector +self +self_attention +self_attentional +self_attentive +self_consistency +self_disclosure +self_pace +self_rationalization +self_reflection +self_regulate +self_supervise +selon +selon_diff +selon_les +selon_leur +selon_une +semantic +semantic_parsing +semantic_relatedness +semantically +semantically_coherent +semantically_enrich +semantically_equivalent +semantically_meaningful +semantically_relate +semantically_similar +semantically_syntactically +semantically_unrelated +semantics +semeval +semeval_competition +semeval_hyperpartisan +semeval_multiconer +semeval_multidomain +semeval_multimedia +semeval_offenseval +semeval_patronize +semi +semi_automated +semi_automatic +semi_markov +semi_parametric +semi_structured +semi_supervis +semi_supervised +seminal +seminal_work +sen +send +senior +sens +sense +sense_disambiguation +sense_induction +sense_inventory +sensible +sensitive +sensitive_hashing +sensitivity +sensor +sensory +sensory_experience +sensory_modality +sentence +sentential +sentential_paraphrase +sentiment +sentiment_analysis +sentiment_polarity +sentiment_triplet +sentimental +separate +separately +separation +seq +seq_seq +sequence +sequence_labeling +sequence_labelling +sequence_sequence +sequence_tagging +sequence_transduction +sequential +sequentially +serbian +serbian_croatian +serial +serial_verb +series +serious +serious_concern +serious_consequence +serious_game +serious_threat +seriously +serve +serve_foundation +serve_proxy +serve_starting +serve_valuable +server +server_client +server_side +service +service_center +service_infrastructure +service_provider +ses +session +set +setting +setup +seven 
+seventh +several +severe +severe_consequence +severely +severely_depressed +severely_endanger +severely_hinder +severely_limit +severity +severity_depression +sex +sexism +sexism_detection +sexism_misogyny +sexism_racism +sexist +sexist_content +sexist_sexist +sexual +sexual_abuse +sexual_harassment +sexual_orientation +sft +sft_rlhf +sgd +shall +shallow +shallow_discourse +shallow_heuristic +shape +shapley +shapley_additive +shapley_value +share +shared +shared_encoder +sharing +sharp +shed +shed_light +sheer +sheer_volume +shelf +shift +shoot +shoot_cross +shopping +shopping_experience +short +short_term +shortage +shortage_annotated +shortcoming +shortcut +shorten +shot +shot_prompting +shot_setting +showcase +showcase_efficacy +showcase_impressive +showcase_potential +showcase_remarkable +shrink +shuffle +siamese +siamese_convolutional +siamese_network +side +side_effect +side_side +sighan +sigmorphon +sigmorphon_share +sigmorphon_unimorph +sign +sign_asl +sign_avatar +sign_depression +sign_gloss +signal +signature +signer +signer_independent +significance +significance_testing +significant +significant_advancement +significant_hurdle +significant_improvement +significant_leap +significant_margin +significant_portion +significant_progress +significant_room +significant_stride +significantly +significantly_degrade +significantly_outperform +significative +significative_entre +signify +signing +signing_avatar +silent +silent_speech +silver +silver_standard +simcse +similaire +similar +similar_dissimilar +similar_dsl +similarit +similarit_emantique +similarit_entre +similarit_textuelle +similarity +similarity_relatedness +similarity_sts +similarly +similarly_sized +simile +simple +simple_yet +simplicity +simplification +simplified +simplified_version +simplify +simplistic +simply +simply_concatenate +simulate +simulated +simulated_environment +simulation +simulation_environment +simulator +simultaneous +simultaneous_interpretation +simultaneous_interpreter 
+simultaneous_interpreting +simultaneous_lecture +simultaneously +since +since_inception +single +single_gpu +singular +singular_plural +singular_value +sinhala +sinhala_tamil +sit +site +situate +situated +situated_dialogue +situated_interactive +situation +situational +situational_awareness +six +six_month +sixth +sixth_edition +sizable +sizable_improvement +size +size_fit +sizeable +sized +skeleton +sketch +sketch_engine +skew +skewed +skewed_distribution +skill +skill_neuron +skip +skip_connection +skip_gram +skip_thought +slang +slavic +slice +slide +slide_window +slight +slight_improvement +slight_modification +slightly +slightly_bad +slm +slot +slot_fill +slot_filling +slot_tagging +slot_value +slovak +slovene +slovene_croatian +slovenian +slow +slow_convergence +slow_expensive +slow_inference +slt +slt_track +slu +small +small_amount +small_fraction +small_portion +small_sized +small_slm +small_subset +smart +smart_assistant +smart_device +smart_home +smart_phone +smart_reply +smart_speaker +smartphone +smatch +smatch_score +smile +smm +smm_share +smm_workshop +smooth +smooth_transition +smoothing +smoothly +sms +sms_message +smt +snapshot +snip +snippet +snli +snli_mnli +snome +social +social_anxiety +social_governance +social_media +social_medium +social_norm +social_responsibility +social_science +social_scientist +socially +socially_acceptable +socially_aware +socially_intelligent +socially_responsible +socially_unacceptable +societal +societal_bias +societal_harm +societal_impact +societal_norm +society +socio +socio_cultural +socio_demographic +socio_economic +socio_political +sociolinguistic +sociological +soft +soft_prompt +soft_voting +softmax +softmax_layer +software +software_developer +software_development +software_engineering +software_package +soit +solely +solely_rely +solicit +solid +solid_foundation +solution +solve +solve_math +solve_mwps +solve_problem +solver +solving +someone +someone_else +something +sometimes +sometimes_even 
+sometimes_fail +somewhat +somewhat_surprisingly +son +son_int +song +song_lyric +sonnet +sont +sont_associ +sont_bas +sont_bien +sont_compar +sont_disponible +sont_ees +sont_egalement +sont_ensuite +sont_entra +sont_esent +sont_etudi +sont_evalu +sont_men +sont_moin +sont_pas +sont_plus +sont_proche +sont_propos +sont_souvent +sont_utilis +soon +sophisticated +sort +sota +sound +sound_change +sound_correspondence +soundness +source +source_side +sourced +sous +sous_cat +sous_ensemble +sous_forme +south +south_africa +south_african +south_america +south_american +south_ami +south_asia +south_asian +south_east +south_slavic +southern +southern_african +southern_min +southern_region +souvent +space +spacy +spain +spam +spam_detection +span +spanish +spanish_basque +spanish_catalan +spanish_portuguese +spark +spark_interest +sparql +sparql_query +sparse +sparse_autoencoder +sparse_dense +sparse_mixture +sparse_subnetwork +sparsely +sparsely_activate +sparseness +sparsity +spatial +spatial_arrangement +spatial_configuration +spatial_preposition +spatial_temporal +spatially +spatio +spatio_temporal +speak +speak_slovenian +speak_southern +speaker +speaker_addressee +speaker_commitment +speaker_diarization +speaker_listener +speaker_verification +speaking +speaking_assessment +speaking_style +spearman +spearman_correlation +spearman_rank +special +special_emphasis +special_token +special_treatment +specialise +specialised +specialist +specialization +specialize +specialized +specially +specially_design +specific +specifically +specifically_tailor +specification +specificity +specified +specify +spectral +spectral_clustering +spectral_decomposition +spectrogram +spectrum +spectrum_disorder +speculation +speculation_negation +speculation_scope +speculative +speculative_decode +speculative_decoding +speculative_sampling +speech +speech_recognition +speech_recognizer +speech_synthesis +speech_tts +speed +speedup +spell +spell_check +spell_checker +spell_checking 
+spell_correction +spell_error +spell_grammar +spell_mistake +spelling +spelling_check +spelling_checker +spelling_convention +spelling_correction +spelling_corrector +spelling_error +spelling_mistake +spelling_normalisation +spelling_variant +spelling_variation +spend +sphere +spider +spider_dev +spite +spite_simplicity +split +split_antecedent +split_rephrase +splitting +spoiler +spoiler_clickbait +spoken +spoken_dialogue +spoken_understanding +spontan +spontan_ees +spontaneous +spontaneous_speak +spontaneous_speech +sport +sport_game +spot +spotlight +spread +spread_fake +spread_false +spread_harmful +spread_hateful +spread_hatred +spread_misinformation +spur +spur_advancement +spur_future +spurious +spurious_association +spurious_correlation +sql +sql_query +squad +squad_marco +squad_triviaqa +square +square_error +square_kernel +srl +ssl +ssn +ssn_edi +ssn_nlp +ssn_semeval +sst +stability +stabilize +stable +stable_diffusion +stack +stack_bidirectional +stack_exchange +stack_lstm +stack_lstms +stack_pointer +stack_rnns +stacking +stacking_ensemble +staff +stage +stake +stakeholder +stance +stance_detection +stand +stand_alone +standalone +standard +standard_deviation +standard_orthography +standardised +standardization +standardize +standardized +standardized_orthography +standpoint +stanford +stanford_dependencie +star +start +starting +starting_point +state +state_art +state_automata +state_tracker +state_tracking +state_transducer +statement +states +static +static_contextualised +static_contextualized +static_dynamic +statistic +statistical +statistical_machine +statistical_significance +statistically +statistically_significant +statistique +statistique_pour +status +stay +steadily +steadily_increase +steady +steer +steering +steering_vector +stem +step +step_toward +step_towards +stepwise +stereotype +stereotypical +stereotypical_association +stereotypical_bias +still +still_face +still_fall +still_lag +still_room +still_struggle +still_suffer 
+still_unclear +still_underperform +still_unsatisfactory +stimulate +stimulate_interest +stimulate_research +stimulus +stochastic +stochastic_gradient +stock +stock_market +stock_movement +stock_price +stock_return +stock_trading +stone +stop +stop_criterion +storage +storage_cost +storage_requirement +store +story +story_cloze +story_continuation +story_ending +story_tell +storyline +storytelle +storytelling +straight +straight_forward +straightforward +straightforward_yet +strat +strat_egie +strat_egies +strategic +strategic_planning +strategically +strategically_select +strategy +stream +streaming +streaming_service +streamline +streamline_workflow +streamlined +street +street_journal +strength +strength_limitation +strength_shortcoming +strength_weakness +strengthen +stress +stress_disorder +stress_importance +stress_placement +strict +strictly +strictly_local +stride +strike +strike_balance +string +string_kernel +string_matching +string_transduction +strive +stroke +strong +strong_baseline +strong_competitor +strongly +strongly_associate +strongly_correlate +strongly_depend +strongly_influence +strongly_prefer +structur +structur_ees +structural +structural_priming +structurally +structurally_ambiguous +structurally_similar +structure +structured +structured_perceptron +structured_pruning +structured_unstructured +structuring +struggle +struggle_generalize +sts +student +student_essay +studied +studio +study +style +style_transfer +stylistic +stylistic_variation +stylize +stylometric +sub +sub_optimal +sub_sentential +subdomain +subgraph +subgroup +subject +subject_heading +subject_matter +subject_object +subject_predicate +subject_verb +subjective +subjective_nature +subjectivity +submission +submission_americasnlp +submission_huawei +submission_iwslt +submission_sigmorphon +submission_wat +submission_wmt +submit +submit_run +submit_team +subnetwork +suboptimal +subpar +subreddit +subsequence +subsequent +subsequently +subset +subspace +substance 
+substantial +substantial_gain +substantial_improvement +substantial_margin +substantial_room +substantially +substantially_bad +substantially_outperform +substantiate +substantiate_claim +substitute +substitution +substitution_lexicale +substring +substructure +subtask +subtask_subtask +subtitle +subtle +subtle_difference +subtle_nuance +subtlety +subtree +subtype +subword +subword_regularization +subword_segmentation +subword_tokenization +subword_tokenizer +subword_tokens +subword_unit +succeed +succeed_fail +success +success_failure +success_rate +successful +successfully +successfully_apply +successive +succinct +suffer +suffer_catastrophic +suffer_exposure +suffer_insufficient +suffer_poor +suffer_severe +suffer_shortage +suffice +sufficiency +sufficiency_comprehensiveness +sufficient +sufficiently +suffix +suffix_array +sugg +sugg_ere +sugg_erent +suggest +suggestion +suggestion_mining +suicidal +suicidal_ideation +suicidal_risk +suicidal_thought +suicide +suicide_attempt +suicide_ideation +suicide_note +suicide_prevention +suicide_risk +suit +suitability +suitable +suite +suited +sujet +sum +summarisation +summarise +summarization +summarization_mds +summarize +summarizer +summary +summary_worthy +sup +sup_port +super +superficial +superficial_cue +superficial_pattern +superglue +superglue_benchmark +superior +superior_performance +superiority +supersense +supertagge +supervis +supervis_ees +supervis_pour +supervise +supervised +supervision +supervision_signal +supplement +supplementary +supplementary_material +supply +supply_chain +support +support_refute +support_vector +supportive +suppose +suppress +supreme +supreme_court +sur +sur_ache +sur_apprentissage +sur_des +sur_deux +sur_etection +sur_jeu +sur_les +sur_plusieur +sur_qualit +sur_quatre +sur_trois +sur_une +sur_utilisation +sure +surface +surface_form +surface_realisation +surface_realization +surge +surge_interest +surpass +surpasse +surprisal +surprisal_entropy +surprisal_estimate 
+surprisal_theory +surprise +surprising +surprisingly +surrogate +surround +surround_context +surveillance +survey +susceptibility +susceptible +suspect +sustainability +sustainability_report +sustainable +sustainable_development +svd +svm +svm_classifier +svm_idf +svm_ive +svm_logistic +svm_xgboost +svms +swahili +swap +swedish +swedish_framenet +swiss +swiss_german +switch +switchboard +switchboard_corpus +switching +syllable +symbol +symbol_grounding +symbolic +symbolic_planner +symbolic_solver +symmetric +symmetry +symmetry_inversion +symptom +symptom_diagnosis +symptom_disease +symptom_severity +symptom_status +synchronize +synchronous +synchronous_grammar +synchronous_tree +synergistic +synergy +synonym +synonym_antonym +synonym_replacement +synonym_substitution +synonymous +synonyms +synonymy +synonymy_antonymy +synonymy_hypernymy +synset +syntactic +syntactical +syntactically +syntactically_annotate +syntactically_control +syntactically_inform +syntactically_motivated +syntactically_semantically +syntagmatic +syntagmatic_paradigmatic +syntax +syntax_aware +syntax_tree +syntaxe +syntaxe_emantique +syntaxique +syntaxique_ependance +syntaxique_fran +syntaxiques +synth +synth_ese +synth_etique +synthesis +synthesize +synthesized +synthesized_speech +synthetic +synthetic_animation +synthetically +synthetically_generate +syst +syst_ematique +syst_ematiques +syst_eme +system +systematic +systematic_comparison +systematic_exploration +systematic_investigation +systematically +systematically_manipulate +systemic +systemic_functional +systems +table +table_cell +table_column +tabular +tac +tac_kbp +tackle +tackle_issue +tackle_problem +tacre +tag +tag_lemmatize +tagalog +tagger +tagging +tagging_lemmatization +tagset +tail +tail_distribution +taille +taille_des +tailor +tailored +taiwan +taiwan_mandarin +taiwanese +take +take_account +take_advantage +take_consideration +take_inspiration +take_place +taking +tal +tal_nous +tale +talk +talk_page +taln +tamil 
+tamil_kannada +tamil_malayalam +tamil_tamil +tamil_telugu +tamil_tulu +tandem +tap +target +target_audience +target_side +targeted +tasking +tasks +taux +taux_erreur +tax +tax_law +taxonomic +taxonomic_hierarchy +taxonomy +taxonomy_enrichment +taxonomy_expansion +taxonomy_induction +teach +teacher +teacher_force +teacher_student +teaching +teaching_material +team +team_participate +team_register +team_semeval +team_ssn +team_submit +tech +technical +technically +technique +technological +technological_advance +technological_advancement +technology +technology_hlt +ted +ted_lecture +ted_talk +tedious +tedious_costly +tedious_time +tei +tei_encode +tei_xml +tel +tel_que +tel_syst +telephone +telephone_conversation +television +tell +tell_que +tell_story +telle +telle_que +telugu +telugu_kannada +telugu_malayalam +telugu_tamil +temp +temp_nous +temperature +temperature_sampling +temperature_scaling +tempeval +template +temporal +temporal_misalignment +temporal_ordering +temporal_orientation +temporally +temporally_order +temporelle +temps +temps_nous +ten +ten_thousand +tend +tend_favor +tend_hallucinate +tend_overfit +tendency +tendency_hallucinate +tense +tense_aspect +tense_mood +tension +tensor +tensor_decomposition +tensor_factorization +tensor_product +tensor_tensor +ter +ter_bleu +term +term_memory +terme +terminological +terminological_database +terminological_resource +terminology +terminology_management +test +test_bed +test_set +test_suite +testbe +testbed +testing +testing_phase +testset +text +textbook +texte +texte_arabe +texte_automatiquement +texte_clinique +texte_court +texte_dans +texte_deft +texte_ecrit +texte_edicaux +texte_est +texte_fran +texte_litt +texte_par +texte_qui +texte_scientifique +textual +textual_entailment +textual_relatedness +textual_similarity +textuel +textuelle +thai +thank +thematic +thematic_fit +theme +theorem +theorem_prove +theorem_prover +theoretic +theoretic_characterization +theoretic_perspective +theoretical 
+theoretical_empirical +theoretical_foundation +theoretical_guarantee +theoretical_justification +theoretical_practical +theoretically +theoretically_empirically +theoretically_ground +theoretically_motivated +theoretically_prove +theoretically_sound +theory +theory_mind +theory_posit +theory_rst +therapeutic +therapist +therapist_client +therapy +therapy_session +thereby +therefore +therein +thereof +thesauri +thesaurus +thesis +thesis_proposal +thesis_statement +thing +think +thinking +third +third_fourth +third_party +third_person +third_place +third_workshop +thirdly +thirteen +thirty +thirty_five +thorough +thorough_examination +thorough_investigation +thoroughly +thoroughly_analyze +thoroughly_examine +thoroughly_investigate +though +thought +thought_cot +thought_feeling +thought_prompting +thousand +thread +threat +threaten +three +threefold +threshold +throughout +throughout_entire +throughput +thus +tibetan +ticket +ticket_booking +ticket_hypothesis +tie +tier +tier_strictly +tight +tight_coupling +tight_integration +tightly +tightly_connect +tightly_couple +tightly_integrate +time +time_consume +time_consuming +time_interval +time_money +time_period +time_series +time_spend +timebank +timebank_dense +timeline +timeline_summarization +timely +timely_disclosure +timely_manner +timeml +times +times_nyt +timestamp +timing +tiny +tiny_fraction +tion +tip +tip_tongue +title +title_abstract +title_body +title_prose +tkg +tkg_reasoning +tod +today +today_digital +today_society +together +token +tokenisation +tokenization +tokenization_lemmatization +tokenization_scheme +tokenize +tokenizer +tokens +tom +tonal +tone +tongue +tool +toolbox +toolkit +toolkit_pytorch +top +top_bottom +top_nucleus +top_performer +top_tier +topic +topic_continuity +topic_modeling +topic_modelling +topical +topical_coherence +topically +topically_coherent +topically_relate +topological +topology +total +total_duration +total_team +totally +touch +touch_upon +tourism +tous +tous_les 
+tout +tout_abord +tout_etant +tout_les +toward +towards +toxic +toxic_comment +toxic_content +toxic_degeneration +toxic_engaging +toxic_span +toxicity +toxicity_mitigation +trac +trac_consortium +trac_share +trace +trace_back +track +track_parseme +tracker +tracking +tractable +traction +trade +tradeoff +trading +tradition +traditional +traditionally +traduction +traduction_automatique +traduction_parole +traduction_statistique +traffic +train +trainable +trainable_parameter +trained +training +trait +traitement +traitement_automatique +traitement_des +traitement_langage +traiter +trajectory +trans +transaction +transcribe +transcribe_audio +transcribed +transcribed_speak +transcribed_speech +transcript +transcription +transcription_automatique +transcription_bottleneck +transcription_convention +transducer +transducer_cascade +transducer_fst +transducer_sst +transduction +transduction_grammar +transductive +transfer +transferability +transferable +transform +transformation +transformative +transformative_potential +transformer +transformer_vaswani +transition +transition_parser +transitive +transitive_verb +transitivity +translate +translation +translational +translational_correspondence +translationese +translator +translator_productivity +transliterate +transliterated +transliteration +transliteration_mining +transmission +transmit +transparency +transparency_controllability +transparency_explainability +transparency_reproducibility +transparent +transphobia +transphobia_detection +transport +travail +travail_esent +travail_est +travail_nous +travaux +travaux_ant +travaux_ont +travaux_sur +travel +travel_planning +traver +traversal +traverse +treat +treat_equally +treatment +treatment_outcome +treatment_plan +trec +trec_covid +tree +tree_adjoin +tree_ast +tree_kernel +tree_linearization +tree_transducer +treebank +treebank_ctb +treebank_pdtb +tremendous +tremendous_amount +tremendous_progress +tremendous_success +trend +tri +tri_gram +triage +trial 
+trial_report +trick +trigger +trigger_warning +trigram +trip +triple +triplet +triplet_extraction +triplet_loss +trivial +triviaqa +trois +trois_aches +trois_ethodes +trois_langue +troll +troll_meme +troll_troll +trolling +trolling_aggression +trouble +trouble_parole +true +true_false +truly +truncation +trust +trustworthiness +trustworthy +truth +truthful +truthful_deceptive +truthfulness +truthfulness_claim +try +tsc +tsc_participation +tsc_submission +tsc_wmt +tst +tts +tulu +tunable +tunable_parameter +tune +tuned +tuning +tunisian +tunisian_arabic +tunisian_dialect +tuple +turbo +turbo_gpt +ture +ture_test +turk +turkish +turn +turn_taking +tutor +tutorial +tutorial_cover +tutoring +tweet +tweet_intimacy +tweets +tweets_fran +twelve +twenty +twice +twice_fast +twitter +twitter_bot +twitter_facebook +twitter_instagram +twitter_message +twitter_reddit +twitter_weibo +two +two_fold +two_stage +twofold +twofold_first +type +typed +typical +typically +typing +typo +typo_correction +typological +typological_database +typologically +typologically_distant +typologically_diverse +typology +uas +uas_las +ubiquitous +ubiquity +ubuntu +ucca +ucca_amr +udpipe +uima +ukraine +ukraine_conflict +ukrainian +ukrainian_ukrainian +ukrainian_war +ultimate +ultimate_goal +ultimately +ultra +ultra_fine +ultra_low +umls +umls_ontology +unable +unacceptable +unaligned +unambiguous +unannotate +unannotated +unanswerable +unanswerable_question +unanswered +unanswered_question +unavailability +unavailable +unaware +unbalanced +unbiased +unbounded +uncertain +uncertainty +uncertainty_estimation +uncertainty_quantification +unchanged +unclear +unclear_extent +unclear_whether +uncommon +unconstrained +uncover +uncover_surprising +underestimate +underexplored +underexplored_area +undergo +undergraduate +undergraduate_student +underlie +underline +underline_importance +underlying +underlying_assumption +underlying_cause +undermine +undermine_reliability +underperform +underpin 
+underrepresented +underrepresented_group +underscore +underscore_efficacy +underscore_importance +underscore_necessity +underscore_need +underscore_potential +underscore_significance +underscore_urgent +underspecified +understand +understandable +understanding +understanding_nlu +understanding_slu +understudied +understudy +undertake +underutilize +undesirable +undesirable_behavior +undesirable_bias +undesired +undesired_behavior +une +une_ache +une_analyse +une_approche +une_autre +une_bonne +une_certaine +une_cha +une_combinaison +une_comparaison +une_connaissance +une_corr +une_ecision +une_elioration +une_emonstration +une_etape +une_ethode +une_ethodologie +une_etrique +une_etude +une_exp +une_fois +une_fonction +une_forme +une_grammaire +une_grande +une_indexation +une_langue +une_meilleure +une_mesure +une_nouvelle +une_partie +une_plateforme +une_premi +une_proc +une_repr +une_requ +une_ressource +une_strat +une_telle +uneven +uneven_distribution +unexpected +unexplored +unexplored_area +unfair +unfair_clause +unfaithful +unfamiliar +unfold +unfortunately +ungrammatical +ungrammatical_sentence +uni +uni_directional +uni_modal +unicode +unidirectional +unidirectional_bidirectional +unification +unified +uniform +uniform_mean +uniformity +uniformly +uniformly_distribute +unify +unigram +unigram_bigram +unigram_frequency +unimodal +unimodal_bimodal +unimodal_multimodal +unimorph +unimportant +uninformative +unintended +unintended_bias +union +unique +unique_characteristic +unique_identifier +unique_opportunity +uniquely +uniqueness +unit +unit_edu +unit_gru +unit_lexicale +unite +united +united_states +universal +universal_dependency +universality +universally +universally_applicable +university +university_edinburgh +university_exam +university_helsinki +university_student +university_submission +university_university +unknown +unknown_intent +unlabeled +unlabeled_attachment +unlabelled +unlearn +unlearning +unleash +unleash_potential +unleash_power +unless 
+unlike +unlike_conventional +unlike_previous +unlike_prior +unlike_traditional +unlikely +unlock +unlock_full +unnatural +unnecessary +unobserved +unprecedented +unprecedented_opportunity +unpredictable +unravel +unrealistic +unrealistic_assumption +unrelated +unreliable +unresolved +unrestricted +unrestricted_coreference +unsafe +unsafe_behavior +unsafe_response +unsatisfactory +unseen +unsegmented +unsolved +unsolved_problem +unstable +unstructured +unsuitable +unsupervise +unsupervised +unsupervised_manner +unsupported +untapped +untapped_potential +unusual +unveil +unwanted +unwanted_bias +upcoming +update +updating +upgrade +upload +upon +upon_acceptance +upon_publication +upon_request +upper +upper_bind +upper_bound +upper_layer +upper_ontology +upstream +upto +uralic +urban +urban_planning +urban_scene +urdu +urdu_roman +urgent +urgent_need +url +url_citation +url_https +usability +usable +usage +useful +usefulness +useless +user +user_engagement +user_friendly +user_request +user_satisfaction +user_simulator +usual +usually +utile +utile_pour +utilis +utilis_comme +utilis_dan +utilis_ees +utilis_par +utilis_pour +utilisant +utilisant_des +utilisant_les +utilisant_une +utilisateur +utilisation +utilisation_des +utilisation_mod +utilisation_outil +utilise +utiliser +utilison +utilison_des +utility +utilization +utilize +utmost +utmost_importance +utter +utterance +uyghur +vaccination +vaccination_debate +vaccine +vae +vae_vae +vague +valence +valence_arousal +valency +valency_dictionary +valency_frame +valency_lexicon +valeur +valid +validate +validate_effectiveness +validate_efficacy +validate_feasibility +validate_superiority +validation +validity +validity_novelty +valuable +valuable_asset +valuable_insight +value +value_cache +van +vanilla +vanish +vanish_gradient +vardial +vardial_evaluation +vardial_locate +vardial_workshop +vari +variability +variable +variable_length +variance +variant +variation +variational +variational_auto 
+variational_autoencoder +variational_baye +variational_inference +variational_posterior +varied +variety +variety_geolocation +various +vary +vary_considerably +vary_degree +vary_depend +vary_greatly +varying +varying_level +vast +vast_amount +vast_array +vast_majority +vast_quantity +vastly +vastly_outperform +vaswani +vec +vector +vector_quantize +vector_space +vehicle +venue +ver +ver_des +ver_une +veracity +veracity_claim +veracity_prediction +verb +verb_adjective +verb_agreement +verb_alternation +verb_construction +verb_noun +verb_particle +verb_tense +verb_valency +verbal +verbal_abuse +verbal_communication +verbal_cue +verbal_fluency +verbal_idiom +verbal_inflection +verbal_intelligence +verbal_irony +verbal_leakage +verbal_morphology +verbal_multiword +verbal_mwe +verbal_nonverbal +verbal_verbal +verbalization +verbalize +verbalizer +verbatim +verbatim_memorization +verbatim_transcript +verbnet +verbnet_propbank +verbose +verbs +verdict +verifiability +verifiable +verification +verification_fever +verifier +verify +verify_claim +verify_correctness +verify_effectiveness +versa +versatile +versatility +verse +verse_poetry +version +versus +vertex +vertical +vertical_thinking +via +viability +viable +viable_alternative +viable_option +viable_solution +vice +vice_versa +victim +vicuna +vicuna_llama +vid +video +video_captioning +video_clip +video_moment +video_recording +vietnamese +view +viewer +viewpoint +violate +violation +violence +violence_incite +virtual +virtual_agent +virtual_assistant +virtual_environment +virtual_patient +virtual_reality +virtual_signer +virtually +vis +vis_vis +visant +vise +visibility +visible +vision +vision_lvlm +vision_navigation +vision_vlm +visit +visual +visual_auditory +visual_grounding +visual_imagination +visual_scene +visual_storytelling +visualisation +visualise +visualization +visualize +visually +visually_ground +visually_impair +visually_rich +visually_situate +vit +vit_tts +vital +vital_role +viz +vlm +vln 
+vln_agent +vlp +vocabulary +vocabulary_expansion +vocabulary_oov +vocabulary_size +vocal +voice +voice_activity +voice_assistant +voice_command +voice_conversion +voice_synthesis +voix +volume +volunteer +vote +voting +voting_ensemble +vowel +vowel_consonant +voyelle +voyelle_fran +vqa +vqa_vqa +vue +vulnerability +vulnerable +vulnerable_adversarial +vulnerable_backdoor +vulnerable_population +wait +wait_policy +walk +wall +wall_street +wang +want +war +warm +warm_start +warn +warn_contain +warning +warrant +wassa +wassa_empathy +wassa_implicit +wassa_share +wasserstein +wasserstein_autoencoder +wasserstein_distance +waste +wat +wat_indic +wat_participate +wat_workshop +watch +watermark +watermarke +watermarking +wav +wav_vec +way +weak +weak_supervision +weaken +weakly +weakly_correlate +weakly_supervise +weakly_supervised +weakness +wealth +weather +weather_forecast +web +web_app +web_browser +web_crawl +web_interface +web_page +web_portal +web_scrape +web_service +web_site +webnlg +webnlg_challenge +webpage +website +week +weibo +weigh +weight +weight_activation +weight_averaging +weight_consolidation +weight_finite +weight_kappa +weight_matrix +weight_poison +weight_sharing +weighted +weighted_average +weighted_finite +weighted_kappa +weighted_sum +weighted_voting +weighting +weighting_scheme +well +well_performing +well_suit +well_suited +wer +wer_reduction +west +west_african +west_germanic +west_slavic +western +western_centric +western_culture +western_european +whenever +whereas +whereby +wherein +whether +whilst +whisper +white +white_box +whole +whose +whose_goal +wic +wide +wide_adoption +wide_array +wide_coverage +wide_margin +wide_range +wide_spectrum +wide_spread +wide_variety +widely +widely_accept +widely_acknowledge +widely_adopt +widely_applicable +widen +widespread +widespread_adoption +widespread_deployment +widespread_dissemination +widespread_usage +width +width_beam +wiki +wikidata +wikidata_identifier +wikihow +wikihow_website +wikipedia 
+wikipedia_article +wikipedia_biography +wikipedia_edition +wikipedia_editor +wikipedia_hyperlink +wikipedia_page +wikipedia_revision +wikisql +wikisql_spider +wikitext +wiktionary +wild +win +win_rate +win_team +win_ticket +window +window_size +winner +winograd +winograd_schema +winograd_schemas +wisdom +wisdom_crowd +wise +wish +within +within_realm +without +without_compromise +without_extra +without_forget +without_hurt +without_incur +without_lose +without_retrain +without_sacrifice +witness +witness_rapid +witness_remarkable +witness_significant +witness_surge +wizard +wizard_wikipedia +wizard_woz +wmt +wmt_ape +wmt_biomedical +wmt_share +wmt_slt +wnut +wnut_identification +wnut_share +woman +woman_immigrant +woman_man +word +word_mover +word_vec +wording +wordlist +wordnet +wordnet_germanet +wordnet_princeton +wordnet_synset +work +workbench +worker +workflow +workflow_manager +working +working_group +workload +workload_radiologist +workshop +workshop_acl +workshop_asian +workshop_cole +workshop_eacl +workshop_emnlp +workshop_figurative +workshop_locate +workshop_naacl +workshop_proceeding +workshop_scholarly +workshop_trolling +world +world_scenario +worldwide +worth +worth_note +worth_thousand +worthy +would +would_desirable +would_expect +would_otherwise +woz +wrap +write +write_assistance +write_roman +writer +writer_reader +writing +writing_assistance +writing_style +wsd +wsj +www +www_com +www_github +www_org +www_youtube +xai +xgboost +xlm +xlm_roberta +xlnet +xlnet_roberta +xml +xml_file +xml_format +xml_markup +xml_tei +xnli +xsum +yahoo +yahoo_answer +yelp +yelp_amazon +yelp_restaurant +yelp_review +yet +yet_effective +yield +yield_substantial +ynu +ynu_hpcc +york +young +young_adult +young_child +young_learner +young_people +young_student +youtube +youtube_com +youtube_comment +youtube_video +zero +zero_pronoun +zero_shoot +zero_shot +zhang +zone +实验结果表明 diff --git a/data/Sample_data/CFDTM/beta.npy b/data/Sample_data/CFDTM/beta.npy new file mode 
100644 index 0000000000000000000000000000000000000000..56c13bfba1da799ac6566384228badd4eb949ba5 --- /dev/null +++ b/data/Sample_data/CFDTM/beta.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5ff3ca0fa38037bf605a067bfa14a4902b9cf5d337bc5bc153744a30726d06c +size 23328 diff --git a/data/Sample_data/docs.jsonl b/data/Sample_data/docs.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f00ff30c66308f801915f17a4cca326f6517bdfb --- /dev/null +++ b/data/Sample_data/docs.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9e8f99bcf7cc2418b043973f3565238b46d0443d99a4363dfb1e53ae356d1f3 +size 927 diff --git a/data/Sample_data/processed/length_stats.json b/data/Sample_data/processed/length_stats.json new file mode 100644 index 0000000000000000000000000000000000000000..4a41f87dadf714db6fd8a0c40096be70afbc84e1 --- /dev/null +++ b/data/Sample_data/processed/length_stats.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29835dd9994ef6924536130660c57e901958fe65d10b0f9917ef50d8b7c14492 +size 115 diff --git a/data/Sample_data/processed/time2id.txt b/data/Sample_data/processed/time2id.txt new file mode 100644 index 0000000000000000000000000000000000000000..e4763c3e0a8b522b9b50345cc84ec441973536e7 --- /dev/null +++ b/data/Sample_data/processed/time2id.txt @@ -0,0 +1,7 @@ +{ + "2020": 0, + "2021": 1, + "2022": 2, + "2023": 3, + "2024": 4 +} \ No newline at end of file diff --git a/data/Sample_data/processed/train_bow.npz b/data/Sample_data/processed/train_bow.npz new file mode 100644 index 0000000000000000000000000000000000000000..97d68b46b9ad355443eacae0ea3c84ce9008630b --- /dev/null +++ b/data/Sample_data/processed/train_bow.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8d2a95fc50543a59adc1321d1a4bad4538a213484ce2a5edad1b6147f19a364 +size 1144 diff --git a/data/Sample_data/processed/train_texts.txt b/data/Sample_data/processed/train_texts.txt new 
file mode 100644 index 0000000000000000000000000000000000000000..b470e4495bf67f3f9c1af07bfe5993c7ef4223d4 --- /dev/null +++ b/data/Sample_data/processed/train_texts.txt @@ -0,0 +1,10 @@ +sustainable energy solution crucial future planet +learn new language open world opportunity +human brain incredibly complex organ +never underestimate power good book +early bird catch worm second mouse get cheese +advancement biotechnology offer promise new treatment +innovation often arise unexpected place +quick brown fox jump lazy dog +artificial intelligence rapidly transform various industry +internet revolutionize way communicate access information diff --git a/data/Sample_data/processed/train_times.txt b/data/Sample_data/processed/train_times.txt new file mode 100644 index 0000000000000000000000000000000000000000..023cc9a67c59fbdab6bbb98a05711cca4c825c75 --- /dev/null +++ b/data/Sample_data/processed/train_times.txt @@ -0,0 +1,10 @@ +3 +1 +2 +2 +1 +4 +0 +3 +4 +3 diff --git a/data/Sample_data/processed/vocab.txt b/data/Sample_data/processed/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..86f4161a67c040a63d459189871d1f6405996519 --- /dev/null +++ b/data/Sample_data/processed/vocab.txt @@ -0,0 +1,58 @@ +access +advancement +arise +artificial +biotechnology +bird +book +brain +brown +catch +cheese +communicate +complex +crucial +dog +early +energy +fox +future +get +good +human +incredibly +industry +information +innovation +intelligence +internet +jump +language +lazy +learn +mouse +never +new +offer +often +open +opportunity +organ +place +planet +power +promise +quick +rapidly +revolutionize +second +solution +sustainable +transform +treatment +underestimate +unexpected +various +way +world +worm diff --git a/data/Sample_data/processed/word_embeddings.npz b/data/Sample_data/processed/word_embeddings.npz new file mode 100644 index 0000000000000000000000000000000000000000..ba7f90ef1cd7b93b437eae39ff2c1e5ca4f2e2cb --- /dev/null +++ 
b/data/Sample_data/processed/word_embeddings.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3592add93e6c79062fd7aa9a31390823ec48919e7b550767bf4acdd42fc57d7 +size 44489 diff --git a/data/TCPD_IPD_Finance/CFDTM/beta.npy b/data/TCPD_IPD_Finance/CFDTM/beta.npy new file mode 100644 index 0000000000000000000000000000000000000000..613d46273c9051585849aad1425debcefc9d0125 --- /dev/null +++ b/data/TCPD_IPD_Finance/CFDTM/beta.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f27ad5eaf1192a857c3d7434d85f995e7827a682fb458d0635007b5e86a103 +size 14006288 diff --git a/data/TCPD_IPD_Finance/CFDTM/topic_label_cache.json b/data/TCPD_IPD_Finance/CFDTM/topic_label_cache.json new file mode 100644 index 0000000000000000000000000000000000000000..6932a6f1927581b75e9220719e18e71c4d13a796 --- /dev/null +++ b/data/TCPD_IPD_Finance/CFDTM/topic_label_cache.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b93da989a1f32cdc57090f22d1530f532ea483e38e2141e2ea1b3dc6b528d4c5 +size 1916 diff --git a/data/TCPD_IPD_Finance/DETM/beta.npy b/data/TCPD_IPD_Finance/DETM/beta.npy new file mode 100644 index 0000000000000000000000000000000000000000..03c18079a48122fe219845ed504edb6da25698d7 --- /dev/null +++ b/data/TCPD_IPD_Finance/DETM/beta.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa94c73b9ed76d44ecfd08bbababeb1792c9f7c69d74ac6cf61bf60fe9c354ba +size 14006288 diff --git a/data/TCPD_IPD_Finance/DETM/topic_label_cache.json b/data/TCPD_IPD_Finance/DETM/topic_label_cache.json new file mode 100644 index 0000000000000000000000000000000000000000..a067fa42b81511adb0e84339b9bef19b69c7ff96 --- /dev/null +++ b/data/TCPD_IPD_Finance/DETM/topic_label_cache.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc6c633fbe6b6734db7b74e4707c915194a76228e80dd42ecadcaf809e59056 +size 1742 diff --git a/data/TCPD_IPD_Finance/DTM/beta.npy b/data/TCPD_IPD_Finance/DTM/beta.npy new file mode 
100644 index 0000000000000000000000000000000000000000..738d83ffe3deb06e68bfe835411324ac4b0ce9ca --- /dev/null +++ b/data/TCPD_IPD_Finance/DTM/beta.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:279651f4a4684e44f2e063cdf11166c598f8137cdf640e453f8256ed34092a04 +size 28012448 diff --git a/data/TCPD_IPD_Finance/DTM/topic_label_cache.json b/data/TCPD_IPD_Finance/DTM/topic_label_cache.json new file mode 100644 index 0000000000000000000000000000000000000000..c365cd284e81a60802e754976fe2e693cf3324e3 --- /dev/null +++ b/data/TCPD_IPD_Finance/DTM/topic_label_cache.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f357ce21f4a6a6cc3f1bce349fd1e046b3646e0514fe4640d90a77d8ce4f6f3f +size 1879 diff --git a/data/TCPD_IPD_Finance/docs.jsonl b/data/TCPD_IPD_Finance/docs.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..92e6fff73d92325e7db418bd27b09a00720bacce --- /dev/null +++ b/data/TCPD_IPD_Finance/docs.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:317af12ddb1ff94229b2827f004a226f7b963905acc2090c588cfe29916bd092 +size 34621852 diff --git a/data/TCPD_IPD_Finance/inverted_index.json b/data/TCPD_IPD_Finance/inverted_index.json new file mode 100644 index 0000000000000000000000000000000000000000..15a9101b2351069bd6ab4c4b0a47b41d4d0be4bb --- /dev/null +++ b/data/TCPD_IPD_Finance/inverted_index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e69e718098ee8ccf62785df964524c6f71701463bd4b2d505afd78bb880ddfb0 +size 12232905 diff --git a/data/TCPD_IPD_Finance/processed/lemma_to_forms.json b/data/TCPD_IPD_Finance/processed/lemma_to_forms.json new file mode 100644 index 0000000000000000000000000000000000000000..c9fdc2638cf0e55a8eee319ea0e5204320ea1830 --- /dev/null +++ b/data/TCPD_IPD_Finance/processed/lemma_to_forms.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97be44fb273f2621ed9b3ae9e4e3a1eec2658366edf8752b8f5b020b16a10707 
+size 2144051 diff --git a/data/TCPD_IPD_Finance/processed/length_stats.json b/data/TCPD_IPD_Finance/processed/length_stats.json new file mode 100644 index 0000000000000000000000000000000000000000..d8dcdf66eb537ca2259ae1a6437de565ad31dd47 --- /dev/null +++ b/data/TCPD_IPD_Finance/processed/length_stats.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3596e9167456a2ae615451ebd2dc55fbb1ae6fa8c068d8e6854895bb2711091c +size 134 diff --git a/data/TCPD_IPD_Finance/processed/time2id.txt b/data/TCPD_IPD_Finance/processed/time2id.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a04063949fda6839070715b9cf76d0ccffa7bd6 --- /dev/null +++ b/data/TCPD_IPD_Finance/processed/time2id.txt @@ -0,0 +1,23 @@ +{ + "1999": 0, + "2000": 1, + "2001": 2, + "2002": 3, + "2003": 4, + "2004": 5, + "2005": 6, + "2006": 7, + "2007": 8, + "2008": 9, + "2009": 10, + "2010": 11, + "2011": 12, + "2012": 13, + "2013": 14, + "2014": 15, + "2015": 16, + "2016": 17, + "2017": 18, + "2018": 19, + "2019": 20 +} diff --git a/data/TCPD_IPD_Finance/processed/vocab.txt b/data/TCPD_IPD_Finance/processed/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..1def716492d52e846c94eccab93da788a5fae77c --- /dev/null +++ b/data/TCPD_IPD_Finance/processed/vocab.txt @@ -0,0 +1,8337 @@ +action +already +approve +augmentation +balasaheb_vikhe +bank +basis +capacity +central +complete +consideration +contemplate +control +currency_note +department +deposit +detailed +drinking_water +except +execute +exist +expansion +factory +formulate +full +increase +ink +madhya_pradesh +manufacture +meet +modernisation +note_press +note_printing +one +plan +press +press_nasik +principle +print +printing +project +promise +propose +provide +report +requirement +requisite +respect +scheme +start +state +technology +time +two +type +undertake +urgent +water +water_supply +work +bhopal +chandigarh +currency +equip +extensive +fix +four +germany +gradually +instal 
+international +issue +line +machine +manual +mutilate +note +office +process +processing +rbi +recently +reserve +scale +set +soil +soiled +sort +specific +supply +system +systems +three +time_frame +usa +use +verification +additional +administer +administrative +approach +arise +commission +company +create +disinvestment +equity +evolve +expect +far +make +modality +model +oversee +power +profit +psu +psus +public_sector +sale +stake +strategic_sale +undertaking +vest +accounting +achieve +actual +actual_actual +advance +allowance +answer +attributable +benefit +budget +budget_estimate +budgetary_support +bulk +collection +committed +composition +consequent_upon +constant_endeavour +continue +crore +current_year +cut +decision +defence +difficult +due +estimate +expenditure +expense +fiscal_deficit +five +grant +growth +high_level +house_statement +identify +impose +instruction +interest +internal +investment +item +large +last +loan +mandatory_cut +may +measure +nature +pay +pay_commission +payment +pension +place +police +question +rapid +rapidly +reason +recommendation +reduce +reduction +relate +restrain +result +revise +revision +salary +saving +secretariat +security +see +short_term +show +sick +similar +small_saving +star +statement +states +subsidy +table +therefor +year +amount +bridge +collect +consolidated_fund +credit +follow +percentage +utilise +utilize +voluntary_retirement +aware +bhutan +circulation +district +house +information +lay +prevent +step +territory +west_bengal +wide +abolish +area +associate +certain +construction +december +direct_investment +distribution +early +economy +electricity +fdi +foreign +generation +highway +holder +hundred_percent +include +information_technology +infrastructure +lok_sabha +maintenance +order +partner +permit +policy +port +reply +road +sector +share +shareholder +toll +transmission +venture +arrest +bangladesh +case +check +come +country +effort +fake_currency +involve +large_scale +last_three +notice 
+number +pakistan +people +period +seize +smuggled +smuggler +smuggling +value +wise +final_decision +housing +less +percent +present +proposal +rate +taken +alert +board +cbec +client +complaint +computerised +confidentiality +custom +customs_cbec +deal +dhananjaya_kumar +ensure +excise +extent +float +implementation +invite +launch +officer +public +purpose +staff +stage +useful +vigilance +website +acquire +acquisition +act +allow +banking +body +capital +ceiling +corporate +govern +hold +holding +individual +name +nationalise +nris +otherwise +overseas +path +prescribe +provision +resident_indians +resident_outside +section +term +transfer +constitute +cooperative +functioning +implemented +main +observation +reaction +review +task_force +thereto +yet +available +condition +development +financial +industry +institution +liberalise +operate +balance +completion +fund +guidelines +kerala +panchayat +panchayati_raj +raj +recommend +release +request +stipulate +sum +tenth +thereon +union +adr_gdr +announcement +approval +away +business +comply +conform +dispense +fera +foreign_exchange +fresh +general_permission +guideline +issuer +issues +january +liberalisation +mandatory +market +need +norm +notification +obtain +obtain_prior +ongoing +part +permissible +post +prior_approval +procedure +proceed +pursuance +reckon +reporting +shrimati +underlie +whereby +annually +employee +epf +general +lower +notify +provident_fund +accord +account +adequate +adopt +advise +aggregate +agricultural +agriculture +agro +applicable +application +april +assist +availability +banking_regulation +behalf +bihar +borrower +branch +branch_manager +card +care +cash +certificate +committee +composite +compulsory +concession +consist +contribute +corporation +cost +cover +crop +dccbs +delegation +disbursement +discipline +discretion +drawal +effect +eligibility +enable +ensure_availability +especially +establish +every +exemption +extend +extension +facility +factor +family +farmer 
+financing +flow +function +fund_ridf +give +good_track +grow +high_tech +insist +inter_alia +introduce +introduction +involvement +irrespective +kisan_credit +labour +lending +limit +many +margin_security +matter +method +minimum +nabard +national +open +operation +overdue +performing_asset +plantation +product +progressive +quick +record +recovery +relaxation +representative +rural +sanction +sao +scb +scbs +seasonal_agricultural +several +shortfall +simplification +society +special +specialized +strengthen +target +tech +technical +uniformity +upon +well +instance +nationalize +particularly +rural_areas +view +violate +assistance +commerce +consultant +different +evaluate +get +group +incur +initiate +internet_banking +nri +offer +orient +personal +presently +sbi +segment +select +service +solution +vendor +aid +allocate +allocation +bid +block +border +chief_secretary +concerned +decide +develop +hill +level +planning +planning_commission +preside +primarily +programme +progress +punjab +quarterly +responsibility +screen +supplement +acceleration +atleast +build +cent +comfortable +economic +eight +employment +fact +fast +favourable_impact +goal +inflation +inflow +low +main_reason +masse +multi_sectoral +necessary +overall +position +poverty +pursue +reach +reform +seven +strategy +substantial +unemployment +unless +activity +admit +air +airlines +another +arrive +asian +attempt +bureau +cell +central_excise +close_watch +conceal +concerted +delhi +deploy +detect +deterrent +director_general +discharge +dri +drug +drug_trafficking +effective +eleven +field_formation +fill +flight +guard +heroin +hospital +hotel +icd +igi_airport +indira_gandhi +initial +intelligence +intelligence_unit +international_airport +investigation +kgs +kumar +maintain +narcotic +narcotics_control +ndps_act +official +one_woman +origin +pack +passenger +penal_action +person +preliminary_investigation +preventive +prosecution +rail +recover +reveal +revenue +room +sea_port +seizure 
+sensitive +six +source +thwart +travel +unit +week +yield +actually +disburse +enclosed +idbi +industrial +till +almost +alternative +assign +basic +consignment +cotton +devolution +excise_duty +exclude +fifteen +freeze +gross +implement +income_tax +instead +lieu +march +medicinal +modification +net +prescribed +railway +stamp_duty +subject +successive +sugar +suggest +surcharge +tax +taxis +tobacco +abroad +agree +band +commercial +damage +life +mark +piece +practice +require +stop +tender +window +write +absorption_capacity +adequate_infrastructural +annexure +bala_saheb +committee_slbc +conducive +credit_flow +depend_upon +deposit_ratio +deprive +disparity +entrepreneurial +find +friday +improve +initiative +law +level_banker +like +marketing +medium +meeting +monitor +possible +properly +ratio +region +respective +rural_semi +satisfactory +schedule +separately +situation +unusual +urban +vary +vikhe +accumulate +analyse +appellate_tribunal +appointment +arrear +arrear_demand +assess +assessment +attach +attachment +authority +bind +charge +closely_monitor +coercive +continuous_process +court +day +defaulter +defaulter_movable +demand +detention +entire +exceed +fall +high +immovable_property +lac +lakh +levy +litigation +major +management +nearly +outstanding +owe +penalty +pende +periodic +receiver +recoverable +regular +regularly +rupee +september +settlement +statutory +stay +thereafter +within +administration +advisory +advisory_committee +affair +agreement +aids_control +allahabad +america +anand +annual +arab +arun +asia +attend +attention +aug +australia +bharti +british +canada +ceo +chairman +chamber +chandra +chief +child +china +city +civil_aviation +commissioner +community +conference +convention +coop +corp +corpn +cum +deepak +deptt +dev +dir +director +discussion +disease +division +draw +drink +education +educational +election +electrical +energy +environment +equipment +europe +exchange +executive +executive_director +executive_officer 
+exhibition +export +fair +feed +food +forestry +form +former +former_chairman +forum +ganga +govt +gupta +health +health_family +hindi +hiv +home +hong_kong +hongkong +horticulture +improvement +ind +indira +indo +indonesia +industries +inspect +inspector +institute +inter +irrigation +japan +jul_jul +kalyan +kanpur +khan +kisan +lal +list +london +lucknow +mahila +malaysia +manager +mar +medical +member +mill +mission +modern +month +nagar +nainital +narayan +negotiation +nepal +netherland +netherlands +nigam +noida +nominate +oct +oman +pariyojana +park +participate +participation +permission +personnel +political +pollution +prasad +pratap +principal +prof +proj +projects +purchase +ram +ray +reference +regional +relation +remark +representation +research +resource +round +rozgar +rule +russia +sagar +saudi +scholarship +school +science +secretary +seed +seller +seminar +shah +sharma +shiv +singapore +small_scale +smt +social_welfare +south_africa +south_korea +ssi +study +study_tour +suresh +sweden +team +textile +thailand +tokyo +tour +tourism +tourist +towards +trade +training +training_institute +transport +udyog +unstarre +unstarred_question +verma +vilas +village +violation +warehousing +washington +welfare +without +workshop +world +yadav +yojna +york +address +attract +centre +coordination +investor +medium_enterprises +resident +restructure +small +appoint +asset +asset_management +assets +avoid +base +circuit +consider +denominate +direct +domestic +firm +fluctuation +global +insurance +invest +least +lic +life_insurance +long_term +ltd +manage +million +outside +portfolio +pound_sterling +regulation +ring +risk +seek +size +subsidiary +alongwith +american +denomination +discontinue +gap +germany_france +got +import +quantity +supplier +advantage +agency +commitment +common_people +could +dca +debar +default +disclose +exchange_board +file +listing +mobilize +monitoring +namely +nbfc +newspaper +operator +ordination +pass +petition +proceeding 
+promoter +publish +punish +register +regulatory +sebi +securities +still +stock +trace +vanish +wind +accept +access +addition +advertisement +amendment +application_form +appropriate +ban +change +class +connected +custodian +depositor +direction +disclosure +ease +educate +empower +enactment +entry +errant +exposure +first +fraudulent +grievance_redressal +inspection +legislation +level_coordination +liquid +metro +nbfcs +offence +particular +prudential +publicity_campaign +ratio_crar +real_estate +registration +reject +remove +separate +solicit +speedy +statutory_auditor +taking +tightening +unauthorized +unregistered +unsecured +way +whose +cooperation +illicit_trafficking +mutual +prevention +psychotropic +sign +substance +accordingly +adjustment +back +balance_sheet +book +contract +directive +fully +hon_ble +judgement +liability +maturity +maximum +phase +refund +repay +return +supreme_court +accommodation +ally +applicant +builder +construct +convenor +dena +determine +enclose +enough +flat +free +gujarat +income +independent +indicate +installment +lead +link +margin +money +monthly +mortgage +normally +plr +prime_lending +professional +property +rate_plr +renovation +repair +repayment +residential +restrict +saurashtra +self +slbc +calcutta +category +compile +context +desire +mobilization +october +paragraph +performance +protection +residuary +satisfaction +since +turn_around +accountant +allotment +appear +association +charter +chartered_accountant +pan +permanent_account +reaction_thereto +simplify +urge +bill +commerce_industry +contain +facilitate +federation +ficci +finalize +memorandum +merger +parliament +pre +restructuring +suggestion +suitable +compulsory_retirement +crs +current +finalise +retire +cyclone +emergency +final +first_phase +future +handle +hazard_mitigation +immediate +know +mention +orissa +reconstruction +rehabilitation +relief +response +restoration +second_phase +secure +third +victim +depository +divest +face +harassment 
+loss +mid +modus_operandi +suffer +unprecedented +viable +adverse_effect +affect +align +annex +around +attractive +avenue +bonus +completely +compound +considerable +easy +end +february +force +half_yearly +help +incentive +kisan_vikas +liquidity +marginal +mean +necessitate +notwithstanding +opportunity +patra +payable +pensioner +plus +post_office +ppf +recent +recur +render +senior_citizen +simple +structure +tap +zero +enter +ail +availment +cold_storage +haryana +ridf +balasaheb +big +circumstance +classify +confederation +constituent +disseminate +doubtful +merit +private +prohibit +suit +age +attain +cadre +clerical +compassionate_appointment +compassionate_ground +death +dependent +deserve +die +eligible +feature +frame +ground +important +job +keep +subordinate +banks +capital_adequacy +exim +icici +idbi_ifci +option +pattern +profit_loss +profitability +sidbi +aspect +automobile +boost +depreciation +easing +exercise +fiscal +recession +related +revival +vehicle +card_holder +credit_card +declaration +draw_cash +farmers +fertilizer +input +latest_available +obligation +pesticide +production +regional_rural +able +enhance +examination +narasimham_committee +shareholding +clandestine_activity +customs +enforcement +frequent +gain +generate +good +incident +make_concerted +monetary +occurrence +past +respectively +smuggle +strict_vigil +stringent +stringent_punishment +sufficient +add +adversely_affect +average +broker +clearing +close +compulsory_rolling +consequent +daily +decline +defer +delivery +drop +emerge +expand +furnish +great +light +lot +move +oppose +prevail +price +prior +roll +rolling_settlement +scrip +sharply +speed +ten +trading +transition +valuation +volume +accretion +accrue +buoyancy +compare +discount +display +earn +end_march +fiis +generally +gold +indication +invisible +late +november +nri_deposits +outcome +peak +portfolio_investment +put +quantum +receipt +reflect +reserves +sdr +sharp +significant +surplus +transaction 
+turnaround +upward_trend +withdraw +closure +prepare +conduct +evade +fine +indulge +multi +raid +tax_evasion +defaulters +provided +wilful_defaulter +belong +corrective +irregular +length +appraisal +associated +cii +debt +document +effectively +freedom +gross_npa +head_office +incidence +negotiate +npa +perform +preparation +priority_sector +recovery_tribunal +skill +totally +upgradation +weak +wise_break +adhere +challenge +clearance +conclusion +credit_rating +crisil +entrust +extant +limited +rating_agency +safeguard +safety +therefore +agro_industries +audit +carry +cis +collective_investment +dealing +deployment +diversion +entity +finding +forest +fraud +golden +green_gold +irregularity +land +legitimate +maharashtra +mechanism +mobilisation +mobilise +objective +often +portion +present_status +protect +show_cause +spend +sponsor +subsequent +unless_certificate +acknowledge +appropriately +become +closing +complainant +difficulty +dividend +duly +duplicate +forward +grievance +indemnity_bond +inform +investor_grievances +lose +market_regulator +periodical +pertain +previous +promptly +pursuant +redress +redressal +redressal_mechanism +remain +replacement +send +airport +alleged +angle +caption +catch +cbi +central_vigilance +connivance +corruption +evasion +express +hand +ill +mainly +mobile +news_item +nexus +prevalent +suspension +tone +wealth +worth +charging +continuous +continuous_basis +direct_taxes +figure +movable_immovable +penalty_attachment +periodical_review +taxes +allegation +ask +bifr +enquire +enquiry +manipulation +modi +punjab_national +besides +code +consistent +crisis +critical +currently +discuss +dissemination +encourage +essential +governor +importance +key +monetary_policy +range +recognize +regime +role +salient +shock +sound +sovereign +stable +standard +summit +transparency +vulnerability +widespread +agricultural_produce +avail +banker +become_operational +call +collaboration +developmental +district_level +ensure_proper +field 
+governments +grace_period +grower +horticulture_produce +lie +nhb +ninth_five +north_eastern +onward_transmission +operative +partnership +refinance +site +storage +union_territory +unutilized +utilization +uttar_pradesh +visit +wherever_necessary +aca +assistance_aca +beginning +claim +cost_escalation +donor +externally +lodge +normal +pending +reimbursement_basis +utilisation +dearness_allowance +instalment +july +price_index +sharp_increase +twice +amend +bad +commencement +examine +negligence +progressively +recapitalisation +recapitalise +rrb +rrbs +stand +stand_alone +steady_progress +support +arrangement +consultation +deputy_governor +drive +effectiveness +efficient +enterprise +head +instrument +joint_secretary +long_run +package +problem +rationalisation +sectors +spread +supervisory +tier +designate +metropolitan +operational +classification +commit +companie +concern +drawback +false +hawala_transaction +imposition +launching_prosecution +misuse +realisation +regulation_act +wilful_attempt +wrong +accordance +ahmedabad +among +bangalore +borrowing +chairmanship +chennai_guwahati +chief_executive +competent_authority +comprehensive +compromise +compromise_settlement +counter +distribute +drt +either +ernakulam +expedite_recovery +fis +general_manager +hyderabad +informed +jaipur +judge +liquidate +monthly_basis +mount +mumbai +mumbai_chennai +nine +npas +ordinance +patna +precaution +promulgate +pronged_strategy +quarterly_basis +represent +rise +secured_creditor +settlement_advisory +setup +speedy_recovery +therein +top +tribunal +upgrade +vet +bilateral +countries +joint_venture +operationalisation +positive_impact +russian +understanding +award_compensation +commensurate +heavy +inadequate +initiation +injury +lok +motor +motor_accident +motor_third +motor_vehicle +party +premium +profitable +road_accident +survey +tariff_advisory +third_party +trend +underwriting +caste +promotion +remedial +scs +tribe +vacant +anomaly +appraise +august +chairperson 
+combine +creditor +cuttack +declare +dispose +drat +drt_act +expeditious_adjudication +failure +high_court +legal +originally +psbs +since_inception +special_leave +twelve +augment +compulsory_registration +contravention +elect +filter +guarantee +guarantee_fund +immediately +initial_public +intimate +investor_protection +jurisdiction +least_one +net_worth +precede +primary +regulate +regulatory_purview +responsible +right +secondary_market +stock_broker +stock_exchange +track_record +allot +cellular_telephone +connection +debenture +deficiency +delay +expeditious +incomplete +installation +letter +mainly_due +moreover +quote +restaurant +tax_payer +telephone +wide_publicity +bond +impact +index +mutual_fund +observe +subscribe +subscriber +corporation_ifc +focus +ifc +june +knowledge +majority +manufacturing +medium_enterprise +next +traditional +amongst +colour +design +distinct +distinction +finalisation +ahead +broad +civil_society +competitive_environment +consensus +create_strong +democratic +economic_forum +efficiency +foster +local +main_objective +nation +outline +president +pro +push +rest +second_generation +strong +andhra +arunachal +assam_bihar +autonomous +budgetary +collectively +constitution +derive +eleventh +goa_gujarat +haryana_himachal +highlight +karnataka_kerala +macroeconomic_stability +madhya +maharashtra_manipur +meghalaya_mizoram +nagaland_orissa +nct +page +preference +provident +punjab_rajasthan +restore +sikkim_tamilnadu +tripura_uttar +clarify +debt_burden +deem +deteriorate +evidence +keeping +parameter +revenue_deficit +sustainability +waiver +white_paper +accelerate +diversify +imf +macroeconomic +making +priority +privatisation +sentiment +stance +stress +branch_network +licence +look +opening +restriction +specially +viability +act_sica +association_iba +sick_industrial +sickness +tackle +adjudication +air_cargo +auto +cheap +deny +depb +duty +duty_drawback +entitlement +exporter +hindustan +illegal +illegally +passbook 
+passbook_scheme +quality +redeem +redemption_fine +surrender +wherein +wing +lend +loans +rajasthan +though +accumulation +backlog +backward_class +carry_forward +emphasize +ensure_timely +limitation +obc +obcs +proforma +recruitment +recruitment_drive +reservation +vacancy +deterioration +adequately +article +capture +comparison +delete +difference +fuel +incorporate +increased +newly +output +rationalise +relatively +series +shortly +slow +structural +undergo +weight +wholesale_price +wpi +wpi_series +aim +correction +council +deceleration +deficit +establishment +fiscal_reform +huge +medium_term +monitorable_fiscal +pricing +reform_programme +status +steep_rise +sub +borrowing_ecb +cap +ecb +external_commercial +lawyer +pocket +potential +unable +achievement +comprise +cumulative +depth +evaluation +lapse +mid_term +resolve +sanctioned +structured +wherever +actively +annum +backward +communicate +drought_prone +exclusively +execution +nodal +periodically +proper +prospect +quarter +reimbursement +repayment_schedule +smooth +soil_conservation +solely +submission +unpaid +viz +auditing +commence +adjudicate +allege +anti_evasion +appeal +approximately +clandestine +evader +excisable +fabric +initiated +legislative +mis +penal +quantify +realise +removal +retail_sale +shortcoming +surprise +tariff_structure +transit +cbdt +channel +circular +companies +computation +remittance +taxe +attribute +bearing +behaviour +central_statistical +confidence +gross_domestic +imperative +organisation +promote +proportion +save +savings +depend +despite +factor_cost +gdp +product_gdp +real +real_gdp +electronic +freely +hardship +instruct +legal_tender +menace +mitigate +refusal +refuse +replace +seriously +shop +withdrawal +commodity +floor +hence +may_vary +remedy +shift +taxation +trader +uniform +uniform_floor +bombay_stock +chapter +consolidated +delist +fail +rbi_org +rejection +award +central_bureau +charge_sheet +clean +course +criminal_complaint +dainik +draft +expose 
+gurgaon_branch +investigation_cbi +involved +much +oriental +overdraft +thus +active +asian_development +first_half +man +adb +coal_mining +comprising +experience +human +integrate +karnataka +multilateral +mysore +nuclear +program +pvt_ltd +test +yearly +population +serve +tribal +analysis +appropriate_action +bio +cluster +concentrate +entrepreneur +event +farm +farming +fixation +flexible +highly +natural +plant +realistic +reschedule +satisfy +shr +cheque +creation +disposal +feasible +negotiable_instrument +development_authority +insurance_regulatory +irda +word +amendment_bill +amendment_ordinance +apply +asset_classification +autonomy +character +contract_regulation +definition +deregulation +derivative_instrument +flexibility +improve_efficiency +income_recognition +modify +provider +provisioning +provisioning_requirement +risk_weight +second +statutory_liquidity +version +whole +ambit +computerisation +enlarge +gdp_ratio +indirect +largely +relatively_low +reliance +strengthening +tds +widening +american_dollar +apart +august_september +dollar +even +first_nine +sell +artisan +cottage +gram +retailer +self_employment +sgsy +vide_circular +yojana_sgsy +youth +accessibility +baroda +bikaner +bse +chennai +cochin +demat +demat_form +exchange_bse +half +mangalore +market_capitalisation +nse +nse_bse +physical +physical_form +syndicate +vadodara +criminal_procedure +expert +genuineness +legally +mint +security_press +suitably +valid +concrete +funding +introduced +compromise_write +bar +hearing +rajya_sabha +vide +working_group +beneficiary +cyclone_affected +therefrom +uco +employment_generation +network +poverty_alleviation +authorise +certify +entitle +excess +residence +advance_licence +attendant +compete +custom_duty +easy_access +entail +export_obligation +foregone +fulfillment +jan +revenue_foregone +scrap +sea +shipment +steel +absolute +beyond +comment +compliance +constant_watch +deficient +discretionary +downturn +due_diligence +external 
+functionary +inefficient +occasion +rather +raw_material +run +slippage +spite +staff_accountability +steadily +strict +supervision +turn +arunachal_pradesh +manipur_meghalaya +mizoram_nagaland +tripura +nagaland +yard +bonds +contribution +dedicated +define +earmark +forty +grade_rating +idfc +negotiable +obligatory +sec +similarly +social +specify +trust +trustee +twenty +twenty_five +adherence +cultivation +devise +easily +engage +hassle_free +high_powered +insistence +sanction_disbursement +timely +variation +abide +additionally +airline +audio +bombay +bpl +broadcasting +communication +cor +corpn_ltd +deduct +electric +electronics +employees +film +fire +gulf +innovative +korea +liaison_office +machinery +marine +multinational +radio +south_asia +technologies_ltd +television +vikas +wherever_applicable +cheating +constantly +efficiently +endeavour +format +guidance +investor_grievance +ipc +listed +penal_code +revamp +aurangabad +composite_index +criterion +deduction +east +enjoy +identification +indicator +infrastructural +locate +relaxed +special_category +tax_holiday +taxable +thereunder +uttar +west +anant +art +auditor +authorize +exception +formal +four_pronged +healthy +incorporation +moot +news +nodal_officer +offsite +prudential_norms +reasonable +site_inspection +strength +supervise +acceptable +component +extended +fifth +fiscal_stress +programmes +temporary_mismatch +allegedly +defective +dupe +lakhs +proprietary +senior +temporary +vijaya +announce +entirely +exorbitant +gold_silver +hike +influence +liberalization +mostly +silver +abnormal +bse_nse +common +concert +conclude +detrimental +front +integrity +president_vice +price_manipulation +prima_facie +sudden +surveillance +systemic +underway +vice_president +wake +stabilization +stabilization_fund +assam +funds +kashmir_karnataka +kerala_madhya +monthly_quarterly +nadu_tripura +sikkim_tamil +western +bidder +clear +coin +explore +indigenous +possibility +rupee_coin +standing_committee 
+continuance +effecting +explicit +frame_work +implicit_examine +maximise +rationale +transparent +whenever +bala +guwahati +tribunal_drt +vikhe_path +workload +directly +jubilee_rural +bharat_overseas +canara +ganesh +long +accuracy +clean_note +currency_chest +destroy +destruction +insert +remit +nagpur +nasik +pune +sangli +thane +budgetary_provision +focusse +ninth_plan +reform_initiative +acceptance +custody +official_gazette +airport_nil +chennai_airport +chinese +cochin_airport +courier +feb +german +goa +japanese +mauritius +nil +pakistani +swiss +uae +favour +inquiry +mnc +officials +panel +periodic_review +speedy_disposal +tune +convergence +deputy_chairman +framework +globalisation +good_governance +implementable_action +indians +ministerial_meeting +prime +revive +wto +level_bankers +update +paper +sica +agriculturist +target_achievement +price_rise +budget_speech +fiscal_responsibility +capital_formation +downward_trend +gas +manufacturing_electricity +mining +organisation_cso +bourse +maintained +usual +analyze +broaden +correct +enhancement +fiscal_correction +imbalance +impress +interaction +macro +point +regular_basis +black +dea +elimination +municipal +pipeline +pose +route +sanitation +want +diesel_kerosene +lpg +oil +petrol +prices +commission_tfc +fulfil +tfc +across +broadly +chowdhury +considerably +convert +coverage +developed +economic_situation +emphasis +impart +institutional +moderate +move_towards +recognise +rely +retain +side +single +task +variable +vat +grade +local_body +original +sewerage +aircraft +ascertain +exact +black_money +enact +content +exempt +mixed_cotton +products +rebate +valorem +andhra_pradesh +avoidance +bench +chief_commissioner +commissioner_appeals +delhi_goa +dispute +gujarat_haryana +hear +himachal_pradesh +jammu_kashmir +nadu_uttar +opt +ordinate +phalguna_saka +pradesh +quick_disposal +rajasthan_tamil +revenue_collection +short +sit +tamilnadu_tripura +vacancy_position +vacate +borrow +consumption +double 
+educational_loan +employ +estate +following +ngo +organization +qualified +retail +scs_sts +self_help +shgs +software +sub_target +venture_capital +cause +customer_service +envisage +poor +recommendations +regulatory_framework +agent +consistently +deceased +door_step +organise +postal +postal_department +throughout +vikas_patra +house_rent +jammu +parity +particular_reference +pondicherry +stands +substantially +adequacy +departmental +determination +economist +extraordinary_part +follows +manpower +overlap +redeployment +relevant +resolution +road_map +staffing +user +capita +allied +impetus +town +tamil_nadu +valley +chronic +together +adoption +chalk +diversification +micro +partially +recent_past +revitalise +slr +understanding_mou +whereas +centrally +apprehend +constant_surveillance +meghalaya +myanmar +possession +reportedly +subsequently +vulnerable +partly +strike +every_effort +lack +measures +buy_sell +continuously +cross +decrease +depreciate +emergence +increase_decrease +maintain_adequate +movement +orderly +outgo +rates +reduce_excessive +rupee_vis +speculative +vis +volatility +crime +criminal_offence +laundering +liable +money_laundering +punishment +treat +automatic +entertain +favourable +sectoral_cap +streamline +burden +fiscal_consolidation +insufficient +pressure +wage +expedite +proposals +affected +centralised +circle +cleared +minist +super_cyclone +session +amt +extent_possible +provisional +sick_ssi +suicide +waive +years +acute_shortage +chit +govt_mint +indigenous_production +modernise +nil_nil +paise +shortage +cases +guilty +owner +regulator +adequate_provisioning +bear +commitment_charge +constraint +external_aid +monitoring_cell +procurement +compound_interest +marketing_season +compensation +dwell +earthquake +flood_cyclone +insurance_cover +insure +intend +loss_damage +natural_calamity +poor_people +poverty_line +joint +stability +summary +caution +exercise_caution +participant +press_release +transact +austerity 
+consumption_expenditure +diesel +entertainment +servant +adverse +import_duty +finally +reconstruction_bifr +chennai_port +coastal +environmental +hdfc +hudco +hudco_housing +ifci +lpg_pipeline +mumbai_port +power_transmission +renewable_energy +resource_management +urban_environmental +competitive +concentration +economic_activity +financially_viable +geographical +local_area +low_cost +opene +rank +rating +accountable +cag +cash_crunch +crunch +divert +frequently +legislature +plan_outlay +proper_utilization +proportionate_cut +serious +shape +tide +enforcement_directorate +hundred +zonal_office +mental +scope +steady +widen +alliance +curb +formation +supervisory_role +unscrupulous +consultancy_firm +implementable +minimise +outlet +revenue_gain +appellate +assessee +bharat +cycle +liquidation +lock +rent +justification +nominee +lender +treasury +north_east +pool +sikkim +inclusion +combat +corrupt +none +opinion +appellate_authority +headquarter +headquarters +liberalize +player +siphon +soft +accede +cater +divisional +exclusive +inconvenience +apex +existence +manipur +commercial_judgement +aggressive +fee +hurdle +merge +overcome +district_primary +efc +primary_education +span +claim_settlement +inordinate_delay +lok_adalat +permanent +required +settle +cheat +expectation +keep_close +massive +play +positive +proactive +quite +watch +budgeting +emphasise +fourth +scrutiny +significant_proportion +weakness +belgium +billion +confiscation +directorate +dubai +pound +suspect +switzerland +true +bogus +manipulate +certification +comparative +comptroller_auditor +para +contraband +counterfeit_currency +dec +drug_law +hashish +indo_nepal +intervene +invariably +mizoram +national_highway +ncb +near +pak +prevent_smuggling +qty +retired +zonal_unit +andaman_nicobar +building +compulsorily +consumer +contact +dadra_nagar +desirous +diu +eastern +fmc +harass +haveli_daman +honour +island +kashmir +lakshadweep +northern +pradesh_jammu +purview +southern 
+tripura_sikkim +vide_press +backwardness +bottleneck +decade +equal +equitable +fiscal_discipline +formula +foundation +human_resource +nurture +optimum +pace +pharmaceutical +prudent_external +strive +sustained +telecom +iba +arrange +customer +imply +prohibition +secrecy +statute +website_http +wilful +wilful_defaulters +beedi +fictitious +manufacturer +never +worker +abhiyan +child_development +elementary +elementary_education +empowerment +erstwhile +family_welfare +health_care +janashree_bima +literacy +pmry +poverty_eradication +pradhan_mantri +rozgar_yojana +sarva_shiksha +shahari_rojgar +sjsry +social_justice +social_security +spending +swarna_jayanti +woman +yojana +flood +proper_utilisation +services +slum +advice +although +exploitation +informal +intermediary +risk_management +concurrent_audit +delinquent +frauds +internal_control +abuse +admissible +automatically +brand +brief +calculate +cause_notice +computerise +confiscate +denial +describe +export_incentive +high_incidence +invoice +invoicing +presentation +prone +redemption +reimburse +repatriate +satisfied +shipping +station +united +accountability +cmd +commission_cvc +consult +director_cmd +expire +later +pnb +preliminary_enquiry +realize +termination +organize +competition +dumping +mismanagement +one_hundred +paper_mill +realization +offset +accounts +college +convertible +electronic_hardware +entrepreneurship +eous +fii_investment +foreign_institutional +hire_purchase +leasing +ordinary +repatriation +software_technology +technology_park +unlisted +zone +apr_may +aug_sep +carefully +close_monitoring +comparative_figure +consumer_price +cpi +dec_jan +feb_mar +fruit +index_wpi +inflationary +jun_jul +labourer +modest +oct_nov +pulse +rice_wheat +scenario +sugar_edible +vegetable +wpi_cpi +concealment +capital_gain +compute +golden_jubilee +incremental +objection +taxpayer +voluntary +units +wherever_feasible +assam_arunachal +finalization +sikkim_tripura +tripura_mizoram +optional +terrorism 
+terrorist +assurance +cofeposa +compilation +criminal_prosecution +duty_entitlement +exchequer +illegal_transaction +object +pass_book +preventive_detention +age_group +assam_manipur +cease +chandigarh_delhi +enrol +enrolment +equally +household +join +meghalaya_nagaland +orissa_pondicherry +orissa_punjab +popularise +renewal +age_limit +applications +educational_qualification +live +micro_enterprise +nehru +pendency +shahari_rozgar +speedy_clearance +within_fortnight +yojana_sjsry +conditionality +mou +sacrifice +understand +assistant +forth +preliminary +account_convertibility +approximate +element +indirectly +chest +daily_basis +distance +linkage +might +amritsar +home_affairs +leak +maintain_secrecy +mixed +premise +search +secret +south +spot +american_express +bajaj +cascade +chemicals +civil +craft +cricket +dues +east_west +electronics_ltd +exports +federal +fin +fine_penalty +gandhi +hassan +inv +itc +jyoti +kishore +natural_gas +nigam_ltd +personality +petrochemical +power_generation +premier +rajan +rao +rectification +reddy +roy +sahara +scam +tata +vysya +perform_ing +formulation +revitalization +separate_cell +first_tranche +suspend +deepak_parekh +disinvest +regulations +risk_factor +uti +cenvat +fibre +fully_exempt +hard +jute +mix +produce +fera_violation +investigate +liquor +vijay +delayed +cess +petroleum +consequently +facilitator +fast_track +industrial_sickness +wide_range +confirm +jointly +mine +credibility +insurer +await +deadline +elsewhere +train +actual_strength +indicative +staff_strength +accessible +cheque_book +local_police +madra +safe_custody +steal +unauthorised +cast +disabled +finalised +fora +gram_swarozgar +liberation +needy +unemployed +urban_cooperative +authorization +center +conversion +marginally +ongoing_basis +strictly +urban_areas +wholly +account_holder +annual_turnover +english +bse_sensex +curb_excessive +interact +multiple +nse_nifty +price_band +price_fluctuation +risk_containment +sensex +cultivator 
+exceptional +excessive +illicit +license +opium +opium_cultivation +advancement +commercially_viable +enhanced +feasibility +mandate +technological +affiliate +approx +arm +climate +convey +expeditiously +improved +lift +perceive +productive +recent_visit +sept +sundaram +withhold +aids +belgium_canada +circulate +debt_servicing +denmark +example +external_debt +france_germany +grand +historical +italy_japan +kuwait +multilateral_bilateral +publication +russian_federation +spain +sweden_switzerland +united_kingdom +encashment +fake +readily_available +computer +bengal +ing +north +unemployed_youth +electrification +supportive +usually +water_sanitation +filing +fls +afforestation_japan +afforestation_project +agri +avert +bangalore_water +bhopal_conservation +bombay_sewage +break +bundelkhand +cable +calcutta_transport +canal +canal_irrigation +capacity_building +consolidation +delhi_mass +demand_side +dist_primary +district_poverty +distt +drainage +eap +energy_efficiency +eradication +euro +favourably +forestry_ida +hospital_opec +hydro_electric +hydro_power +ibrd +ida +ifad +industrial_pollution +institutes +integrated +japan_yen +karnataka_afforestation +karnataka_watershed +kerala_forestry +land_reclamation +leave +lift_irrigation +livelihood +livestock +manipur_sericulture +medical_equipment +minor_irrigation +mysore_paper +nlc +norway +opec +pilot_project +pollution_control +poverty_reduction +pump +punjab_afforestation +rajasthan_forestry +rapid_transport +reclamation +revolve +river +sericulture_japan +shimla_sewerage +ship +signing +tamilnadu +tank +terminal +thermal_power +transmission_system +transportation +watershed +watershed_dev +watershed_development +yen +clause +mode +overall_ceiling +resort +upper_limit +xvii +attached +nominee_director +aadhar +accident +adult +annuity +assure +bima +career +collateral_security +corpus +deep +endowment +endowment_assurance +female +girl +handicapped +health_insurance +jeevan +jeevan_suraksha +mitra +nos +occur 
+physically_handicapped +premature_death +spouse +sukanya +thousand_sum +vidya +bsf +coast +fence +indo_pak +maintain_utmost +narcotics +pakistani_anti +prosecute +quarterly_coordination +vigil +zonal +high_profile +vice +concessional_rate +fully_convertible +induce +intention +krishna +preliminary_stage +racket +undue +unearth +calendar_year +copy +counterfeiting +dimension +economic_affairs +english_hindi +entire_gamut +genuine +kind +notes +predominantly +regional_language +ultra +vis_vis +widely +fishing +seasonal +whichever +asset_liability +earning +equivalent +projection +raising +resources +tenure +umbrella +advances +post_sanction +pro_active +retired_judge +sub_standard +revenue_intelligence +vigilant +legal_heir +successor +unclaimed +adverse_impact +destination +directly_attributable +qatar +avoidance_convention +double_taxation +interpretation +treaty +uncertainty +assured +absence +amalgamation +exit +listing_agreement +prudent +requirements +stipulation +middle +middle_class +animal_husbandry +behind +dairy +sericulture +gram_swarojgar +project_phase +rojgar_yojana +schemes +mass +dainik_jagran +issuance +encouragement +linkage_programme +refinance_support +strategic +attack +calamity +calamity_relief +coconut +qualify +implication +location +bipartite +seventh +sixth +wage_revision +assurance_company +cmc +first_instalment +gic +improper +licence_fee +success +facilitate_easy +gupta_committee +procedural +posts +thousand +minority +minority_community +tie +conformity +issuing +received +recording +validity_period +acre +illicit_opium +ncb_holding +poppy_cultivation +asset_reconstruction +legislative_framework +response_thereto +setting +discrepancy +rectify +birla +corporate_governance +networth +nifty +remuneration +contributory_provident +desirable +feel +iibi +unit_trust +workman +working +governance +guide +private_partnership +systematic +thrust +moratorium +cottage_industries +credit_guarantee +dilute +promotional +nagaland_sikkim +selection 
+adversely +basic_custom +ssis +anti_smuggling +drug_abuse +essentially +even_though +extra +informer +motivate +reward +vogue +book_depb +detain +directorate_general +entitlement_pass +importer +mis_declaration +soon +verify +warehouse +securitisation +stamp +candidate +interview +successfully +today +come_across +count +inclusive +atms +automatic_teller +machine_atm +near_future +operational_efficiency +rrb_staff +advanced_stage +purely +concessional_refinance +either_directly +branches +businessman +clearly +financial_institutions +popular +prior_permission +initially +bad_loans +cvc +malpractice +thorough +cite +inspite +mop +spirit +van +anti +debt_relief +financially_sound +border_fencing +cross_border +fencing +flood_lighting +seal +deregulate +erosion +rescheduling +affordable +common_man +disadvantage +edible_oil +intervention +medicine +milk +weekly_basis +pradhan +aids_prevention +categorization +innovation +census +emanate +ipo +ipos +media +offer_document +offering +relax +telecommunication +buyer +constant_endeavor +fold +friendly +investor_friendly +perception +relationship +resilience +willing +differential_rate +indusind +previous_year +combined +inflationary_pressure +nominal +dominate +industry_cii +cellular_phone +expensive +tariff +claims +collusion +requisite_information +availing +commissionerate +competent +gift +smuggled_gold +unaccounted_income +undisclosed_income +shukla +aadhaar +delegate +guarantor +indore +par +perpetration +punjab_sind +supervisory_responsibility +title +induct +cellular +insolvency +threat +issued +agro_processing +channelise +fishery +foodgrain +sectoral +complexity +depot +inland +inland_container +licensing +practical +qualification +annexed +lende +nagaland_tripura +architecture +choice +data +effective_supervision +revenue_loss +domestically +hedge +infusion +agriculture_forestry +first_quarter +gdp_growth +lag +originate +second_quarter +slowdown +third_quarter +chargeable +weapon +ever_vigilant +leviable 
+probe +investor_confidence +must +special_dispensation +affidavit +block_level +formality +income_generating +profile +sometimes +baggage +inside +convene +eco +waste +discourage +grain +paddy_rice +price_msp +rice +semi +stimulate +thereby +wheat +ability +aluminium +material +plastic +commercial_borrowing +currency_convertible +economic_survey +error +france +ida_ibrd +japan_germany +omission +outflow +begin +beneficial +crr +fortnight +hit +percentage_point +ratio_crr +roll_back +contrary +disciplinary_action +rich +moto +nbc +reporting_friday +steep +customs_duty +cpc +nodal_agency +vol +car +foreign_travel +chair +credible +monitorable +community_social +chairman_cum +faridabad +ghaziabad +quota +wait +chief_vigilance +disciplinary +interval +onwards +organisational +regular_departmental +vigilance_officer +give_boost +parallel +sign_memorandum +specifically +appreciation +curtail +metal +differential +discrimination +treatment +crf +css +every_month +pac +utilisation_certificate +financially +organizational +potentially +supersede +deliberate +dispensation +operational_freedom +second_schedule +absorb +academic +admission +set_aside +student +vocational +constant +efficient_functioning +generating +rationalization +recourse +signal +sustain +women +grievances +inquire +junior +andaman +authorisation +banke +chandigarh_dadra +daman_diu +lakshadweep_pondicherry +nagaland_nct +rajasthan_sikkim +alloy +chemical +commodity_basket +mineral +mineral_oil +petroleum_crude +raw +relative +tea +tin +wholesale +yarn +accuse +arjun +ashok +counterfeit +criminal +detection +distt_gurgaon +fir +justice +ment +rajesh +ward +controller +reconstitute +statistical +vision +ongoing_process +valuable +contingency_fund +tonne +encashe +loot +minimal +nab +payee +railway_station +remote +slow_pace +tion +ccs +completed +disproportionate +functional +keep_vigil +implicit +saka +socio_economic +tribal_sub +confer +investor_awareness +investor_education +maintain_separate 
+sub_section +corresponding_period +film_industry +offices +ensure_orderly +offer_ipo +selling +exemption_limit +dealer +definite +match_fixing +always +auction +virtue +paddy +talk +wheat_onion +citizen_charter +face_competition +lic_gic +servicing +divide +repayable +pleased +sun +national_sample +nsso +statistic +sub_judice +survey_organisation +investors +lodge_complaint +ownership +registrar +retirement +retirement_age +comfortable_external +connectivity +economic_outlook +handling +labour_force +safe +universal +weather +performing +err +developmental_expenditure +slash +bed +mega_power +forex +inc +investments +lab +ltd_hindustan +master +offshore +satellite +swift +visa +welcome +customary_amongst +fidelity +gateway +internet +secrecy_act +usage +banknote +language +solve +fii +capitalisation +stream +bail +exporters +fraudulently +accountant_general +expert_committee +intimated +natural_disaster +prescribed_format +tamil +applicability +closely +determined +effectively_tackle +potentially_sick +restrictive_definition +successful +timely_detection +dependence +sharp_decline +anomaly_committee +fifth_pay +leader +staff_side +budgetary_allocation +crores +judicial +smooth_functioning +cyber +eligibility_criterion +gradation +mail +pli +sfc +perspective +interactive +national_institute +profession +publicity +simplified +standing +outlay +chit_fund +enact_legislation +significantly +depositors +disappear +alternate +complex +cultural +lease +renew +tenant +background +charitable +prefer +via +collective +adjusted +reconciliation +give_preference +rescue +commission_efc +debt_waiver +explanatory_memorandum +fight +awareness +dialogue +industrialist +interalia +habitation +net_profit +operating_profit +vast +apr +dollar_pound +japanese_yen +jul +major_convertible +markets +purchasing_power +reciprocal +sterling +book_value +internal_debt +benami +benami_property +benami_transaction +prohibition_act +undisclosed +unearth_black +iron_steel +adjust +five_star 
+himalayan +plantation_companies +plug +compassionate +minor +sole +sri +gazette_notification +lump_sum +productivity +revised +conscious +international_monetary +macro_economic +voice +centra +ground_level +primary_agricultural +policy_holder +surrender_value +aggrieved +payee_cheque +connect +expert_group +interim_report +voluntary_compliance +writ_petition +aegis +brick +disability_due +driver +handicraft +handloom +hilly +khadi +leather +nagar_haveli +natural_death +occupational +partial_permanent +permanent_disability +producer +salt +weaver +selective +agenda +relevance +advertising +agencies +broadcast +speech +meritorious_student +avoidance_agreement +dtaa +dtac +container +keep_constant +adviser +downsize +entertainment_hospitality +enumerate +pan_allotment +pan_card +apprehension +cancel +concept_document +correspondence +dfid +feasibility_study +goi +interested +italy +nevertheless +prepared +respond +resume +sanitation_project +seem +solid_waste +sustainable +discriminatory +anticipate +become_doubtful +consultative +intensive +semi_urban +become_overdue +abolition +cenvat_credit +concept +magnetic +one_page +reversal +voluntarily +mismatch +cultivate +hectare +poppy +comprehensively +couple +goods +value_chain +ready +writing +categorise +early_disposal +loophole +succeed +wrong_availment +adequate_publicity +convenience +himachal_gramin +budgetary_exercise +cold_chain +scientific_research +fertiliser +contingent +major_portion +economically_disadvantaged +erode +privatise +self_reliance +debit +fairly +long_duration +incumbent +superannuation +conservation +culture +disadvantaged +exploration +health_systems +post_graduate +sanjay +oral +exclusion +explain +assume +consume +corresponding +evolve_consensus +retention +activate +announced +contractual +meaningful +stakeholder +first_stage +food_subsidy +anti_corruption +punishable +trial +brazil +debtor +indebted +indebtedness +indonesia_korea +magnitude +manageable +mexico +ninth +top_ten +touch 
+turkey +database +mistake +photograph +signature +updation +clarity +discussion_paper +switch +rules +collateral_free +technology_upgradation +tiny +donation +gazette +writ +months +cement +clarification +forge +insulate +nashik +organic +seamless_steel +white +degree +price_stability +scrutinise +spell +within_reasonable +arbitration +standard_chartered +zealand +economic_zone +sez +sezs +situate +wilful_default +capital_markets +catholic_syrian +city_union +corp_ltd +dev_corpn +dist +global_trust +immovable +mercantile +rajkot +boards +fifty +impress_upon +negative +resource_crunch +kelkar_committee +coimbatore +membership +utility +artificial +fake_stamp +stamp_paper +highway_project +surat +specialised +shareholding_pattern +vrs +ahmedabad_bangalore +fema +law_justice +repeal +repeal_act +carpet +structural_reform +automatic_route +constitute_special +incidents +jawaharlal_nehru +judicial_custody +concession_agreement +documentation +granting +inability +service_provider +already_exist +due_course +helpline +routine +special_cell +inflow_outflow +usd_million +fine_tune +log +receivable +royalty +software_dev +synergy +feed_back +vital +dist_poverty +ibrd_ida +immunization +merchant_banker +simultaneously +warn +viable_sick +encompass +bikaner_jaipur +drought +drought_affected +food_grain +reschedulement +outstande +consensus_among +methodology +losse +losses +gradual +thing +apr_apr +crude_oil +jun_jun +lpg_kerosene +may_jun +sharp_rise +weekly +hawala +kharif +kharif_season +rabi +willful_defaulter +mini +reside +reopen +deposits +hfc +hfcs +convertibility +host +normal_course +step_towards +dccb +idea +temporary_measure +avoid_recurrence +provisional_unaudited +chain +constant_vigil +door +dry +gear +hindu +intelligence_machinery +interdict_narcotic +pakistani_narcotic +periodic_cross +purity +online +policyholder +advisory_council +crucial +jagdish +member_secretary +narasimham +patel +magistrate +mobile_phone +rationalize +incorrect_valuation +neither 
+itat +focused +resource_mobilization +three_years +food_processing +institutions +board_fipb +dhananjaya +fdi_inflow +mukherjee_formula +bifurcation +cabinet_committee +libor +liquidity_adjustment +repo +repo_rate +upward_revision +redefine +administrator +patiala +prominent +heroin_kgs +defraud +description +flood_protection +godown +popularize +consultancy +gem +majority_stake +mauritius_singapore +sri_lanka +turnover +convenient +activities +afghanistan +intensify +interdict +modernization +multi_pronged +legal_framework +convict +culprit +audited +divisible_pool +electricity_gas +nss +round_round +secondary +transport_storage +user_friendly +united_nations +harvest +season +initial_moratorium +camp +haveli +prospective +sanctioning +eliminate +face_value +direct_recruitment +obc_category +recruit +bhartiya +hoshangabad +note_mudran +assignment +bahrain +iran +republic +vietnam +absorption +deputation_basis +officers +parent +personnel_training +maintain_orderly +citizen +ganja +gather +pre_sanction +scbs_dccbs +western_region +search_seizure +collateral +pledge +asset_size +krishi +lakshmi +chance +secured +jayanti_shahari +scavenger +sgsy_swarna +swarnjayanti_gram +swarozgar_yojana +accompany +file_return +objection_certificate +core +undp +variety +behind_schedule +streamlining +infra +book_building +qualified_institutional +theft +arc +arcs +video +convention_dtac +mauritius_double +taxation_avoidance +incorrect +ministries +asset_quality +competitiveness +corporation_limited +divulge +maximum_permissible +corporation_dicgc +dicgc +guarantee_fee +operationalise +uniformly +safe_deposit +advocate +assesse +fraud_claim +fraudulent_encashment +imprisonment +minor_penalty +penalize +stage_advice +digital +inter_governmental +investment_destination +policy_stance +consequence +restrictive +consent +travancore +nav +repurchase_price +expiry +actual_user +printing_press +investment_fdi +negative_list +approach_paper +small_industries +curb_circulation +whole_gamut 
+cabinet +dismiss +autonomous_body +gratia +precise +closed +continuation +integration +significant_improvement +empowered_committee +uts +bihar_goa +himachal +negligible +rajasthan_tamilnadu +weightage +port_trust +ongoing_exercise +baggage_allowance +deputy_commissioner +platform +tighten +cit +computerization +mitigation +technical_assistance +intimation +registration_cor +jewellery +possess +formerly +university +equity_participation +highly_indebted +kenya +try +fulfilment +potentially_viable +wasteful +arm_length +crar +dbod +divestment +granting_permission +merchant +please +selective_basis +authenticity +illicit_traffic +international_cooperation +morphine +precursor +trafficking +itc_ltd +nomination +quickly +severe +stall +pollution_prevention +pds +prov +states_uts +political_party +accidental +medical_treatment +mediclaim +atleast_one +cst +jurisdictional +nipfp +policy_nipfp +resource_allocation +reliance_industries +bhubaneswar +kerala_lakshadweep +manipur_mizoram +ombudsman +draw_comprehensive +operating +alternative_mechanism +warrant +distt_primary +fish_culture +gas_turbine +hazard +hydrology +improvement_project +power_station +tourism_dev +upper +account_payee +claimant +hour +managerial +hiv_aids +reinsurance +dgft +forgery +slab +yamuna +mature +repeat +resource_mobilisation +appellate_authorities +intermediate +ihe +wrongful +audit_trail +deliberation +depository_participant +vice_versa +clerical_cadre +clerk +indent +come_forward +testing +act_fema +compensate +indo_mauritius +investors_fiis +consonance +downward +peak_rate +cyber_crime +identity +message +misappropriation +partial +representative_office +bureau_ncb +persistent +tandem +comparable +wide_spread +jayanti +travel_abroad +vide_notification +assumption +cmds +coordinate +daman +alias +chennai_metro +enterprises +grace +joshi +maker +picture +prakash +rajiv +rajya +official_language +expertise +insider_trading +hire +revitalize +soft_loan +air_travel +holiday +active_role 
+card_kcc +prompt +time_lag +clerical_staff +cargo +hub +laboratory +refund_claims +voluntary_disclosure +bhubaneshwar +dehradun +gandhinagar +gurgaon +shimla +regional_offices +create_awareness +takeover +verge +manpower_planning +chandigarh_daman +chief_commissionerate +drinking +hydrology_project +protocol +supply_sanitation +women_child +women_youth +workforce +inadequacy +keep_watch +export_promotion +alkaloid +guwahati_hyderabad +mint_hyderabad +mint_mumbai +proforma_accounts +quasi +ranchi +refinery +research_institute +ring_road +security_printing +store +street +vide_letter +disbursal +operationalize +redressal_cell +dhan +apprise +commonly +compulsory_quoting +fulfill +coal +fertilizer_subsidy +optimize +buyback +ensure_transparency +explanation +explanatory +preferential +preferential_allotment +substantial_acquisition +transactions +unaudited +voting +yearly_basis +corporate_bond +remain_unchanged +segregate +contain_inflation +kerosene +little +manufactured +mass_consumption +supply_side +combination +portal +web +cancellation +cash_withdrawal +divisional_office +internal_audit +irregularitie +premature +signatory +subordinate_staff +nadu +liberal +momentum +outlook +install +next_five +nigam_limited +ambitious +migrate +reiterate +electricity_board +spectrum +super +uday +udyog_ltd +complete_ban +first_six +principle_approval +promoter_group +promoter_promoter +risk_weighted +validity +balasaheb_vihe +anti_dumping +offender +forestry_fishing +fuel_power +economic_cooperation +reverse +proximity +south_west +traffic +match +reliable +first_installment +passport +sport +controller_general +extraordinary +intensity +national_calamity +nccf +euro_pound +slip +current_session +assocham +liquidity_conditions +query +balanced +comprehensive_strategy +supplementary +wage_employment +day_strike +gratuity +increment +ineligible +plus_dearness +qualifying_service +skilled +united_forum +prepayment +blue +colour_shift +denomination_notes +green +left 
+mahatma_gandhi +red +visible +capability +deter +dividend_distribution +ever +infuse +launching +pharma +specified +gramin +swarnajayanti_gram +swarojgar_yojana +arrears +buy +sheet +cdr +building_advance +deceased_employee +hoc +modern_technology +placement +qualifying +ssc +operations +robbery +robbery_dacoity +cso +millennium_deposit +sluggish +germany_japan +accurate +constant_interaction +prize +thirty +transport_allowance +endeavor +alter +breach +cardholder +chip +choose +customary +debit_card +device +electronic_payment +hardware +necessarily +pilot_basis +pin +pos +reporting_format +satisfactorily +smart_card +smart_cards +smart_debit +unauthorised_transaction +unspent +without_prior +expansion_diversification +concurrence +multi_agency +authorised +subscription +entertainment_industry +calcutta_environmental +child_health +container_corporation +disease_surveillance +environmental_sanitation +northern_region +ntpc +ongoing_externally +participatory +pilot +power_grid +renewable +rubber +sewerage_project +tank_improvement +fifty_percent +solvency +solvency_margin +timely_repayment +willingness +loanee +may_differ +risk_perception +lord_krishna +ltd_karur +twenty_six +enunciate +impose_penalty +site_surveillance +supervisory_framework +systemic_risk +categorize +calculation +agrahayana_saka +depute +reluctance +robust +urban_transport +ganja_hashish +opium_heroin +pcardb +scardbs +fundamental +globally +justify +amalgamate +readily +filling +instructions +constitutional_amendment +evident +shareholde +heroin_ganja +opium_morphine +precise_valuation +bipartite_settlement +employer +recognition +tripartite +repurchase +act_read +amenity +teacher +decision_making +september_october +fully_computerised +kamataka +quasi_judicial +vice_chairman +spurt +group_shg +investment_climate +ncaer +research_ncaer +sample +usd_billion +pre_payment +intent +timeframe +inter_ministerial +kutch +kutch_district +collate +correspond +bihar_jharkhand +jharkhand +patient +read 
+presence +proviso +sub_clause +substitute +incidental_thereto +augment_supply +facilitate_quick +speculation +helpful +usd +bilateral_agreement +indo_myanmar +south_east +ministerial +unaccounted +doe +press_nashik +ago +madam +sanctioned_strength +appreciation_depreciation +notional +upward_movement +heavily +indebted_poor +oil_prices +diu_delhi +guilty_persons +illegal_trade +manipur_nagaland +neighbouring +nov +fera_fema +advertisement_publicity +org +initiate_necessary +unaccounted_money +africa +avoidance_treaty +canada_china +cyprus +dtaas +france_netherlands +saudi_arabia +singapore_south +swiss_confederation +united_arab +posting +ida_sdr +appropriate_legislative +encourage_voluntary +numerous +stringent_penal +threshold_exemption +weed +customers +dual +fix_responsibility +interim_order +statutory_audit +serious_concern +port_dredging +sericulture_project +wise_breakup +armed +repository +risk_profile +occupy +special_drive +reclamation_project +payments +adr +gdr +nepal_border +payer +punitive +early_clearance +rajiv_gandhi +gov +judicial_stamp +conventional +optimum_utilisation +chattisgarh +jharkhand_karnataka +pondicherry_punjab +require_prior +adb_ind +sewage +yamuna_action +fipb +liquidator +official_liquidator +abnormal_price +commerce_punjab +sind +file_criminal +ongc +structuring +fee_payable +procure +merchant_bankers +earliest +engineering +graduate +preferably +rationale_behind +ride +mega +prosperity +mutual_funds +uniformly_applicable +front_end +mutually +capitalize +diverse +ots +technical_collaboration +competitive_advantage +conduct_thorough +courts +providing +load +active_consideration +distress +wipe +reconsider +sharing +saving_schemes +millennium +airport_chennai +seaport +gsdp +dec_dec +feb_feb +index_cpi +jan_feb +nov_dec +oct_oct +sep +sep_oct +put_pressure +financial_stability +textile_industry +crude +edible +refined_edible +remain_vigilant +adjudicating +dispute_settlement +recipient +chartered +engineer +heir +call_option 
+deep_discount +dynamic +sabha +necessity +prepare_draft +cbi_police +erring +unfair +tangible +track +usq +com +contractor +tenor +pick +unorganized_sector +witness +alone +consolidate +automate +norms +association_ida +myanmar_border +defect +undue_delay +integral_part +independently +technical_consultancy +proportionate +consortium +lanka +upfront +gom +tripartite_agreement +appropriation +austerity_measures +strict_compliance +discontinuation +rigorous +tenth_plan +check_recurrence +prevent_recurrence +uttaranchal +second_half +groups +http +nic +web_site +proj_adb +miscellaneous +objective_behind +client_code +confirmation +digital_signature +awareness_among +borrowal +white_sugar +electronic_clearing +polymer +middle_office +monetisation +internationally +deputy +governmental +unclaime +bankruptcy +barrier +quantitative +matching_contribution +enforce +privatization +icai +internal_inspection +sitharaman +kochi +preservation +happen +persons +listing_fee +online_trading +price_discovery +strategic_partner +wherever_possible +silk +engagement +fiscal_prudence +utmost +migration +expeditious_disposal +substantial_portion +concessional +cognizance +valid_document +bailout +value_nav +matching +severe_drought +consistency +efficiency_productivity +policies +cards +recovery_climate +print_media +separate_data +tool +highway_improvement +poverty_initiative +gail +kotak_mahindra +platinum +rolling +visual +map +ofthe +ratio_slr +civilian +ministery +holistic +discriminate +gets +environmental_improvement +advisory_group +app +appreciate +jul_aug +mar_apr +petrol_diesel +petroleum_products +counterpart +first_week +inception +diamond +bima_plus +duration +ulip +icici_iibi +pvt +eastern_region +industrial_units +southern_region +award_period +mar_mar +opec_bilateral +protest +liaison +procedural_simplification +cyclone_earthquake +disaster +fire_flood +immediate_relief +fund_crf +released +european +european_union +gujarat_earthquake +formulate_comprehensive 
+ensure_smooth +maharashtra_canara +alleviation +economically +onward_lending +agents +give_impetus +introduce_plastic +plastic_currency +empanel +areas +margin_norms +disaster_management +keep_check +national_disaster +consecutive +convert_reschedule +failure_intensity +rescheduled +treat_portion +campaign +bansal +club +association_gipsa +gipsa +interalia_include +opposition +necessary_precaution +advanced +increasingly +nutrition +transform +deliver +pro_poor +examine_staff +wherever_required +therof +spread_across +fact_finding +forensic +mha +hit_areas +policy_holders +precious +narrow +chief_justice +arun_jaitley +space +east_asia +economic_scenario +discriminatory_mechanism +shahari +swarnajayanti +urgent_need +mid_day +rein +capital_infusion +conventional_energy +deliberate_upon +indo_russian +inherent +scientific +mumbai_kolkata +spur +cigarette +economically_backward +indemnity +minster +mfis +departmental_enquiry +dadra +freight +investigative +ispat +mahindra +parekh +work_load +roadmap +asset_npa +earthquake_affected +pre_shipment +ordinarily +ignore +mint_kolkata +noida_mint +distributor +karnataka_community +entities +extant_branch +shifting +every_insurer +insurance_penetration +penetration +estimation +chennai_bangalore +assured_career +centralized +cooperative_societies +raw_silk +image +unviable +domain +grameen +banking_ombudsman +erring_staff +ombudsman_scheme +productive_use +bharatiya +bharat_petroleum +contingent_upon +kolkata +vault +facilitation +forecast +one_specialised +law_tribunal +technologies +conflict +stock_exchanges +finished +hyderabad_jaipur +rashtriya_sahara +direct_benefit +fiscal_space +prove +parallel_economy +bankers_committee +district_consultative +slbcs +european_economic +integrated_livestock +fully_computerized +wan +wide_area +sette +cdr_mechanism +minimize +restructuring_cdr +organization_wto +tcs +draft_bill +apr_dec +capita_external +south_asian +rural_electrification +stop_smuggling +fcnr +nre +traceable 
+steep_decline +average_yield +important_role +contingency +safety_liquidity +press_note +hrd +jayant +corridor +scrutinize +hdfc_standard +icici_prudential +ltd_kotak +double_digit +ten_thousand +anywhere +gram_sadak +high_speed +road_connectivity +tenth_five +conditions +collected +deepen +fortnight_beginning +assured_return +hsbc +karur_vysya +lakshmi_vilas +united_western +impediment +repatriation_basis +chief_controller +degree_consistency +metric_tonne +opium_poppy +game +contraband_goods +outreach +travancore_allahabad +satellite_office +therewith +organizational_structure +kelkar +march_phalguna +town_hall +zonal_manager +acknowledgement +every_person +land_acquisition +statewise +municipality +road_transport +emergency_earthquake +departments +religious +ida_ifad +willful +continuous_monitoring +electronically +agency_cra +confidential +cra +leverage +apr_jun +budget_vol +gross_budgetary +haveli_goa +chaitra_saka +elaborate +deviation +garment +shore +tariff_area +trade_organisation +weighted_deduction +loss_making +unbanked +fledge +unnecessary_delay +sebi_prohibition +unfair_trade +metropolitan_magistrate +shut +specialized_ssi +unorganised_sector +plr_plus +leasing_hire +monitoring_mechanism +kingdom +ant +multinational_companies +office_memorandum +adjudicating_authority +second_amendment +wasteful_expenditure +swap +flagship +onward +raipur +royal +small_savings +cumulatively +animal +apex_court +pre_mature +prospectus +crash +market_manipulation +nation_wide +nationwide +vital_role +underwrite +birla_sun +flout +proof +drug_menace +ranking +locker +sunil +undisclosed_foreign +supervisor +mercantile_cooperative +calcutta_stock +equivalent_amount +threshold_limit +timely_completion +trading_membership +bullion +prudence +coordinated +valuation_surplus +tranche +devolve +atm +enhance_transparency +targets +home_town +call_centre +food_fertilizer +attract_basic +shell +become_final +interim +source_tds +settlement_ots +mind +vertical +tea_plantation 
+physically +could_lead +consequential +aggregate_turnover +column +dopt +facie +give_wide +revival_package +consultative_committee +transfer_pricing +low_volume +heroin_morphine +sovereign_guarantee +borrowers +foreclosure +cesse +alienation +chief_metropolitan +dacoity +eliminate_revenue +employment_guarantee +fertiliser_subsidy +stabilise +finished_leather +surplus_pool +illegal_cultivation +deputation +rehabilitation_package +inconsistent +risk_mitigation +fraudster +punitive_action +shrl_balasaheb +supervisory_body +call_notice +collapse +constitutional +average_daily +derivative +specification +efc_award +dual_control +functional_autonomy +minus +naik +post_shipment +section_read +trigger +sizeable +careful +commonwealth +ensure_strict +green_channel +screening +mrp +unforeseen +renegotiation +may_deem +transit_point +lend_directly +ucb +ucbs +overall_stance +permanent_establishment +chhatisgarh_goa +chhattisgarh +uttranchal_west +purchasing +monthly_average +reclassify +exploit +ssi_units +intra +mis_appropriation +rashtriya +leave_travel +travel_concession +air_linkage +effectively_deal +huge_quantity +leakage +bombay_high +storage_capacity +ultimate +second_hand +mitra_committee +hamper +laws +differ +supply_sewerage +investigation_wing +privacy +undue_harassment +civilian_staff +domestic_product +expedite_disposal +sluggish_growth +disability +incidental +shg +suraksha_yojana +jun +turnover_threshold +bihar_chandigarh +lakshadweep_madhya +quarterly_cross +chhatisgarh +recapitalization +pawan_kumar +tripura_uttaranchal +alleged_involvement +speed_post +unique +bracket_indicate +exhibit +monetary_penalty +validation +vote +bulk_drug +cash_handling +sixty +hawala_racket +fourth_quarter +computerized +drug_transit +monsoon +maturity_mismatch +significant_role +directors +parekh_committee +post_harvest +seventh_schedule +receivables +dfid_assistance +bankruptcy_code +modernize +interactive_voice +advisor +deputy_secretary +preserve +wpi_inflation 
+chattisgarh_goa +uttaranchal_west +bracket +tend +chit_funds +labour_intensive +independent_body +petroleum_natural +circular_dbod +cps +invoke +calibrate +economic_slowdown +structural_adjustment +ndtl +scheduled +economic_recession +global_competitiveness +benchmark +feedback +containment +reason_behind +deterrence +common_fraud +prevent_reduce +deserving +law_enforcement +gold_jewellery +phone +cadre_restructuring +administrative_warning +install_atms +prohibit_acceptance +private_placement +telephone_nigam +certain_precursor +anonymous +vigilance_machinery +cautionary +bidding +institutional_arrangement +malegam +authorities +late_payment +prescription +subsidized +latter +resultant +score +con +reciprocal_basis +sanction_follow +thermal +immunization_programme +observance +seamless +vikas_yojana +vocational_training +women_shgs +fraud_prone +monitorable_action +detailed_examination +palm_oil +canal_modernisation +maximize +basket +communication_technology +east_asian +ict +finished_goods +udyog_limited +five_hundred +unnecessary +brand_name +checking +extant_instructions +kendriya +spread_throughout +customer_satisfaction +control_bureau +mumbai_police +mandatorily +extra_ordinary +libor_bps +diligence +open_offer +assistant_commissioner +chennai_kolkata +premia +panchayat_raj +merger_amalgamation +offence_wing +anti_burglary +correspondent +buffer +distribution_system +single_window +tabulate +reorganisation_act +gainful_employment +core_banking +ecs +mtfrp +programme_mtfrp +drought_flood +power_plant +infuse_capital +provisionally +life_span +adb_ifad +remain_unutilized +within_manageable +launder +centrally_maintained +crude_palm +sca +sized +fund_imf +cochin_port +grid_corporation +hindustan_copper +ispat_nigam +mineral_metal +nuclear_power +pawan +rain +steel_plant +water_resources +paid +generation_programme +sewage_disposal +joint_parliamentary +prepay +complaints +illegal_activities +initiate_criminal +equitable_distribution +mumbai_nagpur 
+practical_experience +remedial_measures +compounding +discover +joint_commissioner +hospitality +earthquake_relief +person_resident +put_together +early_hearing +judgment +every_citizen +funding_pattern +aviation_turbine +dept +steep_hike +intelligence_bureau +cci +npa_ratio +fit +safe_transparent +volatile +computer_software +post_search +pan_masala +ensure_easy +economic_adviser +original_maturity +end_use +standardization +former_deputy +sme +aviation +false_declaration +jurisdictional_commissioner +prevent_misuse +mint_noida +depot_icd +bengal_andaman +nicobar +ing_vysya +royal_sundaram +furnish_copy +war +charitable_purpose +miss +sad +solar +stake_holders +sharing_pattern +backdrop +stimulate_demand +uncertain +sts_obcs +internationally_competitive +buy_back +teller_machine +transformation +avoid_wasteful +crude_petroleum +domestic_lpg +targeted +indebte +single_borrower +stiff_competition +bailout_package +ratio_car +crop_loans +purpose_vehicle +spv +warning +igms +strictly_adhere +durable +organizational_restructuring +caution_advice +lessee +server +outward +dop +estt +december_agrahayana +finmin_nic +substantial_expansion +gainful +swarna +afforestation +canal_hydro +gender +hydro +logistic +nhpc +sail +second_renewable +sensitize +shiksha +solar_energy +women_empowerment +global_recession +immediate_effect +predict +developer +underlying +cabinet_secretariat +convener +statistics +nsdl +police_station +penalise +hoarding +stabilize +facility_laf +laf +downward_revision +inbuilt +charitable_trust +party_guarantee +sugar_industry +benchmarke +syrian +expansionary +monetary_stance +vision_document +branch_cib +technological_upgradation +unaccounted_wealth +lobi +illegal_trading +optimum_utilization +bahrain_kuwait +serial +oda +utmost_vigil +accidental_death +bima_yojana +kcc +permanent_partial +personal_accident +preferential_treatment +rashtriya_krishi +threshold +alkaloid_factory +exact_estimate +edible_grade +palm +refined_palm +audio_visual +mat 
+motion +deductor +cib +tpa +stiff +holders +sound_prudent +merchandise +negative_networth +skilled_manpower +doubtful_integrity +portability +regardless +cotton_yarn +ccea +mela +audits +concept_paper +eventually +poster +hindu_undivided +premature_withdrawal +movable +practice_code +ficn +note_ficn +viability_profitability +undivided +calendar +notional_value +file_quarterly +family_pension +deter_economic +economic_offender +keep_track +reset +stimulus +catholic +sts +awa +malegam_committee +special_package +fixing +correctness +tile +agro_forestry +fish +parliamentary +incentivise +defaulted +drought_hit +ecbs +dearness_relief +salary_structure +help_group +unchanged +fccb +escape +serious_fraud +global_slowdown +fraud_cheating +uttranchal +marginal_farmers +credit_cards +residual +kolkata_chennai +luxury +hawala_transactions +chhattisgarh_goa +nadu_pondicherry +nicobar_chandigarh +pradesh_assam +moderation +antyodaya_yojana +subsidize +tea_sugar +urad +assistance_oda +fresh_equity +soiled_notes +ccs_pension +distressed +gol +debt_sustainability +kcc_scheme +kccs +satisfactory_operation +doorstep +blue_print +tamilnadu_lakshadweep +simple_language +variable_spread +programme_undp +strictly_comparable +grade_pay +pdf +agriculture_cooperation +concurrent +involved_therein +anti_money +direct_recruit +gulf_countries +subsume +escalation +stone +cad +inr +mof +pilot_proj +sdr_second +treaty_partner +commissionerate_wise +imp +integrated_child +gold_platinum +unstarred +much_less +comfort +national_crime +smooth_transition +waste_management +classified +obviate +truly +recurrence +parliamentary_standing +usual_course +libor_plus +deterrent_punishment +nayak +gfr +bankers +seriousness +pradesh_arunachal +undertake_recruitment +utilization_certificate +jubilee +authorized_dealer +yojana_pmry +counsel +minimize_litigation +mass_rapid +meal +tank_management +apex_level +debit_instruction +depository_limited +customer_protection +mse +counterfeiter +note_vigilance 
+minimum_qualifying +poppy_crop +strict_surveillance +mid_size +concerned_ministries +metro_rail +report_civil +custom_authorities +saheb +guilty_officials +alia +japan_russian +unlicensed +mfs +customs_tariff +place_liberal +dwell_time +trade_facilitation +moment +untied +commitment_fee +copper +national_thermal +breakup +serial_number +simplified_method +semi_finished +tractor +chandigarh_chattisgarh +alignment +blue_chip +shanghai_banking +authorized +operational_flexibility +differently +swarnjayanti +unorganised +hardware_software +original_subscriber +open_branches +authority_irda +shrl +senior_police +early_recognition +early_warning +backward_areas +reluctant +oversight +prompt_corrective +hall +large_sized +cyber_security +dbs +fanner +extensive_use +citibank +collateral_requirement +hufs +grade_assistant +budgetary_outlay +chennai_sec +provide_hassle +dispatch +mediclaim_policy +suspicious +indent_supply +caste_certificate +filer +terrorist_funding +resource_gap +unearth_unaccounted +pension_accounting +surge +dividend_payout +parliamentary_committee +interest_rates +accident_insurance +insured_person +corridor_project +landless +pradesh_uttaranchal +sharing_arrangement +scardb +liquidity_crunch +rating_agencies +less_expensive +cdr_cell +plus_variable +processing_fee +fii_inflow +preparation_appraisal +program_project +cultivate_opium +illicit_cultivation +set_forth +provident_funds +geographical_spread +anant_gangaram +geete +anant_geete +capitalization +indo_german +serious_nature +company_affairs +interface +earthquake_reconstruction +history +fifty_thousand +chandigarh_chhattisgarh +kashmir_jharkhand +bima_yojna +gangaram_geete +investigation_team +credible_information +affairs +suitability +demat_account +suspicious_transaction +fatf +market_integrity +common_publicity +trie +system_ecs +unclaimed_deposits +secondary_markets +relevant_laws +depb_scrip +gross_npas +ifad_opec +libor_swap +insured +monetary_fund +grievance_redress +dipp +frame_suitable 
+hide +security_guard +mutual_administrative +dispute_resolution +anandrao_adsul +inland_air +ticket +bench_mark +cargo_complex +litre +bihar_chhattisgarh +extremism +left_wing +smooth_flow +reliance_petroleum +chandigarh_chennai +kolkata_mumbai +shanghai +model_code +fipb_route +france_italy +exit_option +generate_employment +indirect_taxes +financially_weak +five_consecutive +commercial_judgment +occupational_groups +special_drawing +kolkata_lucknow +fitness +mandatory_quoting +independent_regulator +coupon +ministerial_group +restrictive_trade +empowered +dependent_upon +conviction +onion +groups_shgs +short_duration +hassle +khadi_village +community_forest +competitive_bidding +suuti +burglary +greenfield +marine_products +anti_inflationary +base_rate +gsdp_ratio +securitization_reconstruction +debt_swap +high_coupon +sixth_pay +normal_monsoon +proactively +rail_vikas +turbine_fuel +sterling_euro +adsul +fiscal_rectitude +nationwide_strike +profit_making +labour_commissioner +chhattisgarh_madhya +anandrao_vithoba +fll +participatory_note +shore_derivative +vijay_kelkar +slow_progress +woman_director +anandrao +orissa_socio +skill_upgradation +dacoity_burglary +criminal_breach +cashless_facility +rbis +network_tin +apex_body +primary_responsibility +fair_practice +harassment_viz +anand_rao +expeditious_recovery +forgone +chapter_xvii +globally_competitive +office_sfio +sfio +level_playing +unstructured +charitable_society +environment_friendly +expansion_plan +refinance_agency +preparedness +produce_marketing +dearness +reverse_repo +drug_controller +secondary_education +http_finmin +jayanti_gram +six_months +appeals +land_holding +alleged_violation +mortgaged +branch_authorization +undertake_substantial +basel +self_declaration +masala +drt_coimbatore +electronic_filing +outsource +outsourcing +initiate_probe +labour_employment +kalyan_yojana +privileged +prompt_repayment +scheduled_caste +upper_age +psb +residual_maturity +developed_countries +risk_premia 
+duplication +outsource_certain +aside +bihar_chattisgarh +audit_machinery +intensive_screening +operandi +genuine_grievance +latest +shripad_naik +itat_high +top_twenty +careful_monitoring +frequency +industrial_entrepreneur +offshore_derivative +rrb_employees +forward_contract +samriddhi +reasonable_component +swarojgar +benchmark_prime +universal_health +cheque_clearance +rtgs +settlement_rtgs +interim_dividend +mainly_relate +securities_depository +human_consumption +source_tcs +undertaking_substantial +preferred +assets_npas +divisional_manager +curb_speculation +exchanges +market_capitalization +currency_notes +single_digit +vithoba_adsul +shipping_industry +rail_road +fiu +organisational_structure +upward +frbm_act +fund_nssf +nssf +good_monsoon +securitization +active_pharmaceutical +specialised_ssi +adequate_discretionary +nayak_committee +bihar_chhatisgarh +consumption_pattern +collate_information +indo_bangladesh +organic_farming +shripad +introduce_preventive +onsite_inspection +diu_goa +severe_disability +high_courts +common_shareholder +supportive_role +sharp_fall +cashless +intensive_publicity +shris +http_dipp +oil_refinery +stem +boost_exports +easy_availability +meghwal +common_minimum +retail_investors +voting_equity +anchor +hospitalization +bangladesh_border +societies +chhattisgarh_gujarat +broad_consensus +palan_manickam +sample_survey +irrigation_component +payout +bplr +rate_bplr +monetary_discipline +swarozgar +container_depot +give_fillip +waive_margin +twelfth +restructuring_rescheduling +strict_adherence +supply_chain +sgsy_sjsry +coconut_oil +potato +comfortable_level +universal_access +arid +sop +readiness +easy_accessibility +recruitment_boards +offering_ipo +sarfaesi +sarfaesi_act +palaniman_ckam +accountant_firm +death_cum +retirement_gratuity +alloy_steel +brief_allegation +land_customs +hon +palan +stable_outlook +honble_supreme +compress +university_college +dipp_nic +global_depository +abnormality +delayed_payment 
+natural_justice +defend +dlcc +slbc_convenor +return_air +public_domain +lottery +minority_stake +management_frbm +lower_denomination +budget_announcement +awareness_campaign +multi_media +tracking_system +moratorium_period +cis_entities +cylinder +negative_impact +gas_lpg +liberal_tariff +metric +ton +undue_burden +official_directors +wto_commitment +post_offices +prof_vaidyanathan +divisible +attractive_destination +growth_momentum +prepare_well +sectoral_exposure +modus +mortgaged_property +least_percent +education_cess +overdraft_facility +inoperative +rbl +subordinate_offices +fiu_ind +earthquake_rehabilitation +cbec_gov +discovery +immediate_corrective +iron +metal_scrap +module +port_icd +sez_units +shipment_inspection +potential_viability +long_tenor +revisit +computer_training +dredging_corporation +hotel_airport +medium_sized +draft_guidelines +excess_liquidity +gamut +microfinance +irrigation_projects +prakash_narayan +aluminium_company +dec_mar +jun_sep +customer_friendly +meritorious +sunil_kumar +truly_needy +acute +cards_kcc +cibil +safe_secure +institutions_mfis +nps +pfrda +system_nps +oil_palm +five_thousand +limited_cibil +huge_gap +fishing_industry +centres +ckam +sao_refinance +meal_scheme +motor_spirit +central_recordkeeping +austerity_instructions +efficacy +tsunami +avoid_duplication +record_keeping +allow_stay +tsunami_hit +tsunami_reconstruction +gangwar +santosh +supreme +retired_employees +viability_gap +uptor +bharat_heavy +trading_platform +kerosene_pds +buffer_stock +unlawful +majority_shareholding +tsunami_disaster +focused_attention +intelligence_agencies +apr_mar +middle_east +password +constant_prices +cheque_drop +concessional_rates +unique_identification +grievance_cell +contributory +hindi_english +initial_corpus +borrowing_ceiling +retail_selling +nia +autonomy_package +manickam +price_mrp +interest_subvention +sustained_campaign +duty_cvd +weighted_average +certain_conditions +pre_qualification +emergency_tsunami 
+american_depository +mobile_telephone +tranche_position +urban_renewal +level_marketing +raw_sugar +electric_power +robbery_theft +independent_directors +pns +ner +vacant_posts +nationwide_trading +kerosene_lpg +frbm +full_exemption +allied_activities +distress_sale +discount_rate +ppp +collation +collection_collation +ndps +labour_bureau +cibil_com +confidential_use +unit_holders +sick_units +validation_ordinance +interim_pension +fund_nif +nif +tata_consultancy +region_ner +compliant +participatory_forest +fair_treatment +customer_kyc +staff_involvement +anti_narcotics +growth_forecast +eventual +cpse +spread_awareness +exploration_licensing +outcome_thereon +adequacy_ratio +commodity_derivative +remote_areas +grievances_redressal +interest_sarfaesi +jagran +custody_article +deceased_depositor +locker_safe +joint_liability +instead_obtain +send_clear +underbanked +contributory_pension +facility_dcrf +cvd +frbma +precious_metal +trillion +ashok_hotel +approval_route +macroeconomic_situation +bplr_truly +particular_borrower +gcc +shiksha_kosh +digit +abhiyan_ssa +institutional_architecture +scheduled_tribe +exempt_exempt +structure_ccs +palaniman +prompt_reporting +become_functional +asian_countries +limited_nsdl +promotion_dipp +cropper +fpi +frill +minimum_balance +solution_cbs +accrue_therefrom +gaikwad +place_robust +sixth_central +iba_model +negative_net +british_pound +trial_basis +rojgar +faq +cibil_website +fresh_accretion +make_counterfeiting +warning_signal +initiate_expeditious +alleged_harassment +cbs +finished_steel +petroleum_refinery +financial_inclusion +capital_preservation +strong_room +vaidyanathan +authorization_policy +mobile_banking +erring_officials +iifcl +litre_diesel +pds_kerosene +adopt_innovative +indians_nris +compete_internationally +managerial_autonomy +prudential_supervision +sufficient_managerial +unified +late_fee +bcsbi +overall_macroeconomic +authorisation_policy +farmer_household +horticulture_mission +mca +prior_notice 
+oil_marketing +amongst_different +comparable_maturity +inr_inr +rupee_drawing +panama +educational_institutions +sarva +gst +unemployment_allowance +party_motor +act_pmla +suspicious_nature +banked +hindustan_petroleum +ltd_nmdc +kumar_bansal +paw +finmin +round_january +wan_kumar +kyc +kyc_aml +vaidyanathan_committee +audited_balance +average_maturity +magha_saka +computerized_processing +macroeconomic_environment +eleventh_five +account_holders +oct_dec +resident_ordinary +skill_development +unauthorized_transaction +conducive_environment +karur +armed_police +excessive_volatility +freight_corridor +mission_jnnurm +fulfill_know +line_ministries +business_correspondent +business_facilitator +cards_kccs +ssa +join_nps +training_institutes +tripartite_review +internal_grievance +benchmark_yield +single_unified +harmonize +oilseed +refined +equitable_treatment +nmdc +cold +extra_budgetary +economically_inefficient +ngos +ore +apr_sep +edible_vegetable +authorised_dealer +guarantee_cover +kosh +premature_closure +atm_debit +local_administration +customer_identification +bangladesh_sri +solid +auditor_general +without_insistence +deep_sea +intensive_preventive +trail +pmla +powered +minimum_alternate +iron_ore +illicit_poppy +biometric +omcs +single_brand +intrusive +sub_division +recordkeeping +surpass +interim_relief +sensex_nifty +facilitate_interface +quarter_review +pan_cards +tfc_award +prepaid +insurance_advisor +corrupt_officials +jlgs +npci +inclusive_growth +power_parity +beneficial_owner +commercially +model_education +guarantee_corporation +metropolitan_areas +laundering_act +shiksha_abhiyan +dcrf +chief_labour +operations_sao +sme_rating +chairman_cbdt +mediclaim_policies +accept_collateral +quarterly_half +fema_violation +appellate_fora +lord +branch_authorisation +branch_expansion +viability_availability +currency_swap +investment_program +stressed_asset +micro_small +equitable_portion +nations +correspondent_model +duplicate_pan +multiple_pan 
+special_academic +work_experience +scheduled_commercial +skim +relaxed_security +vide_master +companies_hfcs +reverse_mortgage +subvention +vikas_nigam +air_transport +corporate_bonds +several_punitive +mfi +consecutive_assessment +cst_compensation +dedicated_freight +standalone +accountability_complete +supervisory_process +ble_high +landless_agricultural +minority_concentration +terror +mauritius_dtac +pay_fixation +aggregate_exposure +statistical_institute +pronged_approach +periodically_verify +transaction_history +jnnurm +party_liability +master_circular +commission_fmc +tur +wpi_basket +refund_banker +swasthya +stt +flag +structure_stccs +uttarakhand +ten_consecutive +gender_budgeting +radhakrishnan +mutually_acceptable +aam +floor_price +forward_markets +eleventh_plan +efficient_market +include_advanced +promote_safe +lending_psl +kotak +rupee_appreciation +investor_sentiment +regulated_entity +unless_specifically +minor_head +neft +population_less +site_atms +centralized_processing +adjusted_net +credit_equivalent +natural_rubber +itr +circular_dbs +regulated +bangalore_bhopal +ews +anbc +credit_anbc +ecb_fccb +excess_volatility +stccs +bcd +school_education +million_farmer +puducherry_punjab +tripura_uttarakhand +msme +create_buffer +terrorist_financing +insider +unorganized +technology_adoption +smart +exposure_whichever +institution_mfi +uttarakhand_himachal +payment_gateway +administrative_ministries +doorstep_banking +cdr_less +wide_disparity +toll_free +viable_ucbs +customer_confidentiality +bps +permissible_end +consultancy_services +taxable_services +alia_include +strategic_disinvestment +rolling_back +realistic_lending +respective_boards +electronic_mode +benefit_transfer +dir_series +exposure_obe +mis_selling +uttarakhand_uttar +committee_dlcc +committee_ssc +extant_guidelines +robust_internal +zero_balance +solar_power +terror_financing +tracking +recovery_agents +bio_metric +consortium_lending +aam_aadmi +swap_arrangement +dlcc_district 
+america_usa +rupee_depreciation +civil_courts +thirteenth +shgs_jlgs +gram_udyog +participatory_notes +climate_change +mahatma +partial_withdrawal +enterprise_mse +channelize +screening_committee +fraudulent_withdrawal +wrong_quoting +expeditiously_regardless +redressal_machinery +structure_ltccs +client_kyc +quoting_pan +sum_assured +additional_subvention +import_intensity +massive_awareness +high_frequency +less_developed +nhb_sidbi +axis +semi_precious +laundering_aml +statutory_concurrent +flagship_programmes +sugar_season +authority_pfrda +job_creation +puducherry +forex_derivative +uttarakhand_west +awareness_amongst +rashtriya_swasthya +swasthya_bima +cautionary_advice +zero_coupon +standalone_health +cgtmse +headline_inflation +tds_tcs +hindustan_organic +cabinet_secretary +pay_band +property_mortgaged +bcsbi_code +advance_pricing +tribal_areas +install_coin +formulation_appraisal +ppp_projects +cctv +husbandry +adwdr_scheme +clog +occupational_status +moneylender +rajan_committee +create_good +banknote_printing +processing_centre +ble_supreme +rescheduled_agri +debit_cards +sharp_depreciation +registry +skill_training +revenue_forgone +sfio_examine +significant_opportunity +adwdr +lump +democratic_self +reverse_charge +cfmc +speedy_processing +fourteenth +additional_incentive +locker_holders +psl +cir +recent_months +bcs +offsite_atms +fiscal_stimulus +bpl_people +gujarat_stcb +stcb +pranab_mukherjee +stimulus_package +debarment +pranab +raw_cotton +bengaluru +centre_cpc +cpc_bengaluru +national_aluminium +rashtriya_ispat +zero_import +ppp_mode +degree_profile +onsite +fake_notes +jagdish_sharma +demand_imbalance +gnpa +financial_literacy +fiscal_roadmap +aaby +mission_mode +liquidity_tightening +entry_load +vigilant_tighten +active_consolidation +expansionary_fiscal +programme_pmegp +indias +countrys +revert +repayment_holiday +sale_pos +ltccs +ltccs_package +vaidyanathan_task +separately_maintained +forty_nine +additional_authentication 
+neighbouring_countries +stiff_penal +settlement_systems +police_authorities +training_programmes +uidai +employee_strength +indias_intervention +seamless_flow +rsby +subsidy_outgo +timeline +profitability_viability +regulatory_architecture +gst_regime +label +rabi_season +euro_area +combat_ficn +elevated +advanced_risk +suitable_spread +honble +strategy_cps +weaver_community +orissa_puducherry +authentication +prompt_dispatch +conversion_rescheduling +honble_member +model_educational +authenticity_genuineness +morphine_heroin +life_cycle +pradesh_uttarakhand +exit_load +epfo +organization_epfo +incentivize +draft_offer +ltd_nhpc +relevant_material +spmcil +offer_fpo +brazil_russia +bric +psl_target +link_failure +petrol_pump +irrigation_watershed +rice_edible +census_without +agency_sca +drawing +spectrum_allocation +rice_urad +white_refined +language_panel +anchor_inflationary +prepare_caution +terror_funding +msp +vikas_yojna +weighted +annual_increment +notes_ficn +unauthorized_acceptance +inordinate +banking_outlet +mortar +selling_agents +mar_dec +concentration_risk +sikkim_subject +tier_centres +disaster_relief +stay_ahead +field_trial +unlawful_activities +rbis_circular +statistical_organisation +calamity_contingency +fund_nccf +confidentiality_clause +include_selective +select_food +situation_regularly +raising_sovereign +whenever_credible +pmegp +ratio_cdr +proper_opportunity +prompt_payee +caste_scheduled +sustainable_livelihood +former_judge +innovation_fund +reorganisation +tripartite_portfolio +quoting +corporate_affairs +council_fsdc +food_inflation +headline_wpi +gradual_exit +fsdc +macro_prudential +kolkata_east +metro_project +cft +requisite_registration +terrorism_cft +upload +linkage_model +heavy_losses +pfms +swavalamban +transfer_neft +mortgage_guarantee +ppp_model +integral +carefully_mention +selective_ban +ltd_ntpc +nhpc_ltd +travancore_syndicate +swavalamban_scheme +voluntarily_save +cashless_treatment +preferred_provider +daily_cap 
+china_korea +gandhi_national +price_stabilization +twelfth_five +revised_estimate +lanka_thailand +technology_back +sovereign_wealth +underlying_exposure +sheet_exposure +elderly +cgst +cgst_sgst +sgst +tobacco_products +alternate_dispute +tribunal_itat +mar_jun +livelihood_mission +nrlm +bcd_cvd +grid +black_marketing +fresh_tenure +gross_advances +fraudulent_activities +draft_red +mgnrega +obe_whichever +adversely_affected +engineers +girl_child +enforcement_agencies +related_issues +devolution_formula +headline +obe +disabled_people +final_consumption +office_cso +extremism_lwe +volume_million +fpo +playing_field +electronically_collate +duty_bcd +mar_sep +agreement_tiea +tiea +cashless_health +marginal_tenant +apr_oct +itr_itr +personnel_selection +nps_architecture +standard_operating +gst_council +igst +series_circular +educational_institutes +put_forth +rural_hinterland +performing_assets +rbis_intervention +shgs_attain +money_bullion +valuable_article +profit_earning +speed_diesel +depositor_education +distress_migration +convertible_bonds +aadmi_bima +yojana_aaby +gold_etf +enactment_amendment +insight +lighting +nehru_national +term_crop +refined_sugar +refined_white +budgetary_resources +incentivize_production +increasingly_attractive +wealth_inside +aml +kyc_norms +back_black +outward_fdi +stash +margin_cap +nbfc_mfi +nbfc_mfis +organisation_epfo +dfs +system_igms +affordable_housing +spectrum_scam +anti_avoidance +suraksha +swabhimaan +wrongdoing +consumer_spending +mukherjee +prudent_macroeconomic +cyclone_risk +compulsorily_file +alleged_contravention +plan_abep +unbanked_rural +social_responsibility +ponzi +multi_brand +bangalore_metro +nadu_afforestation +test_merit +diu_lakshadweep +sep_dec +five_pronged +abep +cyprus_france +netherlands_uae +enterprises_mse +balanced_pace +rbis_website +agreement_dtaa +prior_consent +internet_protocol +additional_factor +could_expose +odisha_punjab +rigorous_imprisonment +tieas +possible_fraud +playing 
+gap_funding +dbs_circular +deficit_cad +vulnerable_groups +aml_cft +research_division +clean_energy +corrective_measures +benami_transactions +euro_zone +uncertain_global +atm_cards +ble +aadhaar_authentication +analytic +diesel_pds +contractual_employment +dbr +indias_sovereign +odisha +inclusive_innovation +aggressive_lending +gnpa_ratio +gnpas +narayan_yadav +agency_nia +fcord +ficn_menace +cic +banked_areas +stake_sale +custom_officials +utmost_priority +organisation_wto +link_http +first_availment +thereafter_simple +time_documentation +ram_meghwal +joint_consultative +euro_japanese +negotiable_warehouse +disaster_response +executive_chairman +white_label +rbis_master +prima +anbc_ceobe +exposure_ceobe +emv +emv_chip +revelation +europe_sluggish +bullion_jewellery +household_consumption +ultra_small +unified_payment +nps_swavalamban +swabhimaan_campaign +developmental_side +women_self +rupay +chief_adviser +industrial_corridor +working_capital +realistic_approach +prepaid_payment +atm_pos +stressed +fcord_ficn +group_fcord +every_household +lwe +partial_disability +central_repository +reveal_pin +discourage_distress +wing_extremism +irda_cir +mid_cap +eco_system +detection_reporting +fast_tracking +moderate_food +avoidance_rule +card_issuing +cyber_frauds +merchant_discount +prospective_overseas +aadhaar_enrolment +polymer_plastic +institutional_placement +disability_cover +provisioning_buffer +operative_societies +atm_cum +banknote_paper +ficn_coordination +supplementary_service +unstructured_supplementary +ussd +ussd_platform +adopt_iba +electronic_tracking +specialized_mse +discom +bundelkhand_package +units_eous +retail_aviation +kyc_anti +rddbfi_act +source_cso +consumer_protection +brand_retail +ecosystem +rolling_target +honble_prime +cvd_sad +early_detection +attract_normal +mission_nrlm +email +sacrifice_waiver +commensurate_punishment +retirement_termination +multilateral_fora +copper_ltd +warehouse_storage +etf +socio +unfair_business 
+jharkhand_odisha +odisha_puducherry +investment_cci +subsidized_lpg +transfer_dbt +biometric_authentication +aadhaar_card +multilateral_convention +treaty_network +dbt +gnpa_gnpa +robust_mechanism +central_statistics +yojana_rsby +natural_resources +employees_provident +cum_debit +post_devolution +probe_staff +strengthen_revamp +single_transaction +vide_gazette +essentially_emv +international_usage +usage_unless +rate_mdr +policyholder_gov +utwise +css_cps +selling_price +indias_ranking +portal_https +radio_frequency +cpc_tds +international_consortium +integrated_grievance +death_gratuity +web_portal +seelam +excessive_speculation +bharatiya_mahila +gas_pricing +telecom_deregulation +quasi_sovereign +shravana_saka +exit_conference +frauds_classification +base_erosion +profit_shifting +top_borrowal +contribute_significantly +willful_defaulters +forward_trading +unit_fiu +seventh_pay +quasi_equity +national_spot +quantitative_easing +zone_sez +compulsory_delivery +speculative_open +procedure_sop +automatic_exchange +february_magha +nagaland_odisha +scs_scheduled +computer_emergency +response_team +automatically_set +moment_calamity +motion_without +uncertain_stakeholder +kyc_registry +bhartiya_mahila +shrimati_nirmala +smt_nirmala +rupay_kisan +collect_store +distress_prompt +forum_jlf +combat_cross +fully_reciprocal +indias_treaty +transparent_ownership +two_former +via_emphasis +mca_website +ponzi_schemes +fundamental_change +willful_attempt +mlis +human_resources +dbs_cfmc +rupay_debit +groups_jlgs +fpis +filing_portal +nirmala +companies_omcs +telangana +resolution_process +mag +commission_ffc +ffc +fillip +nirmala_sitharaman +august_shravana +consumer_affairs +awareness_programmes +orderly_functioning +jaitley +workforce_low +six_metro +jan_dhan +ship_building +swachh_bharat +online_platform +merchant_exporter +standard_crs +inbuilt_accident +pmjdy +simple_declaration +ssas +yojana_pmjdy +attendant_facilitatory +hill_areas +rectitude +forensic_audit +mantri 
+odisha_west +lpg_subsidy +flexible_structuring +kyc_compliant +counterfeit_notes +https +actual_realization +create_unified +one_cluster +santosh_kumar +niti_aayog +disadvantaged_groups +credible_deterrence +official_estimation +pin_point +fpi_regime +constant_basic +facilitatory +gva +soil_health +bengaluru_chandigarh +ernakulam_hyderabad +irdai +green_energy +dehradun_ernakulam +enable_confiscation +accidental_insurance +teller +information_aeoi +sukanya_samriddhi +mudra +mudra_yojana +pmmy +sidbi_mudra +digital_payment +north_coastal +gold_monetisation +gstn +network_gstn +lanka_nepal +bharat_abhiyan +samriddhi_yojana +obtain_aadhaar +commodity_exchanges +swachh +nclt +vide_rbis +ltd_nlc +jeevan_jyoti +pmjjby +pmsby +suraksha_bima +yojana_pmsby +advance_authorization +rupay_card +filer_monitoring +atal_pension +mantri_suraksha +pmjjby_pradhan +yojana_apy +duty_bed +niti +nadu_telangana +ots_compromise +index_combined +eventual_confiscation +krishi_vikas +fund_niif +improve_clarity +key_raw +include_fillip +countrys_reliance +apy +exclusive_website +join_apy +pmjjby_pmsby +atal +honble_president +project_inr +act_fatca +forge_multi +prohibition_amendment +tieas_multilateral +vice_chairmanship +cbs_platform +neft_rtgs +authentication_validation +compliance_window +property_equivalent +robust_macroeconomic +telangana_tripura +trade_receivables +gross_enrolment +vidya_lakshmi +wherein_collateral +niif +unlicensed_dccbs +eastern_states +provisional_attachment +alternate_governor +indradhanush +indradhanush_plan +chandigarh_dehradun +gold_bond +master_direction +sovereign_gold +discom_assurance +yojana_uday +away_abroad +multilateral_competent +seventh_cpc +answ +serious_injury +digital_transaction +website_https +accumulation_phase +rate_mclr +fipb_gov +day_nrlm +samriddhi_account +lwe_affected +bharat_electronics +highway_construction +nagpur_metro +organic_chemicals +rashtriya_chemicals +rddbfi +intervention_thus +medical_negligence +chhattisgarh_dadra 
+maharashtra_bhartiya +rules_gfr +domestic_scheduled +schemes_css +aqr +review_aqr +offshore_jurisdiction +panama_paper +people_declare +mobile_app +mclr +jlf +payment_interface +upi +steel_flat +project_insight +bsbd_account +deposit_bsbd +tenor_premium +woman_borrower +interface_upi +arjun_ram +dipp_gov +sustainable_structuring +aqr_exercise +krishi_kalyan +maharashtra_bharatiya +group_mag +sustainable_municipal +digital_transactions +promote_digital +age_spouse +cyber_crisis +sick_industries +major_push +tamilnadu_telangana +specialised_msme +hardware_technology +turbine +affairs_dea +may_hamper +aeoi +fatca +timely_initiation +data_breach +unusual_cyber +weekly_video +ibc +compensation_cess +gst_compensation +digital_mode +interim_arrangement +fraud_registry +retirement_adviser +psl_targets +centralised_data +puducherry_andaman +multilateral_agencies +post_demonetisation +cashless_transactions +demonetisation +mdr +universal_basic +zero_rating +dredging +sbns +demonetisation_drive +provider_network +forex_reserves +incremental_crr +label_atm +island_bihar +protect_elderly +coastal_connectivity +billion_volume +dhan_yojana +upi_ussd +compute_actual +puducherry_tamilnadu +expeditious_verification +operation_clean +additional_grace +nadu_puducherry +climate_friendly +forest_ecosystem +transport_kochi +taxpayer_whose +upfront_deposit +integrated_textile +textile_park +cyber_resilience +railway_zone +cpse_etf +provisional_data +premium_quota +limited_spmcil +shell_companies +strategic_buyer +capture_financing +corporate_debtor +insolvency_resolution +hinterland +liberal_transparent +circular_dbr +fin_tech +network_platform +initiate_insolvency +nagpur_city +cgst_igst +commensurate_reduction +scrap_nigam +mantri_gram +sadak_yojana +slbc_convener +cgst_act +khadi_yarn +product_gsdp +shiv_pratap +pon_radhakrishnan +pratap_shukla +crime_record +composition_taxpayer +issuing_pns +team_sit +bharatmala +achieve_insolvency +bharatmala_pariyojana +manufacturing_concrete 
+bharat_etf +unauthorised_electronic +delayed_filing +elderly_persons +way_bill +frequency_trading +intelligence_dri +bsbd +political_funding +language_hindi +subsequent_transparent +pon +classify_potential +observation_evaluation +stringent_recovery +full_provisioning +civil_claim +fugitive_economic +offender_confiscation +debar_wilful +lakh_ssas +mantri_jan +east_region +unforeseen_seasonal +animal_welfare +specialised_monitoring +transparent_recognition +swift_operating +atms_timeline +initiate_immediate +confiscation_fugitive +ring_fencing +enjoy_ownership +mantri_mudra +yojana_pmmy +book_facility +creditor_debtor +get_possession +interim_resolution +petrochemical_complex +digital_payments +data_ussd +jyoti_bima +yojana_pmjjby +small_enterprises +sgst_integrated +simplified_computation +tribal_affairs +user_interface +agency_mudra +kalyan_cess +quality_review +payments_interface +specified_undertaking +fugitive +bharat_mission +bangladesh_assam +bangladesh_meghalaya +bangladesh_tripura +customs_station diff --git a/data/UN_Debates/CFDTM/beta.npy b/data/UN_Debates/CFDTM/beta.npy new file mode 100644 index 0000000000000000000000000000000000000000..2d60ce5a0e83d18c165eb8174d99afb7b836c297 --- /dev/null +++ b/data/UN_Debates/CFDTM/beta.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44b84c43bab8de2639eadb63a52a036453a3909ee0a0fa0018bf1fced118eaee +size 135175728 diff --git a/data/UN_Debates/CFDTM/topic_label_cache.json b/data/UN_Debates/CFDTM/topic_label_cache.json new file mode 100644 index 0000000000000000000000000000000000000000..752108a5b11b1125ae6ebb09819af65c343d3ca1 --- /dev/null +++ b/data/UN_Debates/CFDTM/topic_label_cache.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b048bc9bc590cc5afe32ed076135a2307a78a0f01332dea707369baecd1abb16 +size 4866 diff --git a/data/UN_Debates/DETM/beta.npy b/data/UN_Debates/DETM/beta.npy new file mode 100644 index 
0000000000000000000000000000000000000000..e1c15b3e6b7ed8fb92e454e459f1680321502c08 --- /dev/null +++ b/data/UN_Debates/DETM/beta.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45564ead9dce5bc7dec5f9996f81ed5af3bcd5df599cc66ad09d976f81eb244b +size 54070368 diff --git a/data/UN_Debates/DETM/topic_label_cache.json b/data/UN_Debates/DETM/topic_label_cache.json new file mode 100644 index 0000000000000000000000000000000000000000..b36c32c7e170fd3515d12cc788cb71c6e22c63c3 --- /dev/null +++ b/data/UN_Debates/DETM/topic_label_cache.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bf645ed55fd88e93a4abf8fd229ddae8c85fbd214389f3f40d134c44461d49c +size 1967 diff --git a/data/UN_Debates/DTM/beta.npy b/data/UN_Debates/DTM/beta.npy new file mode 100644 index 0000000000000000000000000000000000000000..01ce3758120880a09d20171debdd9cbec5624542 --- /dev/null +++ b/data/UN_Debates/DTM/beta.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13cc1c149317ef4dd0ad18a86d91f42bfb7a596af56eca6233481e62bd9304ee +size 108140608 diff --git a/data/UN_Debates/DTM/topic_label_cache.json b/data/UN_Debates/DTM/topic_label_cache.json new file mode 100644 index 0000000000000000000000000000000000000000..e6f77080d543cfa58d0f78c9e0366ce59792d44d --- /dev/null +++ b/data/UN_Debates/DTM/topic_label_cache.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84e13bbb9c6a423865e4736eb8151ef0437737f61dfd5c0e6d03cbce6c3210d7 +size 1971 diff --git a/data/UN_Debates/docs.jsonl b/data/UN_Debates/docs.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7b251ccd433117bea22f44d4947f91a6c1768153 --- /dev/null +++ b/data/UN_Debates/docs.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b92e82df0bf7ce89afbe99e771fea46c37a3104682252f53c681d90dab2c2bf +size 136970430 diff --git a/data/UN_Debates/inverted_index.json b/data/UN_Debates/inverted_index.json new file mode 
100644 index 0000000000000000000000000000000000000000..d43e1e19f17b1292b6aae63c8cc70a9d78776fc2 --- /dev/null +++ b/data/UN_Debates/inverted_index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72c831126811cf5a7bc368c42e60aa1cdc9e9558c8e57d19c271ff4a90d7da23 +size 37964990 diff --git a/data/UN_Debates/processed/lemma_to_forms.json b/data/UN_Debates/processed/lemma_to_forms.json new file mode 100644 index 0000000000000000000000000000000000000000..d10dbdcf14a12e86f22fb7e89d53eb8955a81f2d --- /dev/null +++ b/data/UN_Debates/processed/lemma_to_forms.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19930676fae4db0330a2f29d59430d815d810402e02d5df60c82a6c3e607a523 +size 1814672 diff --git a/data/UN_Debates/processed/length_stats.json b/data/UN_Debates/processed/length_stats.json new file mode 100644 index 0000000000000000000000000000000000000000..10658193955bcda01426f70c554f7b74abf58580 --- /dev/null +++ b/data/UN_Debates/processed/length_stats.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2f7acc7695122253bc306ca0c9260d7980b4348f993a4be2e1f40d126341c41 +size 135 diff --git a/data/UN_Debates/processed/time2id.txt b/data/UN_Debates/processed/time2id.txt new file mode 100644 index 0000000000000000000000000000000000000000..690cf5d60ca4722e9f4ebe93afcc147fdcf7536a --- /dev/null +++ b/data/UN_Debates/processed/time2id.txt @@ -0,0 +1,48 @@ +{ + "1970": 0, + "1971": 1, + "1972": 2, + "1973": 3, + "1974": 4, + "1975": 5, + "1976": 6, + "1977": 7, + "1978": 8, + "1979": 9, + "1980": 10, + "1981": 11, + "1982": 12, + "1983": 13, + "1984": 14, + "1985": 15, + "1986": 16, + "1987": 17, + "1988": 18, + "1989": 19, + "1990": 20, + "1991": 21, + "1992": 22, + "1993": 23, + "1994": 24, + "1995": 25, + "1996": 26, + "1997": 27, + "1998": 28, + "1999": 29, + "2000": 30, + "2001": 31, + "2002": 32, + "2003": 33, + "2004": 34, + "2005": 35, + "2006": 36, + "2007": 37, + "2008": 38, + "2009": 39, + 
"2010": 40, + "2011": 41, + "2012": 42, + "2013": 43, + "2014": 44, + "2015": 45 +} \ No newline at end of file diff --git a/data/UN_Debates/processed/vocab.txt b/data/UN_Debates/processed/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..1595100ff5b398bcaad59b1bb7f1f60f84bd9694 --- /dev/null +++ b/data/UN_Debates/processed/vocab.txt @@ -0,0 +1,14693 @@ +ababa +abandon +abandonment +abate +abatement +abbas +abdallah +abdel +abdelaziz +abderemane +abdicate +abdication +abdikassim +abdou +abduct +abduction +abdul +abdulaziz +abdulla +abdullah +abdussalam +aberrant +aberration +abet +abeyance +abhor +abhorrence +abhorrent +abide +abidine +abidjan +ability +abject +abjure +abkhaz +abkhazia +abkhazian +ablaze +able +ably +abm +abnegation +abnormal +abnormality +abnormally +aboard +abode +abolish +abolishment +abolition +abominable +abomination +aboriginal +abort +aborted +abortive +abound +abraham +abreast +abroad +abrogate +abrogation +abrupt +abruptly +absence +absent +absolute +absolutely +absolve +absorb +absorption +absorptive +abstain +abstention +abstract +abstraction +absurd +absurdity +absurdly +abu +abuja +abundance +abundant +abundantly +abuse +abusive +abusively +abyei +abysmal +abyss +academia +academic +academician +academy +acapulco +accede +accelerate +accelerated +acceleration +accent +accentuate +accentuation +accept +acceptability +acceptable +acceptance +accepted +access +accessibility +accessible +accession +accident +accidental +acclaim +acclamation +accolade +accommodate +accommodation +accompaniment +accompany +accompanying +accompli +accomplice +accomplis +accomplish +accomplished +accomplishment +accord +accordance +accordingly +account +accountability +accountable +accounting +accra +accredit +accredited +accretion +accrue +accumulate +accumulation +accuracy +accurate +accurately +accusation +accuse +accuser +accustom +accustomed +ace +aceh +achievable +achieve +achievement +achieving +achille +acid +acidification 
+acknowledge +acknowledgement +acknowledgment +acp +acquaint +acquaintance +acquainted +acquiesce +acquiescence +acquire +acquis +acquisition +acquit +acre +acrimonious +acrimony +across +acs +act +acting +action +activate +activation +active +actively +activism +activist +activity +actor +actual +actuality +actualize +actually +acuman +acute +acutely +acuteness +adage +adam +adamant +adamantly +adapt +adaptability +adaptable +adaptation +adaptive +add +addendum +addict +addiction +addis +addition +additional +additionally +address +adduce +aden +adequacy +adequate +adequately +adhere +adherence +adherent +adhesion +adjacent +adjective +adjoining +adjourn +adjudicate +adjudication +adjunct +adjust +adjustment +administer +administering +administration +administrative +administratively +administrator +admirable +admirably +admiral +admiration +admire +admissibility +admissible +admission +admit +admittance +admittedly +admonish +admonition +ado +adolescence +adolescent +adopt +adoption +adorn +adriatic +adrift +adroit +adult +adulthood +adumbrate +advance +advanced +advancement +advantage +advantaged +advantageous +advent +adventure +adventurer +adventurism +adventurist +adventurous +adversarial +adversary +adverse +adversely +adversity +advert +advertise +advice +advisability +advisable +advise +advisedly +adviser +advisory +advocacy +advocate +aegean +aegis +aerial +aeroplane +aesthetic +afar +affair +affairs +affect +affected +affection +affectionate +affective +affiliate +affiliated +affiliation +affinity +affirm +affirmation +affirmative +affix +afflict +afflicted +affliction +affluence +affluent +afford +affordable +afforestation +affront +afghan +afghani +afghanistan +afghans +afield +afloat +afoot +aforementione +aforementioned +aforesaid +afraid +afresh +africa +african +africanist +africano +africans +africaís +afrikaner +afro +aftermath +afternoon +aftershock +afterthought +afterwards +aga +age +aged +ageing +agency +agenda +agent +aggrandizement 
+aggravate +aggravated +aggravating +aggravation +aggregate +aggression +aggressive +aggressively +aggressiveness +aggressor +aggrieved +agile +aging +agitate +agitation +ago +agonize +agonizing +agonizingly +agony +agostinho +agrarian +agree +agreeable +agreed +agreement +agribusiness +agricultural +agriculture +agro +aguirre +ahead +ahg +ahidjo +ahmad +ahmadinejad +ahmadou +ahmed +ahtisaari +aic +aid +aide +aids +ail +ailment +aim +aimless +air +airborne +aircraft +aire +airing +airlift +airline +airliner +airlines +airplane +airport +airspace +akashi +akin +alacrity +alan +alarm +alarmed +alarming +alarmingly +alarmist +alas +alaska +alba +albania +albanian +albanians +albeit +albert +alberto +albright +alcohol +alcoholism +alec +alert +alexander +alexandra +alfonsin +alfonso +alfredo +algeria +algerian +algier +alhaji +ali +alia +alibi +alien +alienate +alienated +alienation +align +aligned +alignment +alike +alioune +alive +aliyev +allah +allay +allegation +allege +alleged +allegedly +allegiance +allen +allende +alleviate +alleviation +alley +alliance +allied +allocate +allocation +allot +allow +allowance +allude +ally +alm +alma +almaty +almighty +almost +aloft +alone +along +alongside +aloof +aloud +alpha +alphabet +already +alt +altar +alter +alteration +altered +alternate +alternately +alternation +alternative +alternatively +although +altitude +alto +altogether +altruism +altruistic +aluminium +alvarado +always +amadou +amalgam +amara +amaral +amass +amazed +amazement +amazing +amazingly +amazon +amazonia +amazonian +ambassador +ambassadorial +ambiguity +ambiguous +ambit +ambition +ambitious +ambitiously +ambivalence +ambivalent +ambush +ameliorate +amelioration +amenable +amend +amendment +amenity +ament +amerasinghe +america +american +americanism +americans +americas +amia +amicable +amicably +amid +amidst +amilcar +amin +amir +amis +amisom +amity +amman +ammunition +amnesty +among +amongst +amortization +amos +amount +amounting +amphictyonic +ample 
+amplify +amply +amputate +amputation +amu +ana +anachronism +anachronistic +anaemic +analogous +analogy +analyse +analysis +analyst +analytical +analyze +anarchic +anarchist +anarchy +anastasio +anathema +anc +ancestor +ancestral +ancestry +anchor +ancient +ancillary +ande +andean +andorra +andorran +andre +andrei +andropov +andrés +anew +ange +angel +anger +angie +angkor +angle +anglo +angola +angolan +angolans +angry +anguish +anguished +animal +animate +animosity +anjouan +ankara +anna +annal +annan +annapolis +annex +annexation +annexationist +annexe +annexed +annihilate +annihilation +anniversary +announce +announcement +annual +annually +annul +annum +anomalous +anomaly +anonymity +anonymous +another +ansar +answer +answerable +ant +antagonism +antagonist +antagonistic +antagonize +antananarivo +antarctic +antarctica +ante +antecedent +anthem +anthrax +anti +anticipate +anticipated +anticipation +anticipatory +antidemocratic +antidote +antigua +antille +antipathy +antipersonnel +antiquated +antiquity +antiretroviral +antithesis +antithetical +antonio +anwar +anxiety +anxious +anxiously +anybody +anymore +anyone +anything +anytime +anyway +anywhere +aosis +aouzou +apace +apart +apartheid +apartment +apathetic +apathy +apec +apex +aphorism +apia +aplomb +apocalypse +apocalyptic +apolitical +apologist +apologize +apology +apostle +appal +appalling +appallingly +apparatus +apparent +apparently +appeal +appealing +appear +appearance +appease +appeasement +appellate +append +appendage +appetite +applaud +applause +applicability +applicable +application +applied +apply +appoint +appointment +apportion +apportionment +apposite +appraisal +appraise +appreciable +appreciably +appreciate +appreciated +appreciation +appreciative +apprehend +apprehension +apprehensive +apprenticeship +apprise +approach +approbation +appropriate +appropriately +appropriateness +appropriation +approval +approve +approximate +approximately +april +apt +aptidon +aptitude +aptly +aqaba +aqsa 
+aquaculture +aquila +aquino +arab +arabia +arabian +arabic +arabisraeli +arable +arabs +arafat +aral +arap +arbiter +arbitral +arbitrarily +arbitrariness +arbitrary +arbitrate +arbitration +arbitrator +arc +arch +archaeological +archaic +archbishop +archipelagic +archipelago +architect +architectural +architecture +archive +arctic +ardent +ardently +ardour +arduous +arduously +area +arena +argentina +argentine +argentinian +arguably +argue +argument +arias +arid +ariel +arise +aristide +aristides +aristotle +arithmetic +arm +armada +armadas +armageddon +armament +armed +armenia +armenian +armenians +arming +armistice +armored +armoured +armoury +arms +army +around +arouse +arra +arrange +arrangement +array +arrear +arrest +arrival +arrive +arrogance +arrogant +arrogantly +arrogate +arsenal +arson +art +arta +artefact +artery +arthur +article +articulate +articulation +artifice +artificial +artificially +artillery +artisan +artist +artistic +arusha +ascend +ascendancy +ascension +ascent +ascertain +ascribe +asean +ashamed +ashe +ashgabat +ashraf +ashton +asia +asian +asians +aside +ask +asleep +asmara +aspect +asphyxiate +aspirant +aspiration +aspirational +aspire +assad +assail +assassin +assassinate +assassination +assault +assemblage +assemble +assembly +assent +assert +assertion +assertive +assess +assessed +assessment +asset +assiduous +assiduously +assign +assignment +assimilate +assimilation +assist +assistance +assistant +associate +associated +association +assuage +assume +assumption +assurance +assure +assured +assuredly +astana +astonish +astonishing +astonishment +astounding +astray +astronaut +astronomical +astute +asuncion +asunder +asylum +asymmetric +asymmetrical +asymmetry +atavistic +ate +athens +athlete +ation +atlanta +atlantic +atmosphere +atmospheric +atoll +atom +atomic +atone +atop +atrocious +atrocity +att +attach +attache +attached +attaches +attachment +attack +attacker +attain +attainable +attainment +attempt +attend +attendance 
+attendant +attention +attentive +attentively +attenuate +attest +attitude +attorney +attract +attraction +attractive +attributable +attribute +attrition +attune +attuned +auckland +audacious +audacity +audience +audit +auditor +augment +augmentation +augur +augury +august +augusto +aung +aura +auschwitz +auspex +auspicious +austere +austerity +australia +australian +austria +austrian +austro +autarchy +autarky +authentic +authentically +authenticity +author +authorise +authoritarian +authoritarianism +authoritative +authoritatively +authority +authorization +authorize +authorized +autocracy +autocratic +automatic +automatically +automobile +autonomous +autonomy +autumn +auxiliary +avail +availability +available +avalanche +avarice +avenge +avenue +aver +average +averse +aversion +avert +avian +aviation +avid +aviv +avoid +avoidable +avoidance +avow +await +awake +awaken +awakened +awakening +award +awarding +aware +awareness +awash +away +awe +awesome +awful +awkward +axe +axiom +axiomatic +axis +ayacucho +ayala +ayatollah +azania +azanian +azerbaijan +azerbaijani +aziz +bab +babel +babrak +baby +bachelet +back +backbone +backdrop +backer +backfire +background +backing +backlash +backlog +backslide +backsliding +backward +backwardness +backwards +backyard +bacteriological +bad +bade +badly +baffle +bag +baggage +baghdad +bahama +bahamas +bahamian +bahrain +bahraini +bail +bakassi +baker +bakili +baku +balaguer +balance +balanced +balancing +balfour +bali +balk +balkan +balkanization +balkans +ball +ballistic +ballot +balloting +baltic +bamako +ban +banana +band +banda +bandaranaike +bandit +banditry +bandung +bane +bangabandhu +bangkok +bangladesh +bangui +banish +banishment +banjul +bank +banker +banking +bankrupt +bankruptcy +banner +banning +bantu +bantustan +bantustanization +bantustans +banzer +baptize +bar +barack +barak +baranja +barbadian +barbado +barbados +barbarian +barbaric +barbarism +barbarity +barbarous +barbed +barbuda +barcelona +barco +bare 
+barely +bargain +bargaining +barometer +baron +barrack +barrage +barre +barrel +barren +barricade +barrier +barrow +barter +bas +base +basel +baseless +baseline +bashar +bashir +basic +basically +basin +basis +bask +basket +basotho +bassa +bast +bastion +bath +bathe +bather +baton +battalion +batter +battered +battery +battle +battlefield +battlefront +battleground +bauxite +bay +bayonet +bazaar +beach +beacon +beam +bean +bear +bearable +bearer +bearing +beast +beat +beating +beautiful +beauty +became +beckon +becloud +become +becomes +becoming +bed +bedevil +bedrock +bee +beef +beer +befall +befell +befit +beforehand +beg +beget +beggar +begin +beginning +begrudge +behalf +behave +behavior +behaviour +behavioural +behead +beheading +behest +behind +beholden +behoove +behove +beijing +beirut +bekaa +belarus +belarusian +belate +belatedly +beleaguer +beleaguered +belfast +belgian +belgium +belgrade +belie +belief +believe +believer +belittle +belize +belizean +bell +bellicose +belligerence +belligerency +belligerent +bellum +belong +belonging +beloved +belt +bemoan +ben +benazir +bench +benchmark +bend +beneath +benedict +benefactor +beneficent +beneficial +beneficially +beneficiary +benefit +benevolence +benevolent +bengal +bengali +benghazi +benign +benin +benite +benites +benjamin +bent +bequeath +bereaved +bereavement +bereft +beret +berlin +bernadotte +bernard +bernardo +bertrand +beseech +beset +beside +besides +besiege +beslan +besmirch +bespeak +bestial +bestiality +bestow +bet +betancur +bethlehem +betray +betrayal +better +betterment +bettino +beware +bewilder +bewildering +bewilderment +beye +beyond +bhutan +bhutanese +bhutto +bias +biased +bible +biblical +bicentenary +bicentennial +bicesse +bickering +bicommunal +bid +bidder +bidding +biennial +biennium +big +bight +bigot +bigotry +biketawa +biko +bikram +bilateral +bilateralism +bilaterally +bility +bill +billion +bin +binary +bind +binding +bio +biodiversity +bioenergy +biofuel +biological +biology 
+biomass +biometric +biosphere +biotechnology +bipartisan +bipolar +bipolarism +bipolarity +bipolarization +bir +bird +birendra +birth +birthday +birthplace +birthright +bisexual +bishkek +bishop +bissau +bit +bite +bitter +bitterly +bitterness +biya +bizarre +bizonal +black +blacklist +blackmail +blade +blair +blaise +blame +blameless +blanche +blanco +blandishment +blank +blanket +blasphemy +blast +blatant +blatantly +blaze +bleaching +bleak +bleaker +bleed +bleeding +blemish +blend +blending +bless +blessed +blessing +blight +blind +blindly +blindness +blinker +blitzkrieg +bloated +bloc +block +blockade +blockage +blocking +blocs +blondin +blood +bloodbath +blooded +bloodiest +bloodless +bloodletting +bloodshe +bloodshed +bloodstaine +bloodthirsty +bloody +bloom +blossom +blossoming +blot +blow +bludgeon +blue +blueprint +bluff +blunder +blunt +bluntly +blur +board +boast +boat +bob +bode +body +boer +bogge +boggle +bogota +bogus +boigny +boil +boipatong +bokassa +boko +bold +boldly +boldness +bolivar +bolivarian +bolivia +bolivian +bolivians +bolster +bolívar +bomb +bombard +bombardment +bomber +bombing +bona +bond +bondage +bone +bongo +boni +bonn +bonus +book +boom +boomerang +boon +boost +boot +booth +bootstrap +bophuthatswana +border +borderless +borderline +bore +boris +borrow +borrower +borrowing +bosnia +bosnian +boss +botha +bother +botswana +bottle +bottleneck +bottom +bottomless +boubacar +bougainville +boumediene +bounce +bound +boundary +bounden +boundless +bountiful +bounty +bourgeois +bourgeoisie +bourguiba +bout +bouteflika +boutros +bow +bowl +box +boy +boycott +bozizé +brace +bradford +brady +brahimi +brain +brake +branch +brand +brandish +brandt +brasilia +bratislava +brave +bravely +bravery +bravo +brazen +brazenly +brazil +brazilian +brazzaville +breach +bread +breadth +break +breakdown +breaking +breakthrough +breakup +breast +breath +breathe +breathing +breathtake +breathtaking +breed +breeding +breeze +bretton +brevity +brew +brezhnev 
+bribe +bribery +bric +brick +bridge +bridgehead +bridgetown +bridging +brief +briefing +briefly +brigade +brigadier +bright +brighten +brighter +brilliance +brilliant +brilliantly +brim +bring +bringing +brink +brinkmanship +bristle +britain +british +broach +broad +broadband +broadcast +broadcasting +broaden +broadening +broader +broadly +brockmann +broken +broker +brook +brooks +brooksrandolph +brother +brotherhood +brotherly +brow +brown +broz +brundtland +brunei +brunt +brush +brussels +brutal +brutality +brutalize +brutally +brute +brutish +bubble +buchar +bucharest +bud +budap +budapest +buddha +buddhism +buddhist +budget +budgetary +budgeting +buenos +buffer +buffet +build +builder +building +buildup +bujumbura +bulgaria +bulgarian +bulk +bull +bullet +bully +bullying +bulwark +bunker +buoyant +bur +burden +burdensome +bureau +bureaucracy +bureaucrat +bureaucratic +burgeon +burial +burkina +burma +burmese +burn +burner +burning +burst +burton +burundi +burundian +bury +bus +busan +bush +busily +business +businesslike +businessman +busy +butcher +butchery +butler +butter +button +buttress +buy +buyer +buying +buyoya +byelorussia +byelorussian +bygone +bypass +byproduct +bystander +bédié +cabinda +cabinet +cable +cabo +cabora +cabral +cadre +caesar +caetano +cairns +cairo +cake +calamitie +calamitous +calamity +calculate +calculated +calculation +caldera +calderón +caledonia +caledonian +calendar +caliber +calibrate +calibre +california +caliphate +call +callaghan +calling +callous +callously +callousness +calm +calmly +calmness +calumny +calvary +cam +cambodia +cambodian +cambodians +came +camera +cameroon +cameroonian +camouflage +camp +campaign +campin +campus +canada +canadian +canadians +canal +canberra +cancel +cancellation +cancer +cancerous +cancun +cancún +candid +candidacy +candidate +candidature +candidly +candle +candor +candour +cane +cannon +cannot +canoe +canon +cantonment +canvas +canvass +cap +capability +capable +capably +capacity +cape 
+capita +capital +capitalism +capitalist +capitalistic +capitalization +capitalize +capitulate +capitulation +caprice +capricious +captain +captive +captivity +capture +caputo +car +caraca +caracas +carbon +card +cardinal +cardiovascular +cardoso +care +career +careful +carefully +careless +carelessness +caretaker +cargo +caribbean +caricature +caricom +caring +carletonville +carlos +carlsson +carnage +carpet +carrier +carrington +carrot +carry +carrying +cart +carta +cartagena +carte +cartel +carter +cartoon +carve +carving +casablanca +case +cash +casino +caspian +cast +caste +castle +castro +casual +casually +casualty +cat +cataclysm +cataclysmic +catalog +catalogue +catalyse +catalyst +catalytic +catalyze +catapult +catastrophe +catastrophic +catch +catchword +categorical +categorically +categorization +categorize +category +cater +cathedral +catherine +catholic +cattle +caucasian +caucasus +cauldron +causal +cause +caution +cautious +cautiously +cave +cayman +cease +ceasefire +ceaseless +ceaselessly +ceausescu +cede +ceiling +celac +celebrate +celebration +celestial +cell +celsius +cement +cemetery +censor +censorship +censure +census +cent +centenary +centennial +center +central +centrality +centralization +centralize +centralized +centrally +centre +centrepiece +centrifugal +century +cereal +ceremonial +ceremony +certain +certainly +certainty +certificate +certification +certify +cesar +cessation +cession +cetera +ceuta +ceyhan +ceylon +cfa +cfe +chad +chadian +chago +chagos +chagrin +chain +chair +chairing +chairman +chairmanship +chairperson +chalk +challenge +challenging +chamber +chamorro +champion +championing +chance +chancellor +change +changed +changing +channel +channeling +channelling +chant +chaos +chaotic +chap +chapter +chapultepec +character +characterise +characteristic +characteristically +characterization +characterize +charade +charge +charitable +charity +charles +charm +chart +charter +chase +chasm +chat +chattel +chatter +chauvinism 
+chauvinist +chauvinistic +che +cheap +cheaply +cheat +chechnya +check +checkpoint +cheddi +cheer +cheerful +chemical +chemistry +cherish +cherished +chernobyl +chess +chest +chi +chiang +chicago +chicken +chief +chiefly +child +childbirth +childhood +childish +children +chile +chilean +chill +chilling +chiluba +china +chinese +chip +chirac +chissano +chittagong +chlorofluorocarbon +choice +choke +cholera +choose +choosing +chorus +chou +choudhury +chris +christ +christian +christianity +christians +christmas +christopher +chrome +chronic +chronically +chronicle +chronological +chung +church +churchill +churn +chávez +cia +cica +cilss +circle +circuit +circular +circulate +circulation +circumscribe +circumspection +circumstance +circumstantial +circumvent +cis +ciskei +citadel +cite +citizen +citizenry +citizenship +city +civic +civil +civilian +civilisation +civilised +civility +civilization +civilizational +civilize +civilized +civilizing +claim +claimant +clamor +clamour +clan +clandestine +clandestinely +clarification +clarify +clarion +clarity +clark +clash +class +classic +classical +classification +classify +classroom +claude +clause +claw +clay +cle +clean +cleanse +cleansing +clear +clearance +clearing +clearly +cleavage +clemency +clergy +clever +cleverly +cliche +cliché +client +climate +climatic +climax +climb +cling +clinic +clinton +clique +cloak +clock +cloning +close +closed +closely +closeness +closing +closure +clothe +clothing +cloud +clouded +clout +club +clue +clumsy +clung +cluster +clutch +cmea +cnn +coal +coalesce +coalition +coast +coastal +coastline +coat +cobalt +coca +cocaine +cochabamba +cockpit +cocktail +cocoa +code +codesa +codification +codify +coerce +coercion +coercive +coexist +coexistence +coffee +coffer +coffin +cogent +cogently +cognisance +cognizance +cognizant +cohabitation +coherence +coherent +coherently +cohesion +cohesive +cohesiveness +cohort +coin +coincide +coincidence +coincidental +coincidentally +cold +coldly 
+collaborate +collaboration +collaborative +collaboratively +collaborator +collapse +collate +collateral +colleague +collect +collection +collective +collectively +collectivism +collectivity +college +collide +collision +colloquium +collude +collusion +cologne +colombia +colombian +colombo +colon +colonel +colonial +colonialism +colonialist +colonist +colonization +colonize +colonized +colonizer +colonizing +colony +color +colored +colossal +colour +coloured +colourful +columbia +columbus +column +com +combat +combatant +combating +combination +combine +combined +combustible +come +comer +comesa +comfort +comfortable +comfortably +comforting +coming +comity +command +commander +commandment +commando +commemorate +commemoration +commemorative +commence +commencement +commend +commendable +commendation +commensurate +comment +commentary +commentator +commerce +commercial +commercialization +commercially +commiserate +commission +commissioner +commit +commitment +committed +committee +commodity +common +commonality +commonly +commonplace +commonwealth +communal +commune +communicable +communicate +communication +communion +communique +communiqué +communism +communist +community +communityí +comore +comorian +comoro +comoros +compact +companion +company +compaore +compaoré +comparable +comparative +comparatively +compare +comparison +compartment +compartmentalization +compass +compassion +compassionate +compatibility +compatible +compatriot +compel +compelling +compendium +compensate +compensation +compensatory +compete +competence +competency +competent +competently +competition +competitive +competitively +competitiveness +competitor +compilation +compile +complacency +complacent +complacently +complain +complaint +complement +complementarily +complementarity +complementary +complete +completely +completion +complex +complexion +complexity +compliance +compliant +complicate +complicated +complication +complicit +complicity +complie +compliment +comply +component 
+compose +composite +composition +composure +compound +comprehend +comprehension +comprehensive +comprehensively +comprehensiveness +compress +comprise +compromise +compulsion +compulsory +compunction +computer +computerized +comrade +con +conakry +conceal +concealment +concede +conceit +conceivable +conceivably +conceive +concentrate +concentrated +concentration +concept +conception +conceptual +conceptualize +conceptually +concern +concerned +concert +concertation +concerted +concession +concessional +concessionary +conciliate +conciliation +conciliatory +concise +conclave +conclude +concluding +conclusion +conclusive +conclusively +concoct +concomitant +concomitantly +concord +concordance +concrete +concretely +concretization +concretize +concur +concurrence +concurrent +concurrently +condemn +condemnable +condemnation +condemnatory +condescend +condescension +condition +conditional +conditionality +conditionally +conditioning +condolence +condominium +condone +conducive +conduct +conducting +conduit +cone +conf +confederal +confederate +confederation +confer +conference +confess +confession +confessional +confidence +confident +confidential +confidently +configuration +confine +confinement +confirm +confirmation +confiscate +confiscation +conflagration +conflict +conflicting +confluence +conform +conformity +confound +confront +confrontation +confrontational +confuse +confused +confusing +confusion +cong +congenial +conglomerate +congo +congolese +congratulate +congratulation +congratulatory +congress +congressional +congressman +conjecture +conjunction +conjuncture +conjure +connect +connected +connection +connectivity +connexion +connivance +connive +connotation +conquer +conquered +conqueror +conquest +conscience +conscientious +conscientiously +conscious +consciously +consciousness +conscript +conscription +consecrate +consecration +consecutive +consensual +consensus +consent +consequence +consequent +consequential +consequently +conservation +conservatism 
+conservative +conserve +consider +considerable +considerably +consideration +considered +consign +consist +consistency +consistent +consistently +consolation +console +consolidate +consolidated +consolidation +consonance +consonant +consortium +conspicuous +conspicuously +conspiracy +conspirator +conspire +constancy +constant +constantly +constellation +consternation +constituency +constituent +constitute +constituted +constitution +constitutional +constitutionality +constitutionally +constitutive +constrain +constraint +construct +construction +constructive +constructively +construe +consul +consular +consulate +consult +consultant +consultation +consultative +consume +consumer +consumerism +consuming +consummate +consummation +consumption +contact +contadora +contagion +contagious +contain +container +containment +contaminate +contamination +conte +contemplate +contemplation +contemporary +contempt +contemptible +contemptuous +contemptuously +contend +content +contention +contentious +contentment +contest +contestant +contested +context +contexts +contiguous +continent +continental +contingency +contingent +continual +continually +continuance +continuation +continue +continued +continuity +continuous +continuously +continuum +contour +contours +contra +contract +contracting +contraction +contractual +contradict +contradiction +contradictory +contrary +contras +contrast +contravene +contravention +contribute +contributing +contribution +contributor +contributory +contrive +contrived +control +controllable +controversial +controversy +conté +convene +convenience +convenient +conveniently +convening +convention +conventional +converge +convergence +convergent +converging +conversation +conversely +conversion +convert +convertibility +convey +conveying +convict +conviction +convince +convinced +convincing +convincingly +convocation +convoy +convulse +convulsion +convulsive +cook +cool +cooperate +cooperation +cooperative +cooperatively +coordinate +coordinated 
+coordinating +coordination +coordinator +cop +cope +copenhagen +copper +copy +coral +cord +cordial +cordiality +cordially +cordon +cordovez +core +corn +corner +cornerstone +corollary +corp +corporate +corporation +corps +corpse +corpus +corr +correa +correct +correction +corrective +correctly +correctness +correlate +correlation +correlative +correspond +correspondence +correspondent +corresponding +correspondingly +corridor +corroborate +corrode +corrosive +corrupt +corruption +corvalan +cosmetic +cosmic +cosmo +cosmopolitan +cosponsor +cost +costa +costly +cote +cotonou +cotton +couch +could +coun +council +councilís +counsel +counselling +count +countdown +countenance +counter +counteract +counteraction +counterbalance +counterfeit +countermeasure +counterpart +counterproductive +counterrevolutionary +counterterrorism +countervail +counterweight +counting +countless +countrie +countries +countryman +countryside +countrywide +countryâs +countryí +county +coup +couple +courage +courageous +courageously +course +court +courteous +courtesy +cousin +covenant +cover +coverage +covert +covertly +covet +coveted +covetousness +cow +coward +cowardice +cowardly +cpa +cplp +crack +crackdown +cradle +craft +crafting +craftsmanship +crash +crass +crave +craven +craving +craxi +craze +crazy +crease +create +creation +creative +creatively +creativity +creator +creature +credence +credential +credibility +credible +credibly +credit +creditable +creditor +creditworthiness +credo +creed +creep +crescendo +crescent +crete +crew +crime +crimea +crimean +criminal +criminality +criminalize +criminally +cripple +crippled +crippling +crisis +criss +criterion +critic +critical +critically +criticism +criticize +croat +croatia +croatian +crocodile +crony +crop +cros +cross +crossfire +crossing +crossroad +crowd +crowded +crown +crowning +crucial +crucially +crucible +crude +crudest +cruel +cruell +cruelly +cruelty +cruise +crumb +crumble +crumbling +crusade +crusader +crush +crushing 
+crux +cruz +cry +crystal +crystallization +crystallize +csce +ctbt +ctc +cuba +cuban +cubans +cubic +cue +cuellar +cull +culminate +culmination +culpability +culpable +culprit +cult +cultivate +cultivation +cultural +culturally +culture +cultured +cumbersome +cumulative +cumulatively +cunene +cunning +cup +cur +curable +curative +curb +curbing +cure +curfew +curiosity +curious +curiously +currency +current +currently +curriculum +curse +cursory +curtail +curtailment +curtain +curve +cushion +custodial +custodian +custody +custom +customary +customer +customs +cut +cutback +cutting +cyber +cybercrime +cybersecurity +cyberspace +cycle +cyclic +cyclical +cyclone +cynic +cynical +cynically +cynicism +cypriot +cyprus +cyrus +czech +czechoslovak +czechoslovakia +côte +cùte +dada +daddah +dae +daesh +dag +daily +dakar +dam +damage +damaging +damascus +damocle +dampen +dams +dance +danger +dangerous +dangerously +dangle +daniel +danish +dante +danube +dar +dare +daresay +darfur +daring +dark +darken +darkest +darkness +darussalam +dash +dastardly +data +database +datum +daughter +daunt +daunting +dauntless +david +davos +daw +dawda +dawn +dawning +day +dayan +daylight +dayton +dazzle +dead +deadline +deadlock +deadlocke +deadlocked +deadly +deaf +deafen +deal +dealer +dealing +dean +dear +dearly +dearth +death +debacle +debar +debase +debasement +debatable +debate +debating +debilitate +debit +debris +debt +debtor +deby +decade +decadence +decadent +decades +decapitate +decay +deceased +deceit +deceitful +deceive +decelerate +deceleration +december +decency +decent +decently +decentralization +decentralize +decentralized +deception +deceptive +decide +decidedly +decimate +decimation +decision +decisive +decisively +decisiveness +declaration +declaratory +declare +decline +decolonisation +decolonization +decolonize +decommission +decommissioning +decrease +decree +decreed +decry +dedicate +dedicated +dedication +deduce +deduct +deduction +deed +deem +deep +deepen 
+deepening +deepest +deeply +defamation +defame +default +defeat +defeatism +defect +defective +defence +defenceless +defend +defendant +defender +defense +defenseless +defensible +defensive +defer +deference +defiance +defiant +defiantly +deficiency +deficient +deficit +defile +define +defining +definite +definitely +definition +definitive +definitively +deflationary +deflect +defoliant +deforestation +deform +defray +defunct +defuse +defusing +defy +degenerate +degeneration +degradation +degrade +degraded +degrading +degree +dehumanization +dehumanize +dehumanizing +deir +deiss +del +delano +delay +delayed +delaying +delegate +delegation +delegitimize +delete +deleterious +delhi +deliberate +deliberately +deliberation +deliberative +delicacy +delicate +delicately +delight +delighted +delimit +delimitation +delineate +delineation +delinquency +delinquent +deliver +deliverable +deliverance +delivering +delivery +delta +delude +deluge +delusion +delve +demagogic +demagogue +demagoguery +demagogy +demand +demanding +demarcate +demarcation +demarche +demean +demeaning +demilitarisation +demilitarise +demilitarised +demilitarization +demilitarize +demilitarized +demine +demining +demise +demit +demo +demobilization +demobilize +democracy +democrat +democratic +democratically +democratisation +democratise +democratization +democratize +democratized +democrats +demographic +demographically +demography +demolish +demolition +demon +demonize +demonstrable +demonstrably +demonstrate +demonstrated +demonstration +demonstrator +demoralize +den +dence +denial +denigrate +denigration +denis +denktas +denktash +denmark +denomination +denominator +denote +denounce +dense +densely +density +dent +denuclearization +denuclearize +denuclearized +denude +denunciation +denver +deny +depart +department +departure +depend +dependable +dependant +dependence +dependency +dependent +depict +deplete +depletion +deplorable +deplore +deploy +deployable +deployment +depoliticize +depopulation 
+deport +deportation +depose +deposit +depositary +deposition +depository +depot +deprecate +depreciate +depreciation +depredation +depress +depressed +depressing +depression +deprivation +deprive +deprived +depth +deputy +der +derail +derailment +deregulation +derek +dereliction +deride +derision +derivative +derive +deriving +derogate +derogation +des +desalination +descend +descendant +descent +describe +description +desecrate +desecration +desert +desertification +deserve +deserved +deservedly +deserving +design +designate +designation +desirability +desirable +desire +desirous +desist +desk +desmond +desolate +desolation +despair +despairing +desperate +desperately +desperation +despicable +despise +despite +despoil +despoiled +despoliation +despondency +despondent +despot +despotic +despotism +destabilisation +destabilising +destabilization +destabilize +destabilizing +destination +destine +destiny +destitute +destitution +destroy +destroyer +destruction +destructive +destructiveness +detach +detachment +detail +detailed +detain +detainee +detect +detection +detente +detention +deter +deteriorate +deterioration +determinant +determination +determine +determined +determinedly +determinism +deterrence +deterrent +detest +detestable +detonate +detonation +detour +detract +detracting +detractor +detriment +detrimental +dev +devaluation +devalue +devastate +devastated +devastating +devastatingly +devastation +devel +develop +developed +developing +development +developmental +deviant +deviate +deviation +device +devil +devilish +devious +devise +devising +devoid +devolution +devolve +devote +devoted +devotedly +devotion +devour +devoutly +dexterity +dhabi +dhaka +dhanapala +dhlakama +diabete +diabetes +diabolic +diabolical +diagnose +diagnosis +dialect +dialectic +dialectical +dialectics +dialog +dialogue +diametrically +diamond +diarrhoea +diaspora +diatribe +dichotomy +dick +dictate +dictator +dictatorial +dictatorship +dictionary +dictum +didier +die +diego 
+diet +dietrich +differ +difference +different +differential +differentiate +differentiated +differentiation +differently +difficult +difficulty +diffuse +diffusion +dig +digest +digit +digital +digitize +dignified +dignify +dignitary +dignity +digress +dike +diktat +dilatory +dilemma +dili +diligence +diligent +diligently +dilma +dilute +dilution +dim +dimension +dimensional +diminish +diminished +diminishing +diminution +din +dinner +dint +diogo +diouf +dioxide +dip +diplomacy +diplomat +diplomatic +diplomatically +dire +direct +direction +directive +directly +director +directorate +dirty +dis +disability +disable +disabled +disadvantage +disadvantaged +disadvantageous +disaffect +disaffection +disagree +disagreement +disappear +disappearance +disappoint +disappointed +disappointing +disappointingly +disappointment +disapproval +disapprove +disarm +disarma +disarmament +disarray +disassociate +disaster +disastrous +disastrously +disavow +disband +disbanding +disbelief +disburse +disbursement +discard +discern +discernible +discernment +discharge +disciple +disciplinary +discipline +disciplined +disclose +disclosure +discomfort +disconcert +disconcerting +disconnect +discontent +discontinuance +discontinue +discord +discordant +discount +discourage +discouraged +discouragement +discouraging +discourse +discover +discovery +discredit +discreet +discreetly +discrepancy +discretion +discretionary +discriminate +discrimination +discriminatory +discuss +discussion +disdain +disdainful +disease +disenchanted +disenchantment +disenfranchise +disenfranchised +disenfranchisement +disengage +disengagement +disentangle +disequilibrium +disfigure +disgrace +disgraceful +disguise +disguised +disgust +disgusting +dish +disharmony +dishearten +disheartening +dishonest +dishonour +disillusion +disillusioned +disillusionment +disincentive +disinformation +disingenuous +disinherit +disintegrate +disintegration +disintereste +disinterested +disinvestment +dislike +dislocate 
+dislocation +dislodge +dismal +dismally +dismantle +dismantlement +dismantling +dismay +dismayed +dismember +dismemberment +dismiss +dismissal +disobedience +disorder +disorderly +disorganization +disorganize +disorient +disown +disparage +disparate +disparity +dispassionate +dispassionately +dispatch +dispatching +dispel +dispensary +dispensation +dispense +dispersal +disperse +dispersed +dispersion +displace +displaced +displacement +display +displeasure +disposable +disposal +dispose +disposed +disposition +dispossess +dispossession +disproportion +disproportionate +disproportionately +disprove +dispute +disqualify +disquiet +disquieting +disregard +disregarded +disrespect +disrespectful +disrupt +disruption +disruptive +dissatisfaction +dissatisfied +disseminate +dissemination +dissension +dissent +disservice +dissident +dissimilar +dissipate +dissipation +dissociate +dissolution +dissolve +dissuade +dissuasion +distance +distant +distil +distinct +distinction +distinctive +distinctly +distinguish +distinguished +distort +distorted +distortion +distract +distraction +distress +distressed +distressing +distressingly +distribute +distribution +distributive +district +distrust +disturb +disturbance +disturbed +disturbing +disturbingly +disunite +disunity +ditch +diverge +divergence +divergency +divergent +diverging +diverse +diversification +diversified +diversify +diversion +diversionary +diversity +divert +divest +divide +divided +dividend +dividing +divine +divisible +division +divisive +divisiveness +divorce +dizzy +dizzying +djakarta +djamena +djibouti +dniester +docile +dock +doctor +doctrinaire +doctrinal +doctrine +document +documentary +documentation +dodge +doe +doer +dog +dogma +dogmatic +dogmatism +doha +dole +dollar +dom +domain +dome +domestic +domestically +domicile +dominance +dominant +dominate +dominating +domination +domineering +domingo +dominica +dominican +dominion +domino +donate +donation +donor +doom +doomsday +door +doorstep +dormant 
+dos +dose +dossier +dot +double +doubling +doubly +doubt +doubtful +doubtless +douglas +dovetail +downfall +downgrade +downgrading +downing +downplay +downright +downsize +downsizing +downstream +downtrodden +downturn +downward +dozen +draconian +draft +drafter +drafting +drag +drain +drama +dramatic +dramatically +dramatize +drastic +drastically +draw +drawback +drawing +dread +dreadful +dream +dreamer +drench +dress +drift +driftnet +drill +drilling +drink +drinking +drive +driver +driving +drone +drop +drought +drown +drug +drum +dry +drying +dual +duality +duarte +dub +dubious +dublin +duchy +due +dull +duly +duma +dumas +dumbarton +dump +dumping +dupe +duplicate +duplication +duplicity +durability +durable +duration +durban +duress +dushanbe +dust +dustbin +dutch +dutifully +duty +duvalier +dwarf +dwell +dweller +dwelling +dwelt +dwindle +dynamic +dynamically +dynamism +dynamite +dynasty +dysfunction +dysfunctional +détente +eager +eagerly +eagerness +ear +early +earmark +earn +earner +earnest +earnestly +earnestness +earning +earth +earthly +earthquake +ease +easily +easing +east +eastern +eastw +eastward +easy +eat +eban +ebb +ebola +eca +eccas +ece +echelon +echeverria +echo +ecla +eclipse +eco +ecological +ecologically +ecology +ecomog +economic +economical +economically +economist +economize +economy +ecosoc +ecosystem +ecowa +ecowas +ecuador +ecuadorian +ecumenical +edge +edict +edifice +edify +edition +editorial +eduard +eduardo +educate +educated +education +educational +educator +edvard +edward +eec +eelam +effect +effecting +effective +effectively +effectiveness +efficacious +efficacy +efficiency +efficient +efficiently +effort +effrontery +egalitarian +egg +ego +egocentric +egoism +egoistic +egotism +egotistical +egregious +egypt +egyptian +egyptians +ehud +eight +eighteen +eighteenth +eighth +eighthly +eighty +einstein +eisenhower +either +eject +eke +elaborate +elaboration +elapse +elated +elation +eld +elder +elderly +elect +election +elective 
+elector +electoral +electorate +electric +electrical +electricity +electrification +electronic +elegant +element +elemental +elementary +elephant +elevate +elevated +elevation +eleven +eleventh +elias +eliasson +elicit +eligibility +eligible +eliminate +elimination +elite +elitist +ell +ellen +eloquence +eloquent +eloquently +else +elsewhere +elucidate +elude +elusive +emaciated +emanate +emanating +emancipate +emancipation +emasculate +embargo +embark +embarrass +embarrassed +embarrassing +embarrassment +embassy +embattle +embed +ember +embitter +emblem +emblematic +embodiment +embody +embolden +embrace +embroil +embryo +embryonic +emerge +emerged +emergence +emergency +emergent +emigrant +emigrate +emigration +emilio +eminence +eminent +eminently +emir +emirate +emirates +emissary +emission +emit +emitter +emomali +emotion +emotional +emotionally +empathize +empathy +emperor +emphasis +emphasise +emphasize +emphatic +emphatically +empire +empirical +emplacement +employ +employee +employer +employment +empower +empowerment +empt +empte +emptiness +emptive +empty +emulate +emulation +enable +enabler +enact +enactment +encamp +encampment +encapsulate +encircle +encirclement +enclave +enclose +encompass +encounter +encourage +encouraged +encouragement +encouraging +encroach +encroachment +encumber +encumbrance +encyclical +end +endanger +endangered +endeavor +endeavour +endeavours +ended +endemic +ending +endless +endlessly +endogenous +endorse +endorsement +endow +endowed +endowment +endurance +endure +enduring +enemy +energetic +energetically +energize +energized +energy +enfeeble +enforce +enforceable +enforcement +enforcer +engage +engaged +engagement +engender +engendering +engine +engineer +engineering +england +english +engrave +engulf +engulfs +enhance +enhanced +enhancement +enhancing +enjoin +enjoy +enjoyment +enlarge +enlarged +enlargement +enlighten +enlightened +enlightening +enlightenment +enlist +enmesh +enmity +ennoble +enormity +enormous +enormously 
+enough +enquiry +enrich +enriched +enriching +enrichment +enrique +enrol +enrolment +enshrine +enshrined +enslave +enslavement +ensue +ensure +ensuring +entail +entangle +entanglement +entente +enter +enterprise +entertain +enthusiasm +enthusiastic +enthusiastically +entice +entire +entirely +entirety +entitle +entitled +entitlement +entity +entrance +entrap +entreat +entreaty +entrench +entrenched +entrenchment +entrepreneur +entrepreneurial +entrepreneurship +entrust +entry +enumerate +enumeration +enunciate +enunciation +envelop +enver +enviable +environ +environment +environmental +environmentalist +environmentally +envisage +envision +envoy +envy +ephemeral +epic +epicentre +epidemic +episode +epitome +epitomize +epoch +epochal +equal +equality +equalize +equally +equanimity +equate +equation +equator +equatorial +equilibrium +equip +equipment +equipped +equipping +equitable +equitably +equity +equivalent +equivocal +equivocate +equivocation +era +eradicate +eradication +erase +ere +erect +erection +eric +erich +erika +eritrea +eritrean +ernest +ernesto +erode +erosion +err +erratic +errol +erroneous +erroneously +error +erstwhile +erudition +erupt +eruption +escalate +escalating +escalation +escap +escape +eschew +escort +escoto +esoteric +especial +especially +espionage +espousal +espouse +esquipula +esquipulas +essay +essence +essential +essentially +essy +estab +establish +establishe +established +establishment +estaing +estate +este +esteem +esteemed +estimate +estimation +estonia +estonian +estrangement +etat +etc +etch +eternal +eternally +eternity +ethanol +ether +ethic +ethical +ethically +ethiopia +ethiopian +ethnic +ethnically +ethnicity +ethno +ethos +etre +euphemism +euphemistically +euphoria +euphoric +euphrate +eurasia +eurasian +euro +europa +europe +european +europeans +eurozone +evacuate +evacuation +evade +evaluate +evaluation +evans +evaporate +evasion +evasive +evatt +eve +even +evening +evenly +event +eventful +eventual +eventuality 
+eventually +ever +everlaste +everlasting +evermore +every +everybody +everyday +everyone +everything +everywhere +evict +eviction +evidence +evident +evidently +evil +evince +evo +evoke +evolution +evolutionary +evolve +evolving +exacerbate +exacerbated +exacerbation +exact +exaction +exactly +exaggerate +exaggerated +exaggeration +exalt +exaltation +exalted +examination +examine +example +exasperate +exasperation +excavation +exceed +exceedingly +excel +excellence +excellency +excellent +except +exception +exceptional +exceptionally +excerpt +excess +excessive +excessively +exchange +excise +excited +excitement +exciting +exclaim +exclude +exclusion +exclusionary +exclusive +exclusively +exclusiveness +exclusivist +exclusivity +excruciating +excuse +execute +execution +executioner +executive +exemplary +exemplified +exemplify +exempt +exemption +exercise +exert +exertion +exhaust +exhausted +exhausting +exhaustion +exhaustive +exhaustively +exhibit +exhibition +exhort +exhortation +exigency +exile +exiled +exist +existence +existent +existential +existing +exit +exodus +exogenous +exonerate +exorbitant +exorcise +exotic +expand +expanse +expansion +expansionary +expansionism +expansionist +expansive +expatriate +expect +expectancy +expectation +expected +expedience +expediency +expedient +expedite +expedition +expeditious +expeditiously +expel +expend +expenditure +expense +expensive +experience +experienced +experiment +experimental +experimentation +expert +expertise +expiration +expire +expiry +explain +explanation +explicit +explicitly +explode +exploit +exploitable +exploitation +exploitative +exploiter +exploiting +exploration +exploratory +explore +explorer +explosion +explosive +expo +exponent +exponential +exponentially +export +exportable +exportation +exporter +exporting +expose +exposed +exposition +exposure +expound +express +expression +expressive +expressly +expropriate +expropriation +expulsion +extant +extend +extended +extension +extensive 
+extensively +extent +exterminate +extermination +external +externally +extinct +extinction +extinguish +extirpate +extol +extoll +extort +extortion +extra +extract +extraction +extractive +extradite +extradition +extrajudicial +extraneous +extraordinarily +extraordinary +extraterritorial +extravagance +extravagant +extreme +extremely +extremism +extremist +extricate +eyadema +eye +ezulwini +fabric +fabricate +fabrication +fabulous +facade +face +faceless +facet +faceted +facile +facilitate +facilitation +facilitator +facility +fact +faction +factional +factionalism +facto +factor +factory +factual +faculty +fade +fading +fahd +fail +failed +failing +failure +faint +fair +faire +fairer +fairly +fairness +faisal +fait +faith +faithful +faithfully +faithfulness +fake +falkland +fall +fallacious +fallacy +fallen +fallout +false +falsehood +falsely +falsification +falsify +falter +famagusta +fame +familiar +familiarity +familiarize +family +famine +famous +fan +fanatic +fanatical +fanaticism +fancy +fanfare +fantastic +fantasy +fao +far +farabundo +farah +faraway +farc +farce +fare +farewell +farm +farmer +farming +farmland +farsighte +farsightedness +farth +fascinating +fascism +fascist +fashion +fashionable +fashioning +faso +fast +fat +fatal +fatalism +fatalistic +fatality +fatally +fate +fateful +fatf +father +fatherland +fathom +fatigue +fault +faulty +fauna +favor +favorable +favorably +favorite +favour +favourable +favourably +favoured +favourite +favouritism +fbr +fear +fearful +fearfully +fearless +fearlessly +fearsome +feasibility +feasible +feast +feat +feather +feature +february +federal +federalism +federate +federated +federation +federative +fee +feeble +feed +feeding +feel +feeling +feels +feign +felicitation +felicitous +felipe +felix +fell +fellow +fellowship +felt +female +fence +fend +ference +ferment +fernando +fernández +ferocious +ferociously +ferocity +fertile +fertility +fertilizer +fervent +fervently +fervor +fervour +fester +festival 
+festivity +fetch +fetched +fetter +feud +feudal +feudalism +fever +feverish +feverishly +fez +fiasco +fiat +fibre +fiction +fictional +fictitious +fide +fidel +fidelity +field +fierce +fiercely +fiercer +fifteen +fifteenth +fifth +fifthly +fiftieth +fifty +fight +fighter +fighting +figueiredo +figuere +figuratively +figure +fiji +fijian +file +filipino +filipinos +fill +film +filter +filth +fin +final +finality +finalization +finalize +finally +finance +financial +financially +financier +financing +find +finding +fine +finesse +finger +finish +finished +finite +finland +finnish +fir +fire +firearm +firewood +firing +firm +firmly +firmness +first +firsthand +firstly +fiscal +fish +fisherman +fishery +fishing +fissile +fissionable +fist +fit +fitting +fittingly +fitzgerald +five +fivefold +fix +fixed +fixing +fixture +flag +flagging +flagrant +flagrantly +flagship +flame +flank +flare +flash +flashpoint +flat +flatly +flatter +flaunt +flaw +flawed +fledge +fledged +fledgling +flee +fleet +fleeting +flesh +flew +flex +flexibility +flexible +flexibly +flicker +flight +flimsy +flinch +fling +flip +flo +float +flock +flood +flooding +floor +flora +florida +florin +flotilla +flounder +flour +flourish +flourishing +flout +flouting +flow +flower +flowering +floyd +flu +fluctuate +fluctuation +fluid +fluidity +flurry +flush +flux +fly +fmln +fnl +focal +focus +focused +fodder +foe +fog +foil +foist +fold +folk +follow +follower +following +folly +foment +fomenting +fond +food +foodstuff +fool +foolhardy +foolish +foot +football +foothold +footing +footprint +footstep +forbearance +forbid +force +forced +forceful +forcefully +forcefulness +forcible +forcibly +ford +fore +forebear +foreboding +forecast +forecasting +foreclose +forefather +forefront +forego +foregoing +foregone +foreground +foreign +foreigner +foremost +forerunner +foresaw +foresee +foreseeable +foreshadow +foresight +forest +forestall +forestation +forestry +foretell +forever +foreword +forfeit +forfeiture 
+forge +forget +forging +forgive +forgiveness +forgo +fork +forlorn +form +formal +formalism +formality +formalization +formalize +formally +format +formation +formative +former +formerly +formidable +forming +formosa +formula +formulae +formulate +formulation +forsake +forswear +fort +forth +forthcoming +forthright +forthrightly +forthwith +fortieth +fortification +fortify +fortiori +fortitude +fortnight +fortress +fortuitous +fortunate +fortunately +fortune +forty +forum +forward +forwards +fossil +foster +fostering +fought +foul +found +foundation +foundational +founder +founding +fountain +four +fourfold +fourteen +fourteenth +fourth +fourthly +fox +fraction +fracture +fractured +fragile +fragility +fragment +fragmentary +fragmentation +fragmented +frail +frailty +frame +framer +framework +framing +franc +france +franceville +franchise +francis +francisco +franco +francois +francophone +francophonie +frank +franklin +frankly +frankness +frantic +frantically +françois +fraternal +fraternally +fraternity +fratricidal +fratricide +fraud +fraudulent +fraught +fray +frederick +frederik +free +freedom +freeing +freely +freetown +freeze +freezing +freight +freita +freitas +frelimo +french +frenetic +frente +frenzied +frenzy +frequency +frequent +frequently +fresh +freshly +freshwater +fretilin +friction +friday +friend +friendliness +friendly +friendship +frighten +frightened +frightening +frighteningly +frightful +fringe +fritter +frivolous +fro +front +frontal +frontally +frontier +frontline +frost +frown +frozen +fruit +fruitful +fruitfully +fruition +fruitless +frustrate +frustrated +frustrating +frustratingly +frustration +frías +fuel +fuelled +fuerzas +fugitive +fujimori +fukushima +ful +fulcrum +fulfil +fulfill +fulfilling +fulfillment +fulfilment +full +fullness +fully +function +functional +functionality +functioning +fund +fundamental +fundamentalism +fundamentalist +fundamentally +funding +funeral +furnish +furth +furtherance +furthering +furthermore +fury 
+fuse +fusion +fuss +futile +futility +future +félix +gabon +gabonese +gaborone +gabriel +gage +gain +gainful +gainsay +galaxy +galilee +gallant +gallop +galvanize +gamal +gambari +gambia +gambian +gamble +game +gamut +gandhi +ganev +gang +gange +gangster +gaol +gap +gape +garang +garb +garba +garbage +garcia +garcía +garden +garment +garner +garrison +gas +gaston +gate +gateway +gather +gathering +gatt +gatumba +gauge +gaulle +gay +gayoom +gaza +gaze +gbadolite +gcc +gdp +gear +gef +gem +gemayel +gendarme +gender +gene +genera +generality +generalization +generalize +generalized +generally +generalís +generate +generating +generation +generator +generic +generis +generosity +generous +generously +genesis +genetic +geneva +genie +genital +genius +genoa +genocidal +genocide +genome +genscher +gentium +gentle +gentleman +genuine +genuinely +geo +geographic +geographical +geographically +geography +geological +geometric +geopolitic +geopolitical +geopolitically +george +georgetown +georgia +georgian +geoscience +geostationary +geostrategic +geothermal +germ +german +germane +germans +germany +germinate +gestation +gesture +get +getting +ghajar +ghali +ghana +ghanaian +ghastly +ghetto +ghost +giant +gibraltar +gierek +gift +gifted +gigantic +gilbert +girl +giscard +gist +give +giver +giving +glacial +glacier +glad +gladly +glance +glare +glaring +glaringly +glasnost +glass +gleam +gleneagle +glimmer +glimpse +global +globalisation +globalism +globality +globalization +globalize +globalized +globalizing +globally +globe +gloom +gloomy +glorieuse +glorification +glorify +glorious +glory +gloss +glossed +glove +glow +glowing +gnassingbe +gnassingbé +gnaw +gnp +goal +god +going +golan +gold +golden +goma +good +goodness +goodwill +gorbachev +gordian +gordon +gospel +govern +governability +governance +governing +government +governmental +governmentís +governor +governorate +gowon +grab +grace +gracious +graciously +grade +gradual +gradualism +gradually +graduate +graduation 
+graft +grain +gramme +granary +grand +grandchild +grande +grandeur +grandfather +grandiose +grandmother +grandparent +grant +granting +graphic +graphically +grapple +grasp +grass +grassroots +grateful +gratefully +gratification +gratified +gratify +gratifying +gratitude +gratuitous +grave +gravely +graver +gravest +graveyard +gravity +gray +graze +great +greatly +greatness +greatpower +greece +greed +greedy +greek +green +greenhouse +greenland +greet +greeting +grenada +grenade +grenadian +grenadine +grey +grid +grief +grievance +grieve +grievous +grievously +grim +grind +grip +gro +groan +gromyko +grope +gross +grossly +grotesque +grotius +ground +groundbreake +groundbreaking +groundless +groundwater +groundwork +group +grouping +grow +grower +growth +grudge +grudging +gruesome +guadalajara +guadeloupe +guam +guantanamo +guantánamo +guarantee +guaranteeing +guarantor +guard +guardian +guardianship +guatemala +guatemalan +guatemalteca +guelleh +guerrero +guerrilla +guess +guest +guevara +guidance +guide +guideline +guiding +guido +guilt +guilty +guine +guinea +guineabissau +guinean +guise +gulf +gulfs +gun +gunboat +gunfire +gunnar +gunpoint +gurirab +gusmão +gustav +guyana +habib +habit +habitable +habitat +habitation +habitual +habitually +habre +habyarimana +hackneyed +hadi +hadj +hadji +haemorrhage +haemorrhagic +hafez +hague +hail +haile +hair +haiti +haitian +half +halfway +halifax +hall +hallmark +hallowed +halonen +halt +halting +halve +hamad +hamas +hambro +hamid +hamilton +hamlet +hammarskjold +hammarskjöld +hammer +hamper +hamra +hamstring +hamstrung +han +hand +handed +handedly +handedness +handful +handicap +handicapped +handing +handle +handling +handout +handover +handshake +hang +hanish +hanoi +hans +haphazard +hapless +happen +happening +happily +happiness +happy +haq +haram +harare +harass +harassment +harbinger +harbor +harbour +harbouring +hard +harden +hardening +hardly +hardship +hardware +hardworke +hare +hark +harlem +harm +harmful 
+harmless +harmonious +harmoniously +harmonise +harmonization +harmonize +harmonized +harmony +harness +harnessing +harp +harri +harrowing +harry +harsh +harshly +harshness +harvard +harvest +harvesting +hashemite +hassan +hast +haste +hasten +hastily +hasty +hat +hatch +hatchet +hate +hateful +hatre +hatred +haunt +havana +havel +havoc +hawk +hay +haya +hazard +hazardous +head +headed +heading +headline +headlong +headquarter +headquarters +headway +heal +healing +health +healthy +heap +hear +hearing +heart +heartbreaking +hearte +hearted +heartedly +hearten +heartening +heartfelt +hearth +heartiest +heartily +heartland +heartless +heartrende +hearty +heat +heated +heating +heave +heaven +heavenly +heavily +heavy +hebrew +hebride +hebron +hectare +hectic +hedge +hee +heed +heedless +heel +hegel +hegemonic +hegemonism +hegemonist +hegemonistic +hegemony +height +heighten +heightened +heightening +heinous +heir +held +helicopter +hell +hellenic +hellish +helm +helmet +helms +helmsman +helmut +help +helped +helpful +helping +helpless +helplessly +helplessness +helsinki +hem +hemisphere +hemispheric +hence +henceforth +henceforward +henchman +heng +hennadiy +henri +henrique +henry +herald +herbert +herbicide +herculean +herd +hereafter +hereby +herein +heresy +heretofore +heritage +hero +heroic +heroically +heroin +heroism +herrera +herzegovina +hesitancy +hesitant +hesitate +hesitation +hest +heterogeneous +hew +hezbollah +hiatus +hidden +hide +hideous +hiding +hie +hierarchical +hierarchy +high +higher +highest +highland +highlight +highly +highness +highway +hijack +hijacked +hijacker +hijacking +hike +hill +hilt +himalayan +himalayas +hinder +hindrance +hindsight +hindu +hindus +hinge +hint +hinterland +hipc +hire +hiroshima +hispanic +hispaniola +hissein +historian +historic +historical +historically +history +hit +hitch +hitherto +hitler +hitlerite +hiv +hizbollah +hizbullah +hoax +hobble +hoc +hoist +hold +holder +holding +hole +holiday +holiness +holistic 
+holistically +holkeri +hollai +hollande +hollow +holocaust +holy +homage +home +homegrown +homeland +homeless +homelessness +homicide +homo +homogeneity +homogeneous +homogenous +hon +hondura +honduran +honduras +hone +honecker +honest +honestly +honesty +honey +hong +honiara +honor +honorable +honorably +honorary +honored +honour +honourable +honourably +honoured +honouring +hood +hoodwink +hook +hop +hope +hopeful +hopefully +hopeless +hopelessly +hopelessness +horde +horizon +horizontal +horizontally +hormuz +horn +horrendous +horrible +horrific +horrify +horrifying +horror +horse +horseman +horta +hosni +hospitable +hospital +hospitality +host +hostage +hostile +hostility +hosting +hot +hotbe +hotbed +hotel +hotspot +houari +hound +houphouet +houphouët +hour +house +household +housing +houston +hover +however +hoxha +hrc +hub +hubert +huddle +hue +huge +hugely +hugo +human +humane +humanely +humanism +humanist +humanistic +humanitarian +humanitarianism +humanity +humanization +humanize +humankind +humanly +humayun +humble +humblest +humbly +humiliate +humiliating +humiliation +humility +humour +hun +hunan +hundred +hundredth +hungarian +hungary +hunger +hungry +hunt +hunte +hunter +hunting +hurdle +hurl +hurricane +hurry +hurt +hurtle +husak +husband +husbandry +hussain +hussein +hut +hutu +hutus +hybrid +hydra +hydro +hydrocarbon +hydroelectric +hydrogen +hydropow +hydropower +hygiene +hymn +hyper +hyperinflation +hypocrisy +hypocritical +hypocritically +hypothesis +hypothetical +hysteria +hysterical +iaea +ian +iberian +ibero +ibid +ibn +ibrahim +ibrahimi +ibrd +icao +icc +ice +iceland +icelander +icelandic +icj +icon +icrc +ict +icty +icy +ida +idea +ideal +idealism +idealist +idealistic +ideally +identical +identifiable +identification +identifie +identify +identity +ideological +ideologically +ideologist +ideology +idi +idiosyncrasy +idle +idly +idps +idriss +idyllic +ieng +ifad +ifni +ifor +igad +igadd +ignite +ignoble +ignominious +ignominiously 
+ignominy +ignorance +ignorant +ignore +ihe +ill +illegal +illegality +illegally +illegitimacy +illegitimate +illicit +illiteracy +illiterate +illness +illogical +illogically +illueca +illuminate +illusion +illusive +illusory +illustrate +illustration +illustrative +illustrious +ilo +ilyich +image +imaginable +imaginary +imagination +imaginative +imaginatively +imagine +imam +imbalance +imbroglio +imbue +imf +imitate +imitation +immanuel +immeasurable +immeasurably +immediacy +immediate +immediately +immemorial +immense +immensely +immensity +immerse +immigrant +immigration +imminence +imminent +immobility +immobilize +immoderate +immoral +immorality +immortal +immune +immunity +immunization +immunize +immunodeficiency +immutable +imo +imp +impact +impair +impaired +impairment +impairs +impart +impartial +impartiality +impartially +impasse +impassione +impassioned +impassive +impatience +impatient +impatiently +impeccable +impede +impediment +impel +impend +impending +impenetrable +imperative +imperatively +imperceptible +imperfect +imperfection +imperfectly +imperial +imperialism +imperialist +imperialistic +imperil +imperille +impermissible +impersonal +impervious +impetuous +impetus +impinge +implacable +implacably +implant +implantation +imple +implement +implementable +implementation +implicate +implication +implicit +implicitly +implode +implore +implosion +imply +import +importance +important +importantly +importation +importer +importing +impose +imposition +impossibility +impossible +impotence +impotent +impoverish +impoverished +impoverishment +impracticable +impractical +imprecise +impregnable +imprescriptible +impress +impressed +impression +impressive +impressively +imprint +imprison +imprisonment +improbable +improper +improperly +improve +improved +improvement +improvisation +improvise +imprudent +impudent +impugn +impulse +impunity +impute +imre +inability +inaccessible +inaccurate +inaction +inactive +inactivity +inadequacy +inadequate 
+inadequately +inadmissibility +inadmissible +inadvertently +inalienable +inapplicable +inappropriate +inasmuch +inaugural +inaugurate +inauguration +inauspicious +incalculable +incapable +incapacitate +incapacity +incarcerate +incarceration +incarnate +incarnation +incendiary +incentive +inception +incessant +incessantly +inch +incidence +incident +incidental +incidentally +incipient +incisive +incite +incitement +inclination +incline +inclined +include +inclusion +inclusive +inclusiveness +inclusivity +incoherent +income +incoming +incomparable +incomparably +incompatibility +incompatible +incompetence +incompetent +incomplete +incomprehensible +incomprehension +inconceivable +inconclusive +incongruity +incongruous +inconsequential +inconsiderable +inconsistency +inconsistent +incontestable +incontestably +incontrovertible +inconvenience +inconvenient +incorporate +incorporation +incorrect +incorrectly +increase +increased +increasingly +incredible +incredibly +incremental +inculcate +inculcation +incumbent +incur +incurable +incursion +ind +indalecio +inde +indebted +indebtedness +indecision +indecisiveness +indeed +indefatigable +indefatigably +indefensible +indefinite +indefinitely +indelible +indelibly +indemnity +indepen +independence +independencia +independent +independente +independently +independência +indescribable +indestructible +index +indexation +indexing +india +indian +indians +indias +indicate +indication +indicative +indicator +indict +indictment +indie +indifference +indifferent +indigence +indigenous +indigent +indignant +indignantly +indignation +indignity +indira +indirect +indirectly +indiscriminate +indiscriminately +indispensability +indispensable +indisputable +indisputably +indissoluble +indissolubly +individual +individualism +individualistic +individuality +individually +indivisibility +indivisible +indlovukazi +indo +indochina +indochinese +indoctrinate +indomitable +indonesia +indonesian +indubitably +induce +induced +inducement 
+induction +indulge +indulgence +industrial +industrialise +industrialised +industrialist +industrialization +industrialize +industrialized +industrially +industrious +industry +ineffective +ineffectively +ineffectiveness +ineffectual +inefficiency +inefficient +ineligible +ineluctable +inequality +inequitable +inequity +inertia +inescapable +inescapably +inestimable +inevitability +inevitable +inevitably +inexcusable +inexhaustible +inexorable +inexorably +inexpensive +inexplicable +inextricable +inextricably +inf +infallible +infamous +infamy +infancy +infant +infantry +infect +infected +infection +infectious +infer +inference +inferior +inferiority +infernal +inferno +infest +infestation +infighting +infiltrate +infiltration +infiltrator +infinite +infinitely +infinitesimal +infinitum +inflame +inflammatory +inflate +inflated +inflation +inflationary +inflexibility +inflexible +inflict +infliction +inflow +influence +influential +influenza +influx +inform +informal +informally +informatic +information +informational +informative +informed +infra +infraction +infrastructural +infrastructure +infrequently +infringe +infringement +infuse +infusion +ing +ingenious +ingenuity +ingrain +ingrained +ingredient +inhabit +inhabitant +inherent +inherently +inherit +inheritance +inheritor +inhibit +inhibition +inhospitable +inhuman +inhumane +inhumanity +inimical +iniquitous +iniquity +initial +initially +initiate +initiation +initiative +initiator +inject +injection +injunction +injure +injured +injurious +injury +injustice +ink +inkatha +inland +innate +inner +innermost +innocence +innocent +innocuous +innovate +innovation +innovative +innumerable +inoperative +inordinate +inordinately +input +inquire +inquiry +inquisition +inroad +insanally +insane +insanity +insatiable +inscribe +inscription +insect +insecticide +insecure +insecurity +insensitive +insensitivity +inseparable +inseparably +insert +insertion +inside +insidious +insidiously +insight +insightful 
+insignificance +insignificant +insincere +insincerity +insinuation +insist +insistence +insistent +insistently +insofar +insolence +insolent +insolently +insoluble +insolvency +insolvent +inspect +inspection +inspector +inspiration +inspirational +inspire +inspiring +instability +instal +install +installation +instalment +instance +instant +instantly +instead +instigate +instigation +instigator +instil +instill +instinct +instinctive +instinctively +institute +institution +institutional +institutionalisation +institutionalise +institutionalised +institutionalization +institutionalize +institutionalized +institutionally +instraw +instruct +instruction +instructive +instructor +instrument +instrumental +instrumentality +insufficiency +insufficient +insufficiently +insular +insularity +insulate +insulation +insult +insulting +insuperable +insupportable +insurance +insure +insurgency +insurgent +insurmountable +insurrection +intact +intake +intangible +integral +integrally +integrate +integrated +integration +integrationist +integrative +integrity +intellect +intellectual +intellectually +intelligence +intelligent +intelligently +intend +intended +intense +intensely +intensification +intensifie +intensified +intensify +intensity +intensive +intensively +intent +intention +intentional +intentionally +intentione +inter +interact +interaction +interactive +interahamwe +interamerican +intercept +interception +interchange +intercommunal +interconnect +interconnected +interconnectedness +interconnection +interconnectivity +intercontinental +intercourse +intercultural +interdependence +interdependency +interdependent +interdict +interdiction +interdisciplinary +interest +interested +interesting +interface +interfaith +interfere +interference +intergovernmental +interim +interior +interlink +interlinkage +interlinked +interlock +interlocutor +interlude +intermediary +intermediate +interminable +interminably +intermingle +intermittent +intermittently +intern +interna +internal 
+internalize +internally +internationalisation +internationalism +internationalist +internationalization +internationalize +internationally +internecine +internet +interoceanic +interpersonal +interplay +interpol +interpose +interposition +interpret +interpretation +interpreter +interracial +interregional +interrelate +interrelated +interrelatedness +interrelation +interrelationship +interreligious +interrogation +interrupt +interruption +intersect +intersection +interstate +intertwine +interval +intervene +intervention +interventionism +interventionist +interview +interweave +interwoven +intifada +intifadah +intimate +intimately +intimidate +intimidating +intimidation +intolerable +intolerably +intolerance +intolerant +intoxicate +intra +intractability +intractable +intransigence +intransigent +intraregional +intricacy +intricate +intricately +intrigue +intrinsic +intrinsically +introduce +introduction +introductory +introspection +introspective +intrude +intruder +intrusion +intrusive +intuition +inundate +inundation +invade +invader +invalid +invalidate +invaluable +invariable +invariably +invasion +invective +invent +invention +inventive +inventiveness +inventory +inverse +invert +invest +investigate +investigation +investigative +investigator +investing +investiture +investment +investor +invidious +invigorate +invincibility +invincible +inviolability +inviolable +inviolate +invisible +invitation +invite +invocation +invoke +involuntarily +involuntary +involve +involved +involvement +invulnerable +inward +inácio +ion +iota +ipcc +ipso +iran +iranian +iranians +iraq +iraqi +iraqis +ireland +irgun +irian +irish +irishman +iron +ironic +ironical +ironically +irony +irrational +irrationality +irreconcilable +irredentism +irredentist +irreducible +irrefutable +irrefutably +irregular +irregularity +irrelevance +irrelevant +irremediable +irremediably +irreparable +irreparably +irreplaceable +irrepressible +irresistible +irresistibly +irrespective +irresponsibility 
+irresponsible +irresponsibly +irretrievable +irretrievably +irreversibility +irreversible +irreversibly +irrevocable +irrevocably +irrigate +irrigation +irritate +irritation +isa +isaac +isaf +isaiah +isil +isis +islam +islamabad +islamic +islamist +islamophobia +island +islander +islands +isle +ism +ismail +ismat +iso +isolate +isolated +isolation +isolationism +isolationist +israel +israeli +israelis +issuance +issue +istanbul +isthmus +itaipu +italian +italy +item +ito +ituri +ivan +ivoire +ivorian +ivory +izetbegovic +jaber +jack +jacket +jacob +jacque +jacques +jagan +jail +jaime +jakarta +jam +jamahiriya +jamaica +jamaican +james +jammeh +jammu +jan +janeiro +january +japan +japanese +jar +jargon +jarring +jaruzelski +javier +jawaharlal +jawara +jayewardene +jealous +jealously +jealousy +jean +jeddah +jefferson +jeffrey +jensen +jeopardise +jeopardize +jeopardy +jeremi +jeremić +jericho +jerusalem +jesus +jet +jew +jewel +jewish +jewry +jews +jigme +jihad +jihadist +jimmy +jirga +jirgah +joao +joaquim +joaquin +job +jobless +joblessness +johannesburg +john +johnson +johnston +join +joining +joint +jointly +joke +jolt +jomo +jonas +jonathan +jong +jordan +jordanian +jordanians +jorge +jos +jose +joseph +joshua +josip +jostle +josé +journal +journalism +journalist +journey +joy +joyful +joyfully +joyous +juan +juarez +juba +jubilation +jubilee +judaism +judaization +judaize +judea +judge +judgement +judgment +judicial +judiciary +judicious +judiciously +juggle +julian +julio +julius +july +jump +junction +juncture +june +jung +jungle +junior +junta +jure +juridical +juridically +jurisdiction +jurisdictional +jurisprudence +jurist +jury +jus +justice +justifiable +justifiably +justification +justifie +justified +justify +justly +justness +juvenile +juxtaposition +kabbah +kabila +kabul +kahamba +kai +kairaba +kampala +kampuchea +kampuchean +kampucheans +kanak +kananaskis +kandahar +kant +karabakh +karachi +karadzic +karimov +karl +karzai +kashmir +kashmiri 
+kashmiris +kathmandu +katrina +kaunda +kavan +kazakh +kazakhstan +kazakstan +keel +keen +keener +keenly +keep +keeper +keeping +keg +keita +kemal +kennedy +kenneth +kenya +kenyan +kenyatta +kerim +kerry +ketumile +key +keynote +keystone +kfor +khalifa +khalil +khan +khartoum +khatami +khmer +khomeini +kick +kidnap +kidnapper +kidnapping +kiev +kigali +kill +killer +killing +kilogram +kilometer +kilometre +kilowatt +kim +kin +kind +kindle +kindly +kindness +kindre +king +kingdom +kingston +kinkel +kinshasa +kinship +kippur +kirchner +kiribati +kissinger +kith +kittani +kitts +kivu +klaus +klerk +knee +knell +knesset +knife +knit +knock +knot +know +knowingly +knowledge +knowledgeable +known +koevoet +kofi +koh +kohl +kolingba +konan +konaré +kong +koran +korea +korean +koreans +koryo +kosovar +kosovo +kountche +kreisky +kremlin +kuala +kuchma +kurd +kurdish +kurdistan +kurt +kutesa +kuwait +kuwaiti +kuwaitis +kwame +kyi +kyiv +kyoto +kyprianou +kyrgyz +kyrgyzstan +label +labor +laboratory +laborious +laboriously +labour +labourer +labyrinth +lack +lackey +lacking +lacunae +ladd +ladder +lade +laden +lady +lag +lago +lagos +lagrant +lahore +lai +laissez +lake +lakhdar +lamb +lament +lamentable +lamentably +lamentation +lamizana +lancaster +land +landfall +landing +landless +landlocke +landlocked +landmark +landmine +landowner +landscape +landslide +lane +language +languish +lanka +lankan +lankans +lansana +lao +laos +laotian +lap +lapse +large +largely +largesse +las +laser +lash +lasso +last +lasting +lastingly +lastly +late +lately +latent +later +lateral +latin +latitude +latter +latvia +latvian +laud +laudable +laughable +launch +launcher +launching +launder +launderer +laundering +laureate +laurel +laurent +lausanne +lavish +law +lawful +lawfully +lawless +lawlessness +lawyer +lax +laxity +lay +layer +laying +lazar +ldc +ldcs +lead +leader +leadership +leading +leaf +league +leak +lean +leaning +leap +learn +learning +lease +least +leave +lebanese +lebanon 
+lection +lecture +ledger +lee +leeway +left +leftist +leg +legacy +legal +legalistic +legality +legalization +legalize +legalized +legally +legend +legendary +legion +legislate +legislation +legislative +legislator +legislature +legitimacy +legitimate +legitimately +legitimise +legitimization +legitimize +lehman +leisure +leitmotif +lem +lend +lender +lending +length +lengthen +lengthy +leniency +lenient +lenin +leninist +lens +leon +leone +leonean +leoneans +leonid +leopold +leopoldo +lesbian +lesotho +less +lessen +lessening +lesson +lest +leste +lester +let +lethal +lethargy +letter +letting +levant +level +lever +leverage +levy +lewis +liability +liable +liaison +libel +liberal +liberalisation +liberalise +liberalism +liberalization +liberalize +liberalized +liberate +liberated +liberating +liberation +liberator +liberia +liberian +liberians +libertagao +liberty +library +libreville +libya +libyan +libyans +libération +licence +license +licensing +licte +lid +lie +liechtenstein +lieu +lieutenant +lievano +life +lifeblood +lifeless +lifeline +lifelong +lifestyle +lifetime +lift +lifting +light +lighten +lighting +lightly +lightning +like +likelihood +likely +liken +likewise +liking +lima +limb +limbo +limelight +limit +limitation +limited +limiting +limitless +limp +linchpin +lincoln +lindh +line +lineage +linear +liner +linger +linguistic +lining +link +linkage +linking +lion +lip +liquid +liquidate +liquidation +liquidity +lisbon +lishe +list +listen +listing +litany +literacy +literal +literally +literary +literate +literature +lithuania +lithuanian +litigation +litmus +litter +little +littoral +livable +live +liveable +lived +livelihood +lively +livestock +living +lldcs +load +loan +loath +loathsome +lobby +local +locality +localize +localized +locally +locate +location +lock +locked +lockerbie +locomotive +locus +locust +lodge +lofty +log +logic +logical +logically +logistic +logistical +logistically +lome +lomé +lon +london +lone +lonely +long +longer 
+longevity +longing +longstanding +longterm +lood +look +loom +loop +loophole +loose +loosen +loot +looting +lopez +lopsided +lord +los +lose +loser +loss +lost +lot +loud +louder +loudly +louis +louise +love +loved +lover +loving +low +lower +lowering +loya +loyal +loyalist +loyally +loyalty +lra +ltte +luanda +lucia +lucid +lucidity +lucidly +luck +luckily +lucky +lucrative +ludicrous +luis +luiz +lukashenka +lukewarm +lula +lull +lumber +lump +lumpur +lumumba +lunatic +lunch +lung +lurch +lure +lurk +lusaka +lust +luther +lution +luxembourg +luxury +lykketoft +lyon +maaouya +maastricht +macabre +macao +macedonia +macedonian +machel +machiavellian +machination +machine +machinery +macro +macroeconomic +mad +madagascar +madam +madame +madeleine +madman +madness +madrid +maduro +maelstrom +mafia +magazine +maghreb +magic +magical +magistrate +magna +magnanimity +magnanimous +magnificent +magnify +magnitude +mahathir +mahatma +mahmoud +maiden +mail +maim +maiming +main +mainland +mainly +mainstay +mainstream +mainstreame +mainstreaming +maintain +maintaining +maintenance +maize +majestic +majesty +major +majority +majuro +makario +makarios +make +maker +makeshift +makeup +making +malabo +malacca +maladjustment +malady +malagasy +malaise +malaria +malawi +malawian +malaysia +malaysian +malaysians +maldive +maldives +male +malevolent +malfunction +malfunctioning +mali +malian +malice +malicious +maliciously +malign +malignancy +malignant +malik +mall +malnourish +malnourished +malnutrition +malpractice +malta +maltese +malthusian +maltreat +maltreatment +malvina +malvinas +mammal +mammoth +man +manage +manageable +management +manager +managerial +managing +managua +mandate +mandatory +mandela +maneuver +maneuvering +manganese +manhattan +manichean +manif +manifest +manifestation +manifestly +manifesto +manifold +manila +manipulate +manipulation +manipulator +mankind +manmade +manner +mano +manoeuvre +manoeuvring +manpower +mansour +mantle +mantra +manual +manuel 
+manufacture +manufactured +manufacturer +manufacturing +many +mao +maoist +maori +map +mapping +maputo +mar +marathon +march +marco +marcos +marcoussis +mare +margaret +margin +marginal +marginalisation +marginalise +marginalization +marginalize +marginalized +marginally +maria +mariam +marie +marien +marijuana +marine +marino +mario +maritime +mark +marked +markedly +marker +market +marketing +marketplace +marking +marrakech +marrakesh +marriage +marry +marshal +marshall +marshallese +marti +martial +martin +martinique +martti +martyr +martyrdom +martyrs +martí +marvel +marvellous +marvelous +marx +marxism +marxist +mary +masire +mask +masquerade +mass +massacre +massacring +masse +massive +massively +mast +master +masterful +masterfully +masterly +mastermind +masterpiece +mastery +match +matching +material +materialism +materialistic +materialization +materialize +materially +materiel +maternal +maternity +mathematic +mathematical +mathieu +matignon +matrix +matter +matthew +mature +maturity +matériel +maubere +maurice +mauritania +mauritanian +mauritian +mauritius +max +maxim +maximal +maximalist +maximization +maximize +maximum +may +mayan +maybe +mayhem +mayor +mayotte +mazar +maze +mazowiecki +mbasogo +mbeki +mcnamara +mdg +mdgs +meager +meagre +meal +mean +meaning +meaningful +meaningfully +meaningless +means +meantime +meanwhile +measle +measurable +measure +measured +measurement +meat +mecca +mechanic +mechanical +mechanism +mechanization +mechanized +medal +meddle +meddling +media +median +mediate +mediating +mediation +mediator +mediatory +medical +medication +medicinal +medicine +medieval +mediocre +meditate +meditation +mediterranean +medium +meet +meeting +meetingj +mega +megaton +megawatt +mekong +melancholy +melanesian +melilla +mello +melt +meltdown +melting +mem +member +membership +memoir +memorable +memorandum +memorial +memory +menace +menachem +menacing +menacingly +mend +menem +mengistu +ment +mental +mentality +mentally +mentation +mention 
+mentor +menu +mercantile +mercenarism +mercenary +merchandise +merchant +merciful +mercifully +merciless +mercilessly +mercosur +mercy +mere +merely +merge +merger +merging +merit +meritorious +mesh +message +messenger +messianic +metal +metamorphosis +metaphor +metaphysical +mete +meteorological +meteorology +meter +method +methodical +methodically +methodological +methodology +meticulous +meticulously +metohija +metre +metric +metropolis +metropolitan +mettle +mew +mexican +mexico +miami +michael +michel +michelle +micro +microcosm +microcredit +microfinance +micronesia +micronesian +mid +middle +midnight +midpoint +midrand +midst +midterm +midway +midwife +might +mightily +mighty +migrant +migrate +migration +migratory +miguel +mikhail +mil +milan +mild +mildly +mile +milestone +milieu +militancy +militant +militarily +militarism +militarist +militaristic +militarization +militarize +military +militate +militia +milk +mill +millenium +millennia +millennial +millennium +million +millionaire +millstone +milosevic +mind +minded +mindedly +mindedness +mindful +mindless +mindset +mine +minefield +miner +mineral +mingle +minh +mini +minimal +minimally +minimise +minimize +minimum +mining +miniscule +minister +ministerial +ministry +minor +minority +minsk +minugua +minurso +minuscule +minustah +minute +miracle +miraculous +miraculously +mirage +mire +mired +mirror +misadventure +misappropriation +miscalculation +mischief +misconception +misconstrue +misdeed +misdirect +miserable +miserably +misery +misfortune +misgiving +misguide +misguided +misinformation +misinterpret +misinterpretation +misjudge +mislead +misleading +mismanagement +misnomer +misperception +misplace +misplaced +misrepresent +misrepresentation +miss +missile +missing +mission +missionary +mist +mistake +mistaken +mistakenly +mistreat +mistreatment +mistrust +mistura +misunderstand +misunderstanding +misunderstood +misuse +mitch +mitchell +mitigate +mitigation +mitterand +mitterrand +mix +mixed 
+mixture +mkapa +mladic +moat +mob +mobile +mobilisation +mobilise +mobility +mobilization +mobilize +mobilizing +mobutu +mock +mockery +modality +mode +model +modem +moderate +moderately +moderation +modern +modernisation +modernity +modernization +modernize +modernized +modest +modestly +modesty +modicum +modification +modify +module +modus +mogadishu +mogen +mohamad +mohame +mohamed +mohammad +mohamme +mohammed +mohamud +moi +mojsov +moktar +mold +moldova +moldovan +molotov +mombasa +moment +momentarily +momentary +momentous +momentum +mon +monaco +monarch +monarchy +monastery +monday +monde +mondlane +monetary +money +monger +mongolia +mongolian +monie +monitor +monitoring +monk +monnet +mono +monolithic +monologue +monopolistic +monopolization +monopolize +monopoly +monotheism +monotheistic +monroe +monrovia +monsoon +monster +monstrosity +monstrous +montego +montenegrin +montenegro +monterrey +montevideo +month +monthly +montreal +montserrat +monuc +monument +monumental +mood +moon +moot +moral +morale +morales +morality +morally +morass +moratorium +morbid +morbidity +moreover +moresby +moribund +morning +moro +moroccan +morocco +moroni +morrow +morse +mortal +mortality +mortar +mortgage +mosaic +moscow +moses +moslem +mosque +mosquito +mostar +mostly +mote +mother +motherhood +motherland +motion +motivate +motivated +motivating +motivation +motive +motor +motto +mould +mount +mountain +mountainous +mourn +mourning +mousa +mouse +moussa +mouth +mouvement +move +movement +mover +movie +movimento +moving +movingly +mow +mozambican +mozambique +mpla +mrs +mswati +muammar +mubarak +much +mud +mudslide +mugabe +muhammad +mujahideen +mujahidin +mujibur +multi +multicultural +multiculturalism +multidimensional +multidisciplinary +multiethnic +multifacete +multifaceted +multifarious +multiform +multilateral +multilateralism +multilateralization +multilaterally +multilingual +multinational +multiparty +multiple +multipli +multiplication +multiplicity +multiplie 
+multiply +multiplying +multipolar +multipolarity +multipolarization +multiracial +multireligious +multisectoral +multitude +muluzi +mumbai +munich +municipal +municipality +munition +munity +murder +murderer +murderous +mururoa +musa +muscle +museum +museveni +mushroom +music +musical +musician +muskoka +muslim +muslims +must +muster +mutation +mutatis +mute +mutilate +mutilation +mutiny +mutual +mutuality +mutually +muzorewa +muzzle +mwalimu +myanmar +myopia +myopic +myriad +mysterious +mystery +mystique +myth +mythical +mythology +mzee +nacional +nafta +nagasaki +nagging +nagorno +nagorny +nahyan +nail +nairobi +naivasha +naive +naked +nakedly +nam +name +namely +namese +namibia +namibian +namization +nan +napalm +naple +naples +napoleon +narco +narcotic +narcotics +nargis +narrative +narrow +narrowing +narrowly +narrowness +nascent +nassau +nasser +nassir +natal +nation +national +nationale +nationalism +nationalist +nationalistic +nationalists +nationality +nationalization +nationalize +nationally +nationhood +nationwide +native +nato +natter +natural +naturally +nature +naught +nauru +nautical +naval +nave +navigable +navigate +navigation +navigational +navigator +navy +nay +nazarbaev +nazarbayev +nazi +nazism +naïve +ncds +near +nearby +nearer +nearly +neatly +nebulous +necessarily +necessary +necessitate +necessity +neck +need +needed +needless +needlessly +needy +nefarious +negate +negation +negative +negatively +negativism +neglect +negligence +negligible +negotia +negotiable +negotiate +negotiated +negotiating +negotiation +negotiator +negro +nehru +neighbor +neighborhood +neighboring +neighborliness +neighborly +neighbour +neighbourhood +neighbouring +neighbourliness +neighbourly +neighbours +neither +nelson +neo +neocolonialism +neocolonialist +neoliberal +neoliberalism +nepad +nepal +nepalese +nepotism +nerve +nervous +ness +nest +net +netanyahu +netherlands +neto +network +networking +neutral +neutrality +neutralization +neutralize +neutron +never 
+nevertheless +nevis +new +newborn +newcomer +newfound +newly +news +newspaper +next +nexus +ngo +ngouabi +nguema +nguesso +nguyen +nicaragua +nicaraguan +nice +nicety +nicholas +nickel +nicolae +nicolas +nicosia +niger +nigeria +nigerian +nigh +night +nightmare +nightmarish +nil +nile +nilly +nimble +nine +nineteen +nineteenth +ninetieth +ninety +ninth +ninthly +nip +niue +nixon +niyazov +niño +nkomati +nkrumah +nkurunziza +nobel +nobility +noble +nobler +noblest +nobly +nobody +nodule +noise +noisy +nol +nomadic +nominal +nominate +nomination +nominee +nonaggression +nonaligne +nonaligned +nonalignment +none +nonetheless +nonexistent +nongovernmental +noninterference +nonintervention +nonnuclear +nonproliferation +nonsense +noose +nordic +norm +normal +normalcy +normalisation +normalise +normality +normalization +normalize +normalizing +normally +normative +norodom +north +northeast +northern +northsouth +northward +northwest +norway +norwegian +nose +nostalgia +nostalgic +notable +notably +note +noteworthy +nothing +notice +noticeable +noticeably +notification +notify +notion +notoriety +notorious +notoriously +notwithstanding +nought +noumea +nouméa +nourish +nourishment +nova +novel +novelty +november +nowadays +nowhere +noxious +npt +nuance +nuclear +nuclearization +nucleus +nuisance +nujoma +null +nullify +numb +number +numerical +numerically +numerous +nun +nuremberg +nurse +nursultan +nurture +nurturing +nusra +nut +nutrition +nutritional +nutritious +nutshell +nyerere +oak +oar +oas +oasis +oath +oau +oauj +obama +obasanjo +obduracy +obdurate +obdurately +obedience +obedient +obey +obfuscate +obiang +object +objection +objectionable +objective +objectively +objectivity +obligate +obligation +obligatory +oblige +obliged +obliterate +obliteration +oblivion +oblivious +obnoxious +obscene +obscenity +obscurantism +obscurantist +obscure +observance +observation +observe +observer +obsess +obsession +obsessive +obsolescence +obsolete +obstacle +obstinacy 
+obstinate +obstinately +obstruct +obstruction +obstructionism +obstructionist +obstructive +obtain +obtaining +obviate +obvious +obviously +occasion +occasional +occasionally +occupancy +occupant +occupation +occupied +occupier +occupy +occupying +occur +occurrence +ocean +oceania +oceanic +oceanographic +october +oda +odd +odious +oec +oecd +oecs +offence +offend +offender +offense +offensive +offer +offering +office +officer +offices +official +officially +offing +offset +offshoot +offshore +offspring +oft +often +oftentime +ogata +ohrid +oic +oil +oilfield +oily +okinawa +oklahoma +olara +old +oligarchic +oligarchy +oligopoly +olive +olmert +olof +olusegun +olympiad +olympic +omagh +oman +omani +omar +ombudsman +omega +omen +omer +ominous +ominously +omission +omit +omne +omnipresent +ondimba +one +oneness +onerous +oneself +ongoing +onion +online +onlooker +onset +onslaught +onto +onumoz +onus +onward +onwards +opcw +ope +opec +open +opening +openly +openness +oper +operandi +operate +operating +operation +operational +operationalization +operationalize +operationally +operative +operatively +operator +opertti +opinion +opium +opponent +opportune +opportunism +opportunist +opportunistic +opportunity +oppose +opposed +opposing +opposite +opposition +oppress +oppressed +oppression +oppressive +oppressor +opprobrious +opprobrium +opt +optic +optimal +optimally +optimism +optimist +optimistic +optimistically +optimize +optimum +option +optional +opulence +opulent +oral +orange +oratory +orbit +orchestrate +orchestrated +ordain +ordeal +order +ordered +ordering +orderly +ordinance +ordinary +ordinate +ordinated +ordinating +ordination +ordinator +ordnance +ore +organ +organi +organic +organically +organisation +organise +organism +organiza +organization +organizational +organize +organized +organizer +organizing +orgy +orient +oriental +orientation +oriented +origin +original +originality +originally +originate +oro +orphan +ortega +orthodox +orthodoxy +osama 
+oscar +osce +oscillate +oslo +ossetia +ossetian +ostensible +ostensibly +ostracism +ostracize +ostrich +otherwise +ottawa +ottoman +otunnu +ouagadougou +ouattara +ought +ould +ounce +oust +outbreak +outburst +outcast +outcome +outcry +outdate +outdated +outdo +outer +outflow +outgoing +outgrow +outgrowth +outlaw +outlawing +outlaws +outlay +outlet +outline +outlive +outlook +outmoded +outnumber +outpace +outpost +outpouring +output +outrage +outraged +outrageous +outrageously +outreach +outright +outset +outside +outsider +outskirt +outspoken +outstanding +outstretched +outstrip +outward +outweigh +overall +overarch +overarching +overboard +overburden +overcome +overcoming +overcrowding +overdue +overemphasize +overestimate +overexploit +overexploitation +overfishe +overflight +overflow +overhang +overhaul +overhead +overkill +overlap +overlapping +overload +overlook +overlord +overly +overnight +overpopulation +overpower +overproduction +overridden +override +overriding +overrule +overrun +overseas +oversee +overshadow +oversight +oversimplification +oversimplify +overstate +overstatement +overstep +overstress +overstretch +overt +overtake +overthrow +overtly +overtone +overture +overturn +overview +overweene +overwhelm +overwhelmed +overwhelming +overwhelmingly +owe +owen +owner +ownership +oxygen +ozone +pablo +pac +pace +pacem +pacific +pacification +pacifism +pacifist +pacify +pack +package +pact +pacta +pad +page +paigc +pain +painful +painfully +painless +painstaking +painstakingly +paint +pair +pakistan +pakistani +palace +palau +palauan +pale +palestine +palestinian +palestinians +pall +palliative +palm +palme +palpable +palpably +paltry +pan +panacea +panama +panamanian +panamanians +pandemic +pandora +panel +pang +panic +panoply +panorama +papal +papandreou +paper +papua +par +para +parade +paradigm +paradise +paradox +paradoxical +paradoxically +paragraph +paraguay +paraguayan +parallel +parallelism +paralyse +paralysed +paralysis +paralyze +paralyzed 
+parameter +paramilitary +paramount +parana +paranoia +paraphrase +parcel +pardon +parent +pariah +paris +parity +park +parliament +parliamentarian +parliamentary +parochial +parochialism +parody +part +partake +parti +partial +partiality +partially +participant +participate +participation +participative +participatory +particular +particularity +particularly +partido +partisan +partisanship +partition +partitioning +partitionist +partly +partner +partnership +party +pascal +pass +passage +passenger +passing +passion +passionate +passionately +passive +passively +passivity +passport +past +pastoral +pasture +patassé +patch +patent +patently +paternalism +paternalistic +path +pathetic +pathological +pathway +patience +patient +patiently +patrice +patrimonial +patrimony +patriot +patriotic +patriotism +patrol +patron +patronage +patronize +pattern +patty +paucity +paul +paulo +pauperization +pause +pave +pawn +pax +pay +payable +payment +paz +peace +peaceably +peacebuilde +peacebuilding +peaceful +peacefully +peacekeeper +peacekeeping +peacemake +peacemaker +peacemaking +peacetime +peak +pearce +pearson +peasant +peculiar +peculiarity +peddle +pedestal +pedro +peer +peke +peking +pelindaba +pen +penal +penalize +penalty +penchant +pende +pendence +pending +pendulum +penetrate +penetration +penh +peninsula +pennsylvania +penny +pension +pentagon +penury +people +peoples +peopleís +per +perceive +percent +percentage +perceptible +perceptibly +perception +perceptive +perceptiveness +perch +pereira +peremptory +perennial +perennially +peres +perestroika +perez +perfect +perfecting +perfection +perfectly +perfidious +perforce +perform +performance +perfume +perhaps +peril +perilous +perilously +perimeter +period +periodic +periodical +periodically +peripheral +periphery +perish +permanence +permanent +permanently +permeate +permissible +permission +permissive +permissiveness +permit +pernicious +perpetrate +perpetration +perpetrator +perpetual +perpetually +perpetuate 
+perpetuation +perpetuity +perplex +perplexing +perplexity +perry +persecute +persecution +perseverance +persevere +persevering +pershe +persian +persist +persistence +persistent +persistently +person +personage +personal +personality +personally +personify +personnel +perspective +perspicacity +persuade +persuasion +persuasive +pertain +pertinence +pertinent +pertinently +perturb +peru +peruvian +pervade +pervasive +perverse +perversion +perversity +pervert +perverted +peso +pessimism +pessimist +pessimistic +pest +pesticide +pestilence +pet +peter +petersburg +petition +petitioner +petrocaribe +petrochemical +petrodollar +petroleum +petty +peuple +phantom +pharmaceutical +phase +phasing +phenomena +phenomenal +phenomenon +philanthropic +philanthropy +philip +philippine +philippines +philosopher +philosophic +philosophical +philosophy +phnom +phoenix +phone +phosphate +photo +photograph +phrase +phraseology +physical +physically +physician +physics +physiognomy +pick +picture +pie +piece +piecemeal +pierce +pierre +piety +pig +pigmentation +pile +pilgrim +pilgrimage +piling +pill +pillage +pillaging +pillar +pilot +pin +ping +pinie +pinies +piniés +pinnacle +pinochet +pinpoint +pio +pioneer +pioneering +pious +pipe +pipeline +piracy +pirate +piratical +pit +pitch +pitfall +pitiful +pitifully +pitiless +pittsburgh +pity +pivot +pivotal +placate +place +placement +placing +plague +plain +plainly +plan +plane +planet +planetary +plank +planned +planner +planning +plant +plantation +planting +plastic +plata +plate +plateau +platform +platitude +plato +plausible +play +player +playground +playing +plaza +plc +plea +plead +pleading +pleasant +please +pleased +pleasing +pleasure +plebiscite +pledge +plenary +plenipotentiary +plentiful +plenty +plethora +plight +plo +plot +plotter +plotting +plough +ploughshare +ploy +plug +plummet +plunder +plundering +plunge +plural +pluralism +pluralist +pluralistic +plurality +plus +plutonium +ply +pocket +podium +poem +poet +poetic 
+poetry +pogrom +poignant +poignantly +point +pointedly +pointing +pointless +poise +poison +poisoning +poisonous +pol +poland +polar +polarisation +polarity +polarization +polarize +polarized +pole +polemic +polemical +police +policeman +policing +policy +policymake +policymaker +polio +polisario +polish +politburo +polite +politic +political +politically +politician +politicization +politicize +politicized +politico +politics +polity +poll +polling +pollutant +pollute +polluted +polluter +polluting +pollution +polynesia +ponder +pontiff +pontificate +pool +pooling +poor +poorer +poorly +pope +poppy +populace +popular +popularity +popularly +populate +populated +population +populism +populist +populous +pornography +porous +port +portal +portend +portent +portfolio +portillo +portion +portrait +portray +portrayal +portugal +portuguese +pose +posit +position +positive +positively +possess +possession +possessor +possibility +possible +possibly +possidetis +post +postal +posterity +postpone +postponement +postulate +posture +posturing +postwar +pot +potable +potent +potential +potentiality +potentially +potsdam +pound +pour +poverty +powder +power +powerful +powerfully +powerhouse +powerless +powerlessly +powerlessness +powers +practicability +practicable +practical +practicality +practically +practice +practise +practitioner +pragmatic +pragmatically +pragmatism +prague +praia +praise +praiseworthy +prance +pray +prayer +pre +preach +preacher +preaching +preamble +preambular +precarious +precariously +precariousness +precaution +precautionary +precede +precedence +precedent +precept +precinct +precious +precipice +precipitate +precipitation +precipitous +precise +precisely +precision +preclude +preconceive +preconceived +preconception +precondition +precursor +predate +predator +predatory +predecessor +predestine +predetermine +predetermined +predicament +predicate +predict +predictability +predictable +predictably +prediction +predispose +predominance +predominant 
+predominantly +predominate +preeminent +preempt +preemptive +preface +prefer +preferable +preferably +preference +preferential +preferred +pregnancy +pregnant +prejudge +prejudice +prejudiced +prejudicial +preliminary +prelude +premature +prematurely +premeditate +premeditated +premier +premise +premium +premonition +preoccupation +preoccupie +preoccupied +preoccupy +preparation +preparatory +prepare +prepared +preparedness +preponderance +preponderant +preposterous +prerequisite +prerogative +presage +prescribe +prescribed +prescription +presence +present +presentation +presently +preservation +preserve +preside +presidency +president +presidential +presiding +presidium +press +pressed +pressing +pressure +prestige +prestigious +presumably +presume +presumption +presumptuous +presuppose +pretence +pretend +pretense +pretension +pretext +pretoria +pretty +prevail +prevailing +prevalence +prevalent +prevaricate +prevarication +prevent +preventable +preventative +prevention +preventive +previous +previously +prey +price +priceless +pricing +pride +priest +primacy +primarily +primary +prime +primitive +primordial +prin +prince +princess +principal +principality +principally +principe +principle +principled +print +prior +priori +prioritization +prioritize +priority +prise +prism +prison +prisoner +pristina +pristine +privacy +private +privately +privation +privatization +privatize +privilege +privileged +prize +pro +proactive +proactively +prob +probability +probable +probably +probe +probity +problem +problematic +problematical +procedural +procedure +proceed +proceeding +process +processing +procession +proclaim +proclamation +procrastinate +procrastination +procure +procurement +prod +prodigious +produce +producer +producing +product +production +productive +productively +productivity +profess +profession +professional +professionalism +professionalization +professionalize +professor +proffer +proficiency +proficient +profile +profit +profitability +profitable 
+profitably +profiteer +profligate +profound +profoundly +profusion +prognosis +program +programmatic +programme +programming +progress +progression +progressive +progressively +prohibit +prohibition +prohibitive +project +projection +proletarian +proletariat +proliferate +proliferation +proliferator +prolong +prolongation +prolonged +prom +prominence +prominent +prominently +promise +promising +promote +promoter +promotion +prompt +promptly +promptness +promulgate +promulgation +prone +pronged +pronounce +pronounced +pronouncement +proof +prop +propaganda +propagandist +propagate +propagation +propel +propensity +proper +properly +property +prophecy +prophesy +prophet +prophetic +propitious +proponent +proportion +proportional +proportionality +proportionate +proportionately +proposal +propose +proposition +propound +proscribe +proscription +prosecute +prosecution +prosecutor +prospect +prospective +prosper +prosperity +prosperous +prostitution +protagonist +protect +protection +protectionism +protectionist +protective +protector +protectorate +protein +protest +protestant +protestation +protester +protocol +protract +protracted +proud +proudly +prove +proven +proverb +proverbial +provide +providence +provider +province +provincial +provision +provisional +provisionally +provocation +provocative +provoke +provoking +prowess +proximate +proximity +proxy +prst +prudence +prudent +prudently +pruning +préval +pseudo +psyche +psychological +psychologically +psychology +psychosis +psychotropic +pta +public +publication +publicity +publicize +publicly +publish +puerto +pull +pulse +pump +punctuality +punctuate +punish +punishable +punishment +punitive +punta +pupil +puppet +pur +purchase +purchaser +purchasing +pure +purely +purge +purification +purify +purity +purport +purportedly +purpose +purposeful +purposefully +purposely +purse +pursuance +pursuant +pursue +pursuit +purveyor +purview +push +put +putin +putsch +putting +puzzle +pyongyang +pyramid +pyrenee +pérez 
+qaboos +qaddafi +qadhafi +qaeda +qaida +qana +qatar +qatari +qau +qua +quadripartite +quadruple +quagmire +qualification +qualified +qualify +qualitative +qualitatively +quality +qualm +quantifiable +quantify +quantitative +quantitatively +quantity +quantum +quarrel +quarter +quartet +quasi +qud +quds +quebec +queen +quell +quench +query +quest +question +questionable +questioning +questionnaire +quibble +quick +quicken +quickly +quicksand +quid +quiet +quietly +quintessence +quintessential +quit +quite +quito +quo +quota +quotation +quote +rabat +rabbani +rabid +rabin +race +racial +racialism +racialist +racially +racism +racist +rack +radar +radiant +radiation +radical +radicalism +radicalization +radicalize +radically +radio +radioactive +radioactivity +radiological +rafael +raft +rage +rahman +raid +rail +railroad +railway +rain +rainbow +rainfall +rainforest +rainy +raise +raising +raison +rajiv +rally +rallying +ralph +ramadan +ramallah +ramgoolam +ramification +ramos +rampage +rampant +rampart +ramsi +rancor +rancour +randolph +random +range +ranging +rangoon +rank +ranking +ransom +rapacious +rape +rapid +rapidity +rapidly +rapport +rapporteur +rapprochement +rare +rarely +rarotonga +rash +rashe +rasheed +rate +rather +ratification +ratify +rating +ratio +ration +rational +rationale +rationality +rationalization +rationalize +rationally +rations +ratsiraka +rattle +rauf +raul +ravage +ravaging +raw +ray +razali +raze +raúl +reach +reaching +react +reaction +reactionary +reactivate +reactivation +reactive +reactor +read +reader +readie +readily +readiness +reading +readjust +readjustment +readmission +readmitte +ready +reaffirm +reaffirmation +reaffirming +reagan +real +realign +realignment +realisation +realise +realism +realist +realistic +realistically +reality +realizable +realization +realize +reallocate +reallocation +really +realm +realpolitik +reap +reappear +reappearance +reappointment +reappraisal +reappraise +rear +rearguard +rearm +rearmament 
+rearrange +rearrangement +reason +reasonable +reasonableness +reasonably +reasoned +reasoning +reassert +reassertion +reassess +reassessment +reassume +reassurance +reassure +reassuring +reawaken +reawakening +rebalance +rebalancing +rebel +rebellion +rebellious +rebirth +reborn +rebound +rebuff +rebuild +rebuilding +rebuke +recalcitrance +recalcitrant +recall +recant +recapitulate +recapture +recast +recede +receipt +receive +receiver +receiving +recent +recently +reception +receptive +receptivity +recess +recession +recessionary +recipe +recipient +reciprocal +reciprocally +reciprocate +reciprocity +recite +reckless +recklessly +recklessness +reckon +reckoning +reclaim +reclamation +recog +recognise +recognition +recognizable +recognize +recognized +recoil +recollection +recolonization +recolonize +recommence +recommend +recommendation +recommit +recommitment +recompense +reconcile +reconciliation +reconfiguration +reconfigure +reconfirm +reconnaissance +reconqu +reconquer +reconquest +reconsider +reconsideration +reconstitute +reconstitution +reconstruct +reconstruction +reconvene +reconvening +reconversion +record +recount +recourse +recover +recovery +recreate +recreation +recreational +recrimination +recrudescence +recruit +recruiting +recruitment +rectification +rectify +rectitude +recuperation +recur +recurrence +recurrent +recycle +recycling +red +redaction +redd +rededicate +rededication +redeem +redefine +redefinition +redemption +redeploy +redeployment +redesign +redirect +redirection +rediscover +rediscovery +redistribute +redistribution +redouble +redoubled +redoubling +redound +redraw +redrawing +redrawn +redress +reduce +reduced +reduction +redundancy +redundant +reed +reef +reel +reestablish +reestablishment +reexamine +ref +refashion +refer +reference +referendum +referral +refinance +refine +refined +refinement +refinery +refining +reflect +reflection +reflective +reflex +refocus +reforest +reforestation +reform +reformation +reformed +reformer 
+reformist +reformulate +reformulation +refrain +refresh +refreshing +refuge +refugee +refurbish +refusal +refuse +refute +regain +regard +regardless +regenerate +regeneration +regime +regiment +region +regional +regionalism +regionalization +regionalize +regionally +register +registration +registry +regress +regression +regressive +regret +regretfully +regrettable +regrettably +regroup +regrouping +regular +regularity +regularize +regularly +regulate +regulated +regulating +regulation +regulator +regulatory +rehabilitate +rehabilitation +rehearse +reich +reign +reignite +reimburse +reimbursement +rein +reinforce +reinforcement +reinforcing +reinstate +reinstatement +reintegrate +reintegration +reinterpret +reintroduce +reinvent +reinvest +reinveste +reinvigorate +reinvigorated +reinvigoration +reiterate +reiteration +reject +rejection +rejectionist +rejoice +rejoicing +rejoin +rejuvenate +rejuvenation +rekindle +relapse +relate +related +relation +relations +relationship +relative +relatively +relativism +relaunch +relaunche +relaunching +relax +relaxation +relaxed +relay +release +relegate +relegation +relent +relentless +relentlessly +relevance +relevancy +relevant +reliability +reliable +reliably +reliance +reliant +relic +relief +relieve +relieved +religion +religious +religiously +relinquish +relish +relive +relocate +relocation +reluctance +reluctant +reluctantly +rely +remain +remainder +remained +remaining +remains +remake +remark +remarkable +remarkably +remedial +remedie +remedy +remember +remembrance +remind +reminder +reminiscent +remiss +remit +remittance +remnant +remodelle +remorse +remote +remotely +remoteness +remotest +removal +remove +removed +remuneration +remunerative +renaissance +rename +renamo +rend +render +rendering +rendezvous +rene +renegade +renege +renegotiate +renegotiation +renew +renewable +renewal +renewed +renounce +renovate +renovation +renown +renowne +renowned +rent +renunciation +rené +reopen +reopening +reorder +reordering 
+reorganization +reorganize +reorient +reorientation +repair +reparation +repatriate +repatriation +repay +repayment +repeal +repeat +repeated +repeatedly +repel +repercussion +repetition +repetitious +repetitive +replace +replacement +replant +replenish +replenishment +replete +replicate +reply +report +reportedly +reporter +reporting +repose +reposition +repository +reprehensible +represent +representation +representational +representative +representativeness +representatives +representativity +repress +repressed +repression +repressive +reprisal +reproach +reprobation +reprocess +reproduce +reproduction +reproductive +republic +republican +republika +repudiate +repudiation +repugnance +repugnant +repulse +repulsive +reputable +reputation +repute +request +require +required +requirement +requires +requisite +requite +reread +res +reschedule +rescheduling +rescind +rescue +research +researcher +resemblance +resemble +resent +resentment +reservation +reserve +reservoir +reset +resettle +resettlement +reshape +reshaping +reside +residence +residency +resident +residential +residual +residue +resign +resignation +resilience +resilient +resist +resistance +resistant +reso +resolute +resolutely +resoluteness +resolution +resolve +resonance +resonate +resort +resound +resounding +resource +resourceful +resourcefulness +respect +respectability +respectable +respected +respectful +respectfully +respective +respectively +respiratory +respite +respond +response +responsi +responsibility +responsible +responsibly +responsive +responsiveness +rest +restart +restate +restatement +restaurant +restitution +restless +restlessness +restoration +restore +restrain +restrained +restraining +restraint +restrict +restricted +restriction +restrictive +restructure +restructuring +result +resultant +results +resume +resumption +resurface +resurgence +resurgent +resurrect +resurrection +resuscitate +retain +retake +retaliation +retaliatory +retard +retardation +retarded +retention +rethink 
+rethinking +rethought +reticence +reticent +retire +retired +retirement +retrain +retreat +retrenchment +retribution +retrieve +retrograde +retrogression +retrogressive +retrospect +retrospective +retroviral +return +returnee +reunification +reunified +reunify +reunion +reunite +reunited +rev +revaluation +revamp +revanchism +revanchist +reveal +revealing +revelation +revenge +revenue +reverberate +reverberation +revere +reverence +reverend +reversal +reverse +reversed +reversion +revert +review +revise +revised +revision +revisionism +revisionist +revisit +revitalisation +revitalise +revitalization +revitalize +revitalizing +revival +revive +revocation +revoke +revolt +revolting +revolucionaria +revolucionarias +revolucionario +revolution +revolutionary +revolutionize +revolve +revolving +revulsion +reward +rewarding +rewrite +reykjavik +rhetoric +rhetorical +rhodesia +rhodesian +rhythm +ribbentrop +rica +rican +ricardo +rice +rich +richard +richly +richness +rico +rid +riddle +ride +ridicule +ridiculous +rife +rifle +rift +rig +riga +right +righteous +righteousness +rightful +rightfully +rightist +rightly +rightness +rights +rigid +rigidity +rigidly +rigor +rigorous +rigorously +rigour +riidiger +rim +ring +ringing +rio +riot +rip +riparian +ripe +ripple +rise +risk +risky +rite +ritual +ritualistic +rival +rivalry +rive +river +rivero +riyadh +road +roadblock +roadmap +roam +roar +rob +robben +robbery +robert +roberto +robinson +robust +robustly +rock +rocket +rocky +rod +rodrigo +rodriguez +roe +roger +rogers +rogue +roh +roland +role +roll +rollback +roma +roman +romania +romanian +romantic +rome +romulo +ronald +roof +room +roosevelt +roost +root +rooted +rope +rose +roster +rostrum +rosy +rot +rotate +rotation +rotational +rote +rotten +rouble +rouge +rough +roughly +roughshod +rouhani +round +roundly +rouse +rousseff +roust +route +routine +routinely +row +royal +royalty +rub +rubber +rubble +rubric +rude +rudely +rudiger +rudimentary +ruf +rugged +ruin 
+ruinous +rule +ruler +ruling +rumbling +rumor +rumour +run +runaway +runner +running +rupture +rural +ruse +rush +russia +russian +russians +rut +ruthless +ruthlessly +ruthlessness +ruz +rwanda +rwandan +rwandese +saarc +saavedra +sabah +sabotage +saboteur +sabra +sabre +sachs +sack +sacred +sacredness +sacrifice +sacrificial +sacrilege +sacrosanct +sad +sadako +sadat +sadc +sadcc +saddam +sadden +saddle +sadistic +sadly +sadness +sadruddin +safe +safeguard +safeguarding +safely +safety +saga +sagacious +sagacity +sage +sahara +saharan +saharawi +sahel +sahelian +sahelo +sahnoun +sahraoui +sahrawi +said +saigon +sail +sailor +saint +sake +salaam +salad +salary +salazar +sale +saleh +salient +salim +salisbury +salman +salt +salutary +salutation +salute +salvador +salvadoran +salvadorian +salvage +salvation +sam +samaria +samdech +samir +samoa +samoan +samora +sample +samrin +samuel +san +sana +sanchez +sanctify +sanction +sanctity +sanctuary +sand +sandinist +sandinista +sandino +sandwich +sane +saner +sanguinary +sanguine +sanitary +sanitation +sanity +santa +santiago +santo +santos +sao +sap +sar +sarajevo +sarkis +sarkozy +sarney +sary +sassou +satanic +sate +satellite +satisfaction +satisfactorily +satisfactory +satisfied +satisfy +satisfying +saturate +saturation +saturday +saud +saudi +savage +savagely +savagery +save +savimbi +saving +saviour +say +saying +scale +scaling +scandal +scandalous +scandinavia +scandinavian +scant +scanty +scapegoat +scar +scarce +scarcely +scarcity +scare +scarred +scatter +scenario +scene +sceptic +sceptical +scepticism +schedule +scheduled +scheduling +scheme +scheming +schism +schmidt +scholar +scholarship +school +schoolchildren +schooling +schumann +science +scientific +scientifically +scientist +scoff +scope +scorch +score +scorecard +scorn +scornful +scotland +scottish +scourge +scramble +scrap +scratch +screen +script +scripture +scruple +scrupulous +scrupulously +scrutinize +scrutiny +scuttle +sdg +sdgs +sea +seabed 
+seafarer +seal +seamless +seaport +sear +search +searching +season +seasonal +seasoned +seat +seating +seattle +sec +secede +secession +secessionist +second +secondary +secondly +secrecy +secret +secretariat +secretary +secretarygeneral +secretly +sect +sectarian +sectarianism +section +sectional +sector +sectoral +sectorial +secular +secularism +secure +secured +securely +securing +security +sedar +sedition +seduce +seduction +see +seed +seek +seeker +seeking +seem +seemingly +seems +seethe +segment +segregate +segregation +segregationist +seine +seismic +seize +seized +seizure +seko +sekou +sela +selassie +seldom +select +selection +selective +selectively +selectivity +self +selfish +selfishly +selfishness +selfless +selflessly +selflessness +sell +seller +selling +semantic +semblance +sembly +semi +seminal +seminar +semipalatinsk +semitic +semitism +sen +senate +senator +send +sendai +sending +senegal +senegalese +senghor +senior +sensational +sense +senseless +senselessly +senselessness +sensibility +sensible +sensibly +sensing +sensitise +sensitive +sensitivity +sensitize +sentence +sentiment +sentimental +seoul +separate +separated +separately +separation +separatism +separatist +september +sequel +sequence +sequester +sequestration +serb +serbia +serbian +serene +serenely +serenity +sergio +series +serious +seriously +seriousness +sermon +servanda +servant +serve +service +serviceman +servicemen +servicing +servile +servitude +sese +session +sessional +set +setback +setting +settle +settlement +settler +settling +seung +seven +seventeen +seventeenth +seventh +seventhly +seventieth +seventy +sever +several +severally +severance +severe +severely +severity +sex +sexual +sexually +seychelle +seyni +sfor +shab +shaba +shabaab +shackle +shade +shadow +shadowy +shah +shaheed +shaikh +shake +shakespeare +shaky +shall +shallow +sham +shamble +shame +shameful +shamefully +shameless +shamelessly +shamir +shanghai +shanty +shape +shaping +share +shared +shareholder 
+sharia +shariah +sharif +sharing +shark +sharm +sharon +sharp +sharpen +sharpening +sharpeville +sharply +shatila +shatt +shatter +shattered +shattering +shcherbitsky +sheba +shed +shedding +sheep +sheer +sheet +sheikh +sheikha +shek +shelf +shell +shelling +shelter +shelve +shepherd +shevardnadze +shia +shield +shifa +shift +shifting +shihabi +shiite +shimon +shine +ship +shipment +shipping +shirk +shirley +shoal +shock +shocked +shocking +shoe +shoot +shooting +shop +shopping +shore +shoreline +short +shortage +shortcoming +shortcut +shorten +shortfall +shorthand +shortly +shortsighted +shortsightedness +shot +shoulder +shouldering +shout +show +showcase +shower +shred +shrine +shrink +shrinkage +shrinking +shroud +shrug +shudder +shultz +shun +shunt +shura +shut +shuttle +shy +siad +siaka +siberia +sica +sick +sickness +sid +side +sided +sideline +sidestep +sidetrack +sids +siege +sierra +sigh +sight +sighted +sightedness +sign +signal +signatory +signature +significance +significant +significantly +signify +signing +signpost +sihanouk +silence +silent +silently +silk +silo +silos +silva +silver +similar +similarity +similarly +simla +simmer +simon +simple +simplicity +simplification +simplified +simplify +simplistic +simply +simultaneous +simultaneously +simón +sin +sinai +since +sincere +sincerely +sincerity +sine +sing +singapore +singe +singer +single +singly +singular +singularly +singye +sinister +sink +sinking +sino +sion +siphon +sir +siren +sirimavo +sirleaf +sirmium +sirte +sister +sisterly +sisyphu +sit +site +sitting +situ +situa +situate +situation +six +sixteen +sixteenth +sixth +sixthly +sixtieth +sixty +sizable +size +sizeable +sized +skeleton +skeptic +skeptical +skepticism +sketch +skew +skewed +skilful +skilfully +skill +skilled +skillful +skin +skirmish +sky +skyrocket +slacken +slackening +slam +slander +slanderous +slap +slate +slaughter +slave +slavery +slavonia +slay +sleep +sleeve +slender +slice +slick +slide +slight +slightly +slim 
+slip +slippage +slippery +slogan +slope +slovak +slovakia +slovenia +slovenian +slow +slowdown +slowing +slowly +slowness +sluggish +sluggishness +slum +slumber +slump +smack +small +smallholder +smallness +smallpox +smart +smash +smear +smell +smile +smith +smoke +smokescreen +smoking +smolder +smooth +smoothly +smother +smoulder +smuggle +smuggler +smuggling +snail +snap +snatch +snow +snuff +soak +soar +soares +sober +sobering +soberly +sobriety +social +socialism +socialist +socialiste +socially +societal +society +socio +sociocultural +socioeconomic +sociological +sociologist +sociopolitical +soeharto +sofia +soft +soften +soil +sol +solace +solana +solar +soldier +sole +solely +solemn +solemnity +solemnly +solicit +solicitude +solid +solidarity +solidify +solidly +solitary +solitude +solomon +solu +solution +solve +solvency +solvent +solving +soma +somali +somalia +somalian +somaliland +somalis +somare +sombre +somebody +someday +somehow +someone +something +sometime +sometimes +somewhat +somewhere +somoza +son +song +songun +soo +soon +soothe +sophisticated +sophistication +sophistry +sordid +sore +sorely +sorrow +sorrowful +sorry +sort +soul +sound +sounder +sounding +soundly +soundness +sour +source +south +southeast +southern +southsouth +southward +southwest +sovereign +sovereignly +sovereignty +soviet +sovietism +sow +soweto +sowing +spaak +space +spaceship +spain +span +spaniard +spanish +spare +spark +spate +spawn +speak +speaker +speaking +spear +spearhead +special +specialised +specialist +specialization +specialize +specialized +specially +specie +species +specific +specifically +specificity +specified +specify +specious +spectacle +spectacular +spectacularly +spectator +specter +spectre +spectrum +speculate +speculation +speculative +speculator +speech +speed +speediest +speedily +speeding +speedy +spell +spelt +spend +spending +spew +sphere +spice +spike +spill +spillover +spin +spiral +spiralling +spirit +spirited +spiritual +spirituality 
+spiritually +spite +spla +splendid +splendor +splendour +splinter +split +splitting +splm +spoil +spoiler +spokesman +spoliation +sponsor +sponsorship +spontaneity +spontaneous +spontaneously +sporadic +sporadically +sport +sporting +sportsman +spot +spotlight +spouse +spread +spreading +spring +springboard +sprout +spur +spurious +spurn +spy +squabble +squad +squalid +squalor +squander +squandering +square +squarely +squeeze +srebrenica +srgjan +sri +srpska +ssr +stabex +stabilisation +stabilise +stability +stabilization +stabilize +stabilizing +stable +stack +stadium +staff +staffan +stage +stagger +staggering +staging +stagnant +stagnate +stagnation +stain +stake +stakeholder +stale +stalemate +stalin +stalinist +stalk +stall +stalling +stalwart +stamina +stamp +stance +stand +standard +standardization +standardize +standardized +standby +standing +standoff +standpoint +stands +standstill +stanislaw +staple +star +stare +stark +starkly +start +starting +startling +starvation +starve +starving +stat +statecraft +stated +statehood +stateless +statement +states +statesman +statesmanlike +statesmanship +static +station +stationing +statistic +statistical +statue +stature +status +statute +statutory +staunch +staunchly +stave +stay +stead +steadfast +steadfastly +steadfastness +steadily +steady +steal +steam +steel +steep +steer +steering +stellar +stem +stench +step +stephen +stepping +stereotype +stereotyped +stereotyping +sterile +sterility +sterling +stern +sternly +steven +stevens +steward +stewardship +stick +stiff +stiffen +stifle +stifling +stigma +stigmatize +still +stimulant +stimulate +stimulating +stimulation +stimulus +stipend +stipulate +stipulation +stir +stirring +stock +stockholm +stocking +stockpile +stockpiling +stocktake +stocktaking +stoel +stoicism +stoke +stoltenberg +stomach +stone +stooge +stop +stopgap +stoppage +stopping +storage +store +storehouse +storm +stormy +story +stoyan +straddle +straggle +straight +straighten +straightforward 
+strain +strained +strait +strand +strange +strangely +stranger +strangle +stranglehold +strangulate +strangulation +strap +strasbourg +strata +stratagem +strategic +strategically +strategist +strategy +stratification +straw +stray +stream +streamline +streamlined +streamlining +street +strength +strengthen +strengthened +strengthening +strenuous +strenuously +stress +stretch +strew +stricken +strict +strictly +stricture +stride +strident +strife +strike +striking +strikingly +string +stringency +stringent +strip +stripe +strive +striving +stroessner +stroke +strong +stronghold +strongly +strove +structural +structurally +structure +structured +structuring +struggle +stubborn +stubbornly +stubbornness +student +study +stuff +stultify +stumble +stumbling +stun +stunning +stunt +stupendous +stupid +stupidity +sturdy +style +stymie +suarez +sub +subcommittee +subcontinent +subdue +subgroup +subhuman +subject +subjected +subjection +subjective +subjugate +subjugation +sublime +submarine +submerge +submission +submissive +submit +subordinate +subordination +subregion +subregional +subscribe +subscription +subsequent +subsequently +subservience +subservient +subside +subsidiary +subsidize +subsidized +subsidy +subsist +subsistence +subsoil +substance +substantial +substantially +substantiate +substantive +substantively +substitute +substitution +subsume +subterfuge +subtle +subtlety +subtly +suburb +subversion +subversive +subvert +succeed +success +successful +successfully +succession +successive +successively +successor +succinct +succinctly +succor +succour +succumb +suck +sucre +sudan +sudanese +sudano +sudden +suddenly +sue +suez +suffer +sufferer +suffering +suffice +sufficiency +sufficient +sufficiently +suffocate +suffocating +suffrage +sugar +suggest +suggestion +sui +suicidal +suicide +suit +suitability +suitable +suitably +suited +sukhumi +sula +sully +sultan +sultanate +sum +summarily +summarize +summary +summer +summit +summon +sun +sunday +sung +sunlight 
+sunni +sunset +sunshine +sunt +sup +super +superb +superficial +superfluous +superhighway +superhuman +superimpose +superior +superiority +superpower +supersede +superstructure +supervise +supervision +supervisory +supplant +supplement +supplementary +supplier +supply +supplying +support +supporter +supportive +supports +suppose +supposedly +suppress +suppression +supra +supranational +supremacist +supremacy +supreme +supremely +surcharge +sure +surely +surer +surface +surge +surgery +surgical +suriname +surinamese +surmount +surpass +surplus +surprise +surprised +surprising +surprisingly +surrender +surreptitiously +surrogate +surround +surrounding +surveillance +survey +survival +survive +survivor +sus +susceptibility +susceptible +suspect +suspend +suspension +suspicion +suspicious +sustain +sustainability +sustainable +sustainably +sustained +sustaining +sustenance +suu +suva +svi +swafo +swallow +swamp +swap +swapo +swapoj +swarm +sway +swazi +swaziland +swear +sweat +sweden +swedish +sweep +sweeping +sweet +swell +swift +swifter +swiftly +swiftness +swim +swing +swirl +swiss +switch +switzerland +swollen +swoop +sword +sydney +symbiosis +symbiotic +symbol +symbolic +symbolically +symbolise +symbolism +symbolize +symmetrical +symmetry +sympathetic +sympathetically +sympathize +sympathizer +sympathy +symposium +symptom +symptomatic +synagogue +synchronize +synchronous +syndicate +syndrome +synergistic +synergy +synonym +synonymous +synthesis +synthesize +synthetic +syria +syrian +syrians +system +systematic +systematically +systematize +systemic +taba +table +taboo +tacit +tacitly +tack +tackle +tackling +tact +tactfully +tactic +tactical +tad +tadeusz +tae +tag +tahrir +taif +tail +tailor +taint +taipei +taiwan +taiwanese +tajik +tajikistan +take +takeover +taker +taking +tale +talent +talented +taliban +talk +talking +tall +tally +tame +tamil +tamper +tamuz +tan +tanaka +tandem +tangible +tangibly +tangle +tangled +tank +tanker +tantamount +tanzania 
+tanzanian +taoiseach +tap +tape +tapestry +tardy +tarfaya +target +targeted +targeting +tariff +tarja +tarnish +tary +tashkent +task +taste +tat +tax +taxation +taxis +taxpayer +taya +taylor +tbilisi +tea +teach +teacher +teaching +teal +team +teamwork +tear +tech +technical +technicality +technically +technician +technique +technocrat +technocratic +technological +technologically +technology +tectonic +ted +tedious +tee +teem +teen +teeter +tegucigalpa +teheran +tehran +tejan +tel +tela +telecommunication +telecommunications +telephone +televise +television +tell +telling +temerity +temper +temperament +temperate +temperature +tempest +template +temple +tempo +temporal +temporarily +temporary +tempore +tempt +temptation +tempting +ten +tenable +tenacious +tenaciously +tenacity +tenant +tend +tendency +tendentious +tender +tenet +tenfold +tenor +tense +tension +tent +tentacle +tentative +tenth +tenuous +tenure +teresa +term +terminal +terminate +termination +terminology +tern +terra +terrain +terrestrial +terri +terrible +terribly +terrify +terrifying +territorial +territorially +territory +terror +terrorism +terrorist +terroristic +terrorize +tertiary +test +testament +testifie +testify +testimony +testing +text +textbook +textile +texture +thabo +thai +thailand +thani +thank +thankful +thankfully +thankless +thant +thatcher +thaw +thawing +theater +theatre +thee +theft +thematic +theme +theo +theologian +theological +theoretical +theoretically +theoretician +theory +therapy +thereafter +thereby +therefore +therefrom +therein +thereof +thereon +thereto +therewith +thermal +thermo +thermonuclear +thesis +thessaloniki +thi +thick +thief +thieu +thin +thing +think +thinker +thinking +thinly +third +thirdly +thirst +thirsty +thirteen +thirteenth +thirtieth +thirty +tho +thomas +thorn +thorny +thorough +thoroughgoe +thoroughly +thoroughness +thou +though +thought +thoughtful +thoughtfully +thoughtless +thousand +thrash +thread +threadbare +threat +threaten +threatened 
+threatening +three +threefold +threshold +thrive +thriving +throat +throe +throne +throughout +throw +thrust +thug +thumb +thunder +thursday +thus +thwart +thy +tic +ticad +tick +ticket +tidal +tide +tiding +tie +tier +tiger +tight +tighten +tightening +tightly +tile +till +tilt +timber +timbuktu +time +timeframe +timeless +timeline +timeliness +timely +times +timetable +timid +timidity +timidly +timing +timor +timorese +tin +tinder +tine +tinge +tinker +tinue +tiny +tion +tional +tip +tirana +tiraspol +tire +tired +tireless +tirelessly +tissue +tit +titan +titanic +title +tito +tive +tlatelolco +tobacco +tobago +today +todayâs +todayí +todor +toe +together +togetherness +togo +togolese +toil +tokelau +token +tokyo +tolerable +tolerance +tolerant +tolerate +toleration +toll +tom +tomb +tome +tomorrow +ton +tone +tonga +tongan +tongue +tonne +tony +tool +tooth +top +topic +topical +topicality +topmost +topography +topple +tor +torch +torment +tormented +torn +tornado +toronto +torpedo +torrent +torrential +torrijos +tortuous +torture +tortured +torturer +toss +total +totalitarian +totalitarianism +totality +totally +totter +touch +touching +touchstone +tough +toumani +tour +toure +tourism +tourist +touré +toussaint +tout +toward +towards +tower +town +township +toxic +toxin +toy +trace +tracing +track +tracking +tract +tractor +trade +trader +trading +tradition +traditional +traditionally +traffic +trafficker +trafficking +tragedy +tragic +tragically +trail +train +training +trait +traitor +trajectory +trample +trampling +tran +tranquil +tranquility +tranquillity +trans +transaction +transatlantic +transborder +transboundary +transcend +transcendent +transcendental +transcontinental +transdniestria +transdniestrian +transfer +transform +transformation +transformational +transformative +transgender +transgress +transgression +transgressor +transhipment +transient +transit +transition +transitional +transitory +transkei +translate +translation +transmission +transmit 
+transnational +transnistria +transnistrian +transparency +transparent +transparently +transpire +transplant +transplanted +transport +transportation +transpose +transregional +transshipment +traore +trap +trapping +trauma +traumatic +traumatize +travail +travel +traveler +traveller +traverse +travesty +trawl +trawling +tre +treacherous +treachery +tread +treason +treasure +treasury +treat +treatable +treatise +treatment +treaty +treble +tree +treki +tremble +tremendous +tremendously +tremor +trench +trend +trepczynski +trepczyriski +trepidation +tri +triad +trial +triangle +triangular +tribal +tribalism +tribe +tribulation +tribunal +tribune +tributary +tribute +trick +trickery +trickle +trie +triennial +trigger +trilateral +trillion +trilogy +trim +trinidad +trinity +trip +tripartite +triple +tripoli +trite +triumph +triumphalism +triumphant +triumphantly +trivial +troika +trojan +tromelin +troop +tropical +trouble +troubled +troublesome +troubling +truce +truck +true +truer +truism +truly +truman +trump +trumpet +trust +trusted +trustee +trusteeship +trustful +trustworthy +truth +truthful +truthfully +try +trygve +trying +tsarist +tse +tsetung +tskhinvali +tsunami +tsunamis +tuberculosis +tuesday +tug +tuition +tum +tumb +tumble +tumult +tumultuous +tuna +tunb +tune +tung +tuni +tunis +tunisia +tunisian +tunnel +turbulence +turbulent +turk +turkey +turkish +turkman +turkmen +turkmenistan +turmoil +turn +turnabout +turnaround +turner +turnhalle +turning +turnout +turnover +turquoise +tutelage +tutsi +tuvalu +tween +twelfth +twelve +twentieth +twenty +twice +twilight +twin +twist +twisted +two +twofold +type +typhoon +typical +typically +typify +tyrannical +tyranny +tyrant +tyrol +tyrolean +ubiquitous +udovenko +uganda +ugandan +ugly +ukraine +ukrainian +ulaanbaatar +ulterior +ultimate +ultimately +ultimatum +ultra +umbrella +ummah +unabashed +unabashedly +unabate +unabated +unable +unacceptability +unacceptable +unacceptably +unaccounted +unachievable +unaddresse 
+unaddressed +unaffected +unaid +unalterable +unaltered +unambiguous +unambiguously +unamid +unamir +unamsil +unanimity +unanimous +unanimously +unanswered +unanticipated +unarmed +unashamedly +unassailable +unasur +unattainable +unattained +unattended +unauthorized +unavailability +unavem +unavoidable +unavoidably +unaware +unbalanced +unbanning +unbearable +unbelievable +unbelievably +unbiased +unblock +unborn +unbounded +unbreakable +unbridgeable +unbridle +unbridled +unbroken +uncaring +uncdf +uncease +unceasing +unceasingly +unced +uncertain +uncertainty +unchallengeable +unchallenged +unchangeable +unchanged +unchanging +uncharted +unchecked +uncivil +uncivilized +unclear +unclo +uncomfortable +uncommitted +uncommon +uncompetitive +uncompromise +uncompromising +uncompromisingly +unconcerned +unconditional +unconditionally +unconnected +unconquerable +unconscionable +unconstitutional +unconstructive +uncontested +uncontrollable +uncontrolle +uncontrolled +unconventional +unconvincing +uncooperative +uncoordinated +uncover +unctad +unctadj +und +undaunte +undaunted +undcp +undecided +undeclared +undefined +undemocratic +undeniable +undeniably +undercurrent +undercut +underdeveloped +underdevelopment +underemployed +underemployment +underestimate +underfoot +undergird +undergo +undergone +underground +underhanded +underlay +underlie +underline +underlying +undermine +undermining +underneath +undernourished +undernourishment +underpin +underpinning +underprivileged +underrate +underrepresented +underscore +underscored +understand +understandable +understandably +understanding +understate +undertake +undertaking +undertone +undervalue +underwater +underway +underworld +underwrite +undeserved +undesirable +undeterred +undeveloped +undiluted +undiminishe +undiminished +undisguise +undisguised +undisputed +undisturbed +undivided +undo +undocumented +undof +undoubted +undoubtedly +undp +undreame +undue +unduly +undying +unease +uneasiness +uneasy +uneconomic 
+uneducated +unemployed +unemployment +unende +unending +unenviable +unep +unequal +unequalled +unequally +unequivocal +unequivocally +unesco +unethical +uneven +unevenly +unevenness +unexpected +unexpectedly +unexploded +unexploited +unexplored +unfaile +unfailing +unfailingly +unfair +unfairly +unfairness +unfaltere +unfamiliar +unfathomable +unfavorable +unfavourable +unfavourably +unfccc +unfeasible +unfettered +unficyp +unfinished +unfit +unflagge +unflagging +unflaggingly +unflinche +unflinchingly +unfold +unfolding +unforeseeable +unforeseen +unforgettable +unforgivable +unfortunate +unfortunately +unfounded +unfpa +unfriendly +unfulfilled +ungovernable +unhampere +unhappily +unhappiness +unhappy +unhcr +unhealthy +unheard +unheeded +unhelpful +unhesitatingly +unhindered +unholy +unicef +unidad +unidentified +unido +unifem +unification +unified +unifil +uniform +uniformed +uniformity +uniformly +unify +unifying +unilateral +unilateralism +unilaterally +unimaginable +unimagined +unimpeachable +unimpeded +unimplemented +unimportant +uninhabitable +uninhabited +uninhibited +unintended +unintentional +unintentionally +uninterrupted +uninterruptedly +uninvited +uninvolved +union +unionism +unionist +unipolar +unipolarity +unique +uniquely +uniqueness +unison +unit +unita +unitaid +unitar +unitary +unite +uniting +unity +universal +universalism +universalist +universality +universalization +universalize +universally +universe +university +união +unjust +unjustifiable +unjustifiably +unjustified +unjustly +unknown +unlawful +unlawfully +unleash +unleashed +unleashing +unless +unlike +unlikely +unlimited +unlock +unmanageable +unmask +unmasked +unmatched +unmee +unmet +unmik +unmil +unmindful +unmistakable +unmistakably +unmitigated +unmove +unnatural +unnecessarily +unnecessary +unnoticed +unobstructed +unofficial +unomil +unosom +unpaid +unpalatable +unparalleled +unpardonable +unpatriotic +unpayable +unplanned +unpleasant +unpopular +unprecedented 
+unprecedentedly +unpredep +unpredictability +unpredictable +unprejudiced +unprepared +unprincipled +unproductive +unprofor +unprotected +unprovoked +unpunished +unqualified +unquenchable +unquestionable +unquestionably +unquestione +unquestioned +unravel +unravelling +unreal +unrealistic +unrealized +unreasonable +unreasonableness +unreasonably +unregulated +unrelated +unrelenting +unrelentingly +unreliable +unremitting +unremittingly +unrepentant +unreported +unrepresentative +unrepresented +unreserved +unreservedly +unresolved +unresponsive +unrest +unrestrained +unrestricted +unruly +unrwa +unsafe +unsatisfactory +unsatisfied +unscathed +unscom +unscrupulous +unscrupulously +unseemly +unseen +unselfish +unselfishly +unselfishness +unsettle +unsettled +unsettling +unshakable +unshakeable +unshaken +unsolved +unsound +unspeakable +unstable +unsteady +unstinted +unstinting +unstintingly +unstoppable +unsuccessful +unsuccessfully +unsuitable +unsuited +unsung +unsure +unsuspected +unsuspecting +unsustainable +unswerve +unswervingly +untac +untag +untapped +untenable +unthinkable +untie +untied +untimely +untire +untiring +untiringly +unto +untold +untouchable +untouched +untoward +untrammelled +untrue +unturned +unusable +unused +unusual +unusually +unveil +unwanted +unwarranted +unwavere +unwavering +unwaveringly +unwelcome +unwholesome +unwieldy +unwilling +unwillingly +unwillingness +unwise +unwitting +unwittingly +unworkable +unworthy +unyielde +unyielding +upcoming +update +updating +upgrade +upgrading +upheaval +uphill +uphold +upholder +upholding +upkeep +uplift +uplifting +upon +upper +uppermost +upright +uprising +uproot +uprooted +uprooting +upset +upsetting +upshot +upside +upstream +upsurge +upswing +upturn +upward +upwards +ural +uranium +urban +urbanization +urge +urgency +urgent +urgently +urging +urng +uruguay +uruguayan +usable +usage +use +useful +usefully +usefulness +useless +uselessly +uselessness +user +usher +ushering +ussr +usual +usually 
+usurp +usurpation +usurped +usurper +usurping +uthant +uti +utilisation +utilise +utility +utilization +utilize +utmost +utopia +utopian +utopias +utter +utterance +utterly +uzbek +uzbekistan +vacant +vacate +vaccinate +vaccination +vaccine +vacillate +vacillation +vaclav +vacuum +vagary +vague +vagueness +vain +vainly +valery +valiant +valiantly +valid +validate +validity +validly +valletta +valley +valor +valour +valuable +value +valve +van +vance +vancouver +vandalism +vanguard +vanish +vanity +vano +vanquish +vanquished +vantage +vanuatu +variability +variable +variance +variant +variation +varied +variety +various +variously +vary +varying +vassal +vast +vastly +vastness +vatican +vault +vaunt +vaunted +vector +veer +vegetable +vegetation +vehemence +vehement +vehemently +vehicle +veil +veiled +vein +venda +vendetta +venerable +venerate +venezuela +venezuelan +vengeance +venice +vent +vention +venture +venue +veracity +verbal +verbally +verbatim +verdant +verde +verdean +verdict +verge +verifiable +verification +verify +verily +veritable +verner +versa +versaille +versailles +versatile +verse +version +versus +vertical +vertically +vessel +vest +vested +vestige +vet +veteran +veto +vex +vexing +via +viability +viable +vibrancy +vibrant +vibration +vice +vicinity +vicious +viciously +vicissitude +victim +victimization +victimize +victor +victoria +victorious +victoriously +victory +video +vie +vieira +vienna +vientiane +viet +vietnam +vietnamese +vietnamization +view +viewpoint +vigilance +vigilant +vigor +vigorous +vigorously +vigour +vile +vilification +vilify +village +villager +villain +vilnius +vincent +vindicate +vindication +violate +violation +violator +violence +violent +violently +virgilio +virgin +virtual +virtually +virtue +virtuous +virulence +virulent +virus +vis +visa +visibility +visible +visibly +vision +visionary +visit +visitor +vista +visualize +vital +vitality +vitally +vitiate +vivendi +vivid +vividly +vladimir +vladivostok +vll 
+vocabulary +vocal +vocation +vocational +vociferous +vociferously +vogue +voice +voiceless +void +vojvodina +vol +volatile +volatility +volcanic +volcano +volcker +volition +volta +volume +voluminous +voluntarily +voluntary +volunteer +von +voracious +vorster +vortex +vote +voter +voting +vow +voyage +vuk +vulgar +vulnerability +vulnerable +vulture +václav +wade +wage +wager +waging +wail +wait +waiting +waive +waiver +wake +waldheim +wale +walk +wall +wallow +walter +walvis +wan +wand +wander +wanderer +wandering +wane +wangchuck +want +wanton +wantonly +war +ward +ware +warehouse +warfare +warhead +warlike +warlord +warm +warming +warmly +warmonger +warmongere +warmongering +warmth +warn +warning +warp +warped +warplane +warrant +warrior +warsaw +warship +wartime +wary +wash +washington +wastage +waste +wasteful +wastefulness +wasteland +wasting +watch +watchdog +watchful +watchword +water +watercourse +watershed +watertight +waterway +wave +waver +way +wayside +weak +weaken +weakened +weakening +weakness +weal +wealth +wealthy +wean +weapon +weaponry +weapons +wear +weariness +weary +weather +weave +web +website +wechmar +wed +wedge +wednesday +weed +week +weekend +weekly +weep +weigh +weight +weighted +weighty +wel +welcome +welcoming +welfare +well +wellbee +wellbeing +wellspring +welter +west +western +westgate +wet +weu +wfp +whale +whatever +whatsoever +wheat +wheel +whence +whenever +whereas +whereby +wherein +wherever +wherewithal +whether +whichever +whilst +whim +whip +whirlwind +white +whittle +whoever +whole +wholehearte +wholeheartedly +wholesale +wholesome +wholly +whomever +whose +wicked +wide +widely +widen +widening +wider +widespread +widow +width +wield +wife +wild +wilderness +wildfire +wildlife +wildly +wilful +wilfully +willful +willfully +william +williamsburg +willing +willingly +willingness +willy +wilson +wilt +win +wind +windhoek +window +windward +wine +wing +winner +winston +winter +wipe +wire +wiriyamu +wisdom +wise +wisely +wiser 
+wish +wisher +wishful +wit +witch +withdraw +withdrawal +wither +withering +withhold +withholding +within +without +withstand +witness +wittingly +wmds +woe +woeful +woefully +wojciech +wolf +woman +womb +women +wonder +wonderful +woo +wood +woodrow +woods +word +wording +work +workable +worker +workers +workforce +working +workload +workplace +workshop +worldwide +worldâs +worldí +worldís +worried +worrisome +worry +worrying +worse +worsen +worsening +worship +worst +worth +worthily +worthiness +worthless +worthwhile +worthy +would +wound +wounded +wounding +wrack +wrangle +wrangling +wrap +wrath +wreak +wreck +wrench +wrest +wrestle +wretched +wretchedness +wring +writ +write +writer +writing +wrongdoing +wrongful +wrongfully +wrongly +wto +wye +xanana +xenophobia +xenophobic +xix +xlix +xvi +xvii +xviii +xxi +xxii +xxiii +xxiv +xxix +xxv +xxvi +xxvii +xxviii +xxx +yacyreta +yahya +yalta +yamoussoukro +yangon +yankee +yaounde +yaoundé +yard +yardstick +yasser +yassin +yasushi +yawn +yayi +yazidi +yearly +yearn +yearning +years +yellow +yeltsin +yemen +yemeni +yen +yerevan +yesterday +yesteryear +yet +yield +yitzhak +yoke +yom +york +young +youngster +youth +youthful +yugoslav +yugoslavia +yuri +zagreb +zaire +zairian +zambia +zambian +zation +zayed +zeal +zealand +zealander +zealous +zealously +zelaya +zenawi +zenith +zepa +zero +zhivkov +zia +ziaur +zimbabwe +zimbabwean +zine +zion +zionism +zionist +zonal +zone +zuma +zurich +état +être +рlo diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..afee23ff63e49951556ca5ffc4e0c3b330fb82c6 --- /dev/null +++ b/environment.yml @@ -0,0 +1,27 @@ +name: dtect +channels: + - defaults + - conda-forge + - pytorch +dependencies: + - python=3.9 + - pip + - pip: + - anthropic==0.55.0 + - faiss-cpu==1.11.0 + - gensim==3.8.3 + - langchain==0.3.26 + - langchain-community==0.3.26 + - langchain-core==0.3.66 + - langchain-google-genai==2.1.5 + - langchain-openai==0.3.25 
+ - octis==1.14.0 + - plotly==6.1.2 + - scikit-learn==1.1.0 + - scipy==1.10.1 + - sentence-transformers==4.1.0 + - streamlit==1.46.0 + - tiktoken==0.9.0 + - torch==2.7.1 + - torchvision==0.22.1 + diff --git a/main.py b/main.py new file mode 100644 index 0000000000000000000000000000000000000000..31354ec1389994b5f6708c7d915fdcc6bb76ba6e --- /dev/null +++ b/main.py @@ -0,0 +1 @@ +_ diff --git a/requirements.txt b/requirements.txt index 28d994e22f8dd432b51df193562052e315ad95f7..4a0bc09aab19930a5e572fcc114f50942b1cd5d3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,19 @@ altair pandas -streamlit \ No newline at end of file +anthropic==0.55.0 +faiss-cpu==1.11.0 +gensim +langchain==0.3.26 +langchain-community==0.3.26 +langchain-core==0.3.66 +langchain-google-genai==2.1.5 +langchain-openai==0.3.25 +octis +plotly==6.1.2 +scikit-learn==1.1.0 +scipy==1.10.1 +sentence-transformers==4.1.0 +streamlit==1.46.0 +tiktoken==0.9.0 +torch==2.7.1 +torchvision==0.22.1