first
- __pycache__/preprocessing.cpython-310.pyc +0 -0
- app_models/__pycache__/bag_of_words_MODEL.cpython-310.pyc +0 -0
- app_models/__pycache__/gpt_MODEL.cpython-310.pyc +0 -0
- app_models/__pycache__/lstm_MODEL.cpython-310.pyc +0 -0
- app_models/__pycache__/rubert_MODEL.cpython-310.pyc +0 -0
- app_models/__pycache__/toxicity_MODEL.cpython-310.pyc +0 -0
- app_models/bag_of_words_MODEL.py +20 -0
- app_models/gpt_MODEL.py +37 -0
- app_models/lstm_MODEL.py +90 -0
- app_models/rubert_MODEL.py +33 -0
- app_models/toxicity_MODEL.py +20 -0
- app_pages/__pycache__/page1_model_comparison.cpython-310.pyc +0 -0
- app_pages/__pycache__/page2_rubert_toxicity.cpython-310.pyc +0 -0
- app_pages/__pycache__/page3_gpt_model.cpython-310.pyc +0 -0
- app_pages/page1_model_comparison.py +21 -0
- app_pages/page2_rubert_toxicity.py +20 -0
- app_pages/page3_gpt_model.py +14 -0
- main_app.py +14 -0
- model_data/bow_model.joblib +3 -0
- model_data/bow_vectorizer.joblib +3 -0
- model_data/finetuned_gpt/config.json +41 -0
- model_data/finetuned_gpt/generation_config.json +7 -0
- model_data/finetuned_gpt/merges.txt +0 -0
- model_data/finetuned_gpt/model.safetensors +3 -0
- model_data/finetuned_gpt/special_tokens_map.json +37 -0
- model_data/finetuned_gpt/tokenizer_config.json +58 -0
- model_data/finetuned_gpt/vocab.json +0 -0
- model_data/logreg_model_v2.joblib +3 -0
- model_data/lstm_model.pth +3 -0
- model_data/vocab_kinopoisk_lstm.json +0 -0
- preprocessing.py +57 -0
- requirements.txt +115 -0
__pycache__/preprocessing.cpython-310.pyc
ADDED
Binary file (2.32 kB)
app_models/__pycache__/bag_of_words_MODEL.cpython-310.pyc
ADDED
Binary file (630 Bytes)
app_models/__pycache__/gpt_MODEL.cpython-310.pyc
ADDED
Binary file (1.08 kB)
app_models/__pycache__/lstm_MODEL.cpython-310.pyc
ADDED
Binary file (3.49 kB)
app_models/__pycache__/rubert_MODEL.cpython-310.pyc
ADDED
Binary file (1.43 kB)
app_models/__pycache__/toxicity_MODEL.cpython-310.pyc
ADDED
Binary file (985 Bytes)
app_models/bag_of_words_MODEL.py
ADDED
@@ -0,0 +1,20 @@
import joblib
from preprocessing import data_preprocessing

# Load the trained BoW model and vectorizer
vectorizer_path = 'model_data/bow_vectorizer.joblib'
model_path = 'model_data/bow_model.joblib'
vectorizer = joblib.load(vectorizer_path)
model = joblib.load(model_path)


def predict(input_text):
    # Clean the text and vectorize it with the fitted BoW vectorizer
    processed_text = data_preprocessing(input_text)
    user_input_bow = vectorizer.transform([processed_text])
    # Make a prediction
    prediction = model.predict(user_input_bow)
    return prediction
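
A minimal usage sketch (not part of the commit) showing how predict can be called outside Streamlit; the sample review is invented, and the label-to-class mapping is an assumption since the commit does not document it:

# Hypothetical usage sketch, assuming the model files from model_data/ are present.
from app_models.bag_of_words_MODEL import predict

sample_review = "Отличный фильм, смотрел на одном дыхании!"  # invented example input
label = predict(sample_review)  # array-like with one class label
print(label[0])                 # class index as stored in the trained model (mapping assumed, not documented here)
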
app_models/gpt_MODEL.py
ADDED
@@ -0,0 +1,37 @@
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load the model and tokenizer from the checkpoint shipped in this commit
# (the original code pointed at a machine-local path)
model_path = 'model_data/finetuned_gpt'
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
model = GPT2LMHeadModel.from_pretrained(model_path)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def generate_text(prompt_text, length, temperature):
    encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors="pt")
    encoded_prompt = encoded_prompt.to(device)

    output_sequences = model.generate(
        input_ids=encoded_prompt,
        max_length=length,
        temperature=temperature,
        top_k=20,
        top_p=0.9,
        repetition_penalty=1.2,
        do_sample=True,
        num_return_sequences=1,
    )

    # Decode the generated text
    generated_sequence = output_sequences[0].tolist()
    text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)

    # Remove the prompt from the generated text
    text = text[len(tokenizer.decode(encoded_prompt[0], clean_up_tokenization_spaces=True)):]

    return text.strip()
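
A short usage sketch (not part of the commit) for generate_text; the prompt and parameter values below are illustrative only:

# Hypothetical usage sketch, assuming model_data/finetuned_gpt is available locally.
from app_models.gpt_MODEL import generate_text

continuation = generate_text(
    prompt_text="Этот фильм напомнил мне",  # invented prompt
    length=120,                             # passed through as max_length to model.generate
    temperature=0.8,                        # higher values produce more varied text
)
print(continuation)
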
app_models/lstm_MODEL.py
ADDED
@@ -0,0 +1,90 @@
import json
import torch
import torch.nn as nn
from dataclasses import dataclass
from preprocessing import preprocess_single_string

with open('model_data/vocab_kinopoisk_lstm.json', 'r') as file:
    vocab_to_int = json.load(file)

@dataclass
class ConfigRNN:
    vocab_size: int
    device: str
    n_layers: int
    embedding_dim: int
    hidden_size: int
    seq_len: int
    bidirectional: bool

net_config = ConfigRNN(
    vocab_size=len(vocab_to_int) + 1,
    device='cpu',
    n_layers=3,
    embedding_dim=64,
    hidden_size=64,
    seq_len=100,
    bidirectional=False,
)


class LSTMClassifier(nn.Module):
    def __init__(self, rnn_conf=net_config) -> None:
        super().__init__()

        self.embedding_dim = rnn_conf.embedding_dim
        self.hidden_size = rnn_conf.hidden_size
        self.bidirectional = rnn_conf.bidirectional
        self.n_layers = rnn_conf.n_layers

        self.embedding = nn.Embedding(rnn_conf.vocab_size, self.embedding_dim)
        self.lstm = nn.LSTM(
            input_size=self.embedding_dim,
            hidden_size=self.hidden_size,
            bidirectional=self.bidirectional,
            batch_first=True,
            num_layers=self.n_layers,
        )
        self.bidirect_factor = 2 if self.bidirectional else 1
        self.clf = nn.Sequential(
            nn.Linear(self.hidden_size * self.bidirect_factor, 32),
            nn.Tanh(),
            nn.Dropout(),
            nn.Linear(32, 3),
        )

    def model_description(self):
        direction = 'bidirect' if self.bidirectional else 'onedirect'
        return f'lstm_{direction}_{self.n_layers}'

    def forward(self, x: torch.Tensor):
        embeddings = self.embedding(x)
        out, _ = self.lstm(embeddings)
        out = out[:, -1, :]  # [whole batch, last hidden state h_n, every element of h_n]
        out = self.clf(out.squeeze())
        return out


def load_lstm_model():
    model = LSTMClassifier()
    model.load_state_dict(torch.load('model_data/lstm_model.pth'))
    model.eval()
    return model

model = load_lstm_model()


def predict_review(review_text, model=model, net_config=net_config, vocab_to_int=vocab_to_int):
    sample = preprocess_single_string(review_text, net_config.seq_len, vocab_to_int)
    model.eval()
    with torch.no_grad():
        output = model(sample.unsqueeze(0)).to(net_config.device)
    if output.dim() == 1:
        output = output.unsqueeze(0)  # restore the batch dimension dropped by squeeze()
    _, predicted_class = torch.max(output, dim=1)
    if predicted_class.item() == 0:
        return ("This is a positive review! It's great that you liked the film! "
                "You can head over to the GPT-2 section and discuss the film with the model!")
    elif predicted_class.item() == 1:
        return "Most likely... this is a neutral review... how boring of you..."
    else:
        return ("Why so toxic? Show some restraint: if you didn't like the film, just move on "
                "and don't spoil the authors' mood, they did their best!")
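
For reference, a sketch (not part of the commit) of calling predict_review directly; the review string is invented:

# Hypothetical usage sketch, assuming model_data/lstm_model.pth and the vocabulary file exist.
from app_models.lstm_MODEL import predict_review

print(predict_review("Скучный и предсказуемый сюжет..."))  # prints one of the three canned responses
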
app_models/rubert_MODEL.py
ADDED
@@ -0,0 +1,33 @@
from transformers import AutoModel, AutoTokenizer
import torch
import numpy as np
from sklearn.linear_model import LogisticRegression
import joblib


# Load RuBERT model and tokenizer
rubert_model_name = "cointegrated/rubert-tiny2"
tokenizer = AutoTokenizer.from_pretrained(rubert_model_name)
model = AutoModel.from_pretrained(rubert_model_name)

# Load the logistic regression classifier shipped in this commit
# (the original code pointed at a machine-local path)
logreg_model_path = "model_data/logreg_model_v2.joblib"
logreg_model = joblib.load(logreg_model_path)

def embed_bert_cls(text, model, tokenizer):
    """Generate embeddings for input text using the RuBERT model."""
    inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    embeddings = outputs.last_hidden_state[:, 0, :]
    embeddings = torch.nn.functional.normalize(embeddings)
    return embeddings.cpu().numpy()

def classify_text(text, model=model, tokenizer=tokenizer, classifier=logreg_model):
    """Classify a review as Good, Neutral or Bad using RuBERT embeddings and logistic regression."""
    embeddings = embed_bert_cls(text, model, tokenizer)
    prediction = classifier.predict(embeddings)
    dict_class = {0: 'Good', 1: 'Neutral', 2: 'Bad'}

    return dict_class[prediction[0]]
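
A sketch (not part of the commit) of classify_text in isolation; the input string is invented:

# Hypothetical usage sketch, assuming model_data/logreg_model_v2.joblib is available.
from app_models.rubert_MODEL import classify_text

print(classify_text("Прекрасная игра актёров!"))  # -> 'Good', 'Neutral' or 'Bad'
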
app_models/toxicity_MODEL.py
ADDED
@@ -0,0 +1,20 @@
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_checkpoint = 'cointegrated/rubert-tiny-toxicity'
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)
if torch.cuda.is_available():
    model.cuda()

def text2toxicity(text, aggregate=True):
    """Calculate the toxicity of a text (if aggregate=True) or a vector of toxicity aspects (if aggregate=False)."""
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True).to(model.device)
        proba = torch.sigmoid(model(**inputs).logits).cpu().numpy()
    if isinstance(text, str):
        proba = proba[0]
    if aggregate:
        return 1 - proba.T[0] * (1 - proba.T[-1])
    return proba
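
A sketch (not part of the commit) of the two output modes of text2toxicity; the input string is illustrative:

# Hypothetical usage sketch; the checkpoint is downloaded from the Hugging Face Hub on first use.
from app_models.toxicity_MODEL import text2toxicity

print(text2toxicity("Ты ужасен!"))                   # aggregated toxicity score in [0, 1]
print(text2toxicity("Ты ужасен!", aggregate=False))  # per-aspect probability vector
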
app_pages/__pycache__/page1_model_comparison.cpython-310.pyc
ADDED
Binary file (904 Bytes)
app_pages/__pycache__/page2_rubert_toxicity.cpython-310.pyc
ADDED
Binary file (794 Bytes)
app_pages/__pycache__/page3_gpt_model.cpython-310.pyc
ADDED
Binary file (845 Bytes)
app_pages/page1_model_comparison.py
ADDED
@@ -0,0 +1,21 @@
import streamlit as st
from app_models.rubert_MODEL import classify_text
from app_models.bag_of_words_MODEL import predict
from app_models.lstm_MODEL import predict_review

class_prefix = 'This review is likely... '

def run():
    st.title("Movie Review Classification")
    st.write("This page compares three models: Bag of Words/TF-IDF, LSTM, and BERT.")

    # User input
    user_input = st.text_area("Enter a movie review")

    # Buttons for model selection
    if st.button('Classify with BoW/TF-IDF'):
        st.write(f'{class_prefix}{predict(user_input)}')
    if st.button('Classify with LSTM'):
        st.write(f'{class_prefix}{predict_review(user_input)}')
    if st.button('Classify with ruBERT'):
        st.write(f'{class_prefix}{classify_text(user_input)}')
app_pages/page2_rubert_toxicity.py
ADDED
@@ -0,0 +1,20 @@
import streamlit as st

from app_models.toxicity_MODEL import text2toxicity


def run():
    st.title('Toxicity Detection')
    st.write('This tool classifies text as toxic or non-toxic using RuBERT.')

    user_input = st.text_area("Enter text to classify", "Type your text here...")

    if st.button('Classify'):
        toxicity_score = text2toxicity(user_input)
        st.write('Toxicity score:', toxicity_score)

        # Interpret the score for the user
        if toxicity_score > 0.5:
            st.write("This text is likely to be considered toxic.")
        else:
            st.write("This text is likely to be considered non-toxic.")
app_pages/page3_gpt_model.py
ADDED
@@ -0,0 +1,14 @@
import streamlit as st
from app_models.gpt_MODEL import generate_text


def run():
    st.title('GPT Text Generation')
    prompt_text = st.text_area("Input Text", "Type here...")
    length = st.slider("Length of Generated Text", min_value=50, max_value=500, value=200)
    temperature = st.slider("Temperature", min_value=0.1, max_value=1.0, value=0.7, step=0.1)

    if st.button('Generate Text'):
        with st.spinner('Generating...'):
            generated_text = generate_text(prompt_text, length, temperature)
        st.text_area("Generated Text", generated_text, height=250)
main_app.py
ADDED
@@ -0,0 +1,14 @@
import streamlit as st

from app_pages import page1_model_comparison, page2_rubert_toxicity, page3_gpt_model

st.sidebar.title('Navigation')
selection = st.sidebar.radio("Go to", ["Model Comparison", "RuBERT Toxicity Detection", "GPT Model"])

if selection == "Model Comparison":
    page1_model_comparison.run()
elif selection == "RuBERT Toxicity Detection":
    page2_rubert_toxicity.run()
elif selection == "GPT Model":
    page3_gpt_model.run()
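
With the dependencies from requirements.txt installed, this entry point is launched the standard Streamlit way: streamlit run main_app.py.
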
model_data/bow_model.joblib
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f5b6a472e5fd4a44099fdde129a19e7fdf4f7c078d88b9fb53bd0ed4508a46ac
size 3942479
model_data/bow_vectorizer.joblib
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d760947efd85a7b54b7f17c20215386cda613e8329a1c55a76cc8e4707faae19
size 4126902
model_data/finetuned_gpt/config.json
ADDED
@@ -0,0 +1,41 @@
{
  "_name_or_path": "sberbank-ai/rugpt3small_based_on_gpt2",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 1,
  "embd_pdrop": 0.1,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 2048,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 2048,
  "pad_token_id": 0,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "4.37.2",
  "use_cache": true,
  "vocab_size": 50264
}
model_data/finetuned_gpt/generation_config.json
ADDED
@@ -0,0 +1,7 @@
{
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 0,
  "transformers_version": "4.37.2"
}
model_data/finetuned_gpt/merges.txt
ADDED
The diff for this file is too large to render.
model_data/finetuned_gpt/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9cf4a373f976b99cfdc892f83394728c1d04a62d45b0be7e923fcb9b4128d6ba
size 500941440
model_data/finetuned_gpt/special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
model_data/finetuned_gpt/tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
{
  "add_bos_token": false,
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "4": {
      "content": "<mask>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "eos_token": "</s>",
  "errors": "replace",
  "mask_token": "<mask>",
  "model_max_length": 2048,
  "pad_token": "<pad>",
  "padding_side": "left",
  "tokenizer_class": "GPT2Tokenizer",
  "truncation_side": "left",
  "trust_remote_code": false,
  "unk_token": "<unk>"
}
model_data/finetuned_gpt/vocab.json
ADDED
The diff for this file is too large to render.
model_data/logreg_model_v2.joblib
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:92d11a87982a5d6f81eb49df6ceb5640aedefbf692df41f1c03b201c8bd7f032
size 8383
model_data/lstm_model.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ba752f56cf91b275b17d9e7f774661db66297e074d2cfedf865645dce1045b43
size 4496930
model_data/vocab_kinopoisk_lstm.json
ADDED
The diff for this file is too large to render.
preprocessing.py
ADDED
@@ -0,0 +1,57 @@
import re
import numpy as np
import torch
import unicodedata

from nltk.corpus import stopwords
# Requires the NLTK stopwords corpus: nltk.download('stopwords').
# Note: stopwords.words('russian', 'english') silently misused the second
# positional argument; a list of languages is the correct call.
stop_words = set(stopwords.words(['russian', 'english']))

def data_preprocessing(text: str) -> str:
    """Lowercase, strip HTML tags, punctuation, stop words and digits."""
    text = text.lower()
    text = text.replace('-', ' ').replace('\n', ' ')

    text = re.sub('<.*?>', '', text)
    text = ''.join([c for c in text if unicodedata.category(c).startswith(('L', 'N', 'Z')) or c == "'"])
    text = ' '.join([word for word in text.split() if word not in stop_words])
    text = ' '.join([word for word in text.split() if not word.isdigit()])
    return text


def get_words_by_freq(sorted_words: list, n: int = 10) -> list:
    return list(filter(lambda x: x[1] > n, sorted_words))

def padding(review_int: list, seq_len: int) -> np.ndarray:
    """Left-pad (or truncate) each encoded review to exactly seq_len tokens."""
    features = np.zeros((len(review_int), seq_len), dtype=int)
    for i, review in enumerate(review_int):
        if len(review) <= seq_len:
            zeros = list(np.zeros(seq_len - len(review)))
            new = zeros + review
        else:
            new = review[:seq_len]
        features[i, :] = np.array(new)

    return features

def preprocess_single_string(
    input_string: str,
    seq_len: int,
    vocab_to_int: dict,
    verbose: bool = False,
) -> torch.Tensor:
    """Clean a raw string, map its words to vocabulary indices and pad to seq_len."""
    preprocessed_string = data_preprocessing(input_string)
    result_list = []
    for word in preprocessed_string.split():
        try:
            result_list.append(vocab_to_int[word])
        except KeyError as e:
            if verbose:
                print(f'{e}: not in dictionary!')
    result_padded = padding([result_list], seq_len)[0]

    return torch.tensor(result_padded)
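
A sketch (not part of the commit) of the preprocessing pipeline end to end; the toy vocabulary and input text are invented for illustration:

# Hypothetical usage sketch, assuming the NLTK stopwords corpus is downloaded.
from preprocessing import data_preprocessing, preprocess_single_string

toy_vocab = {'фильм': 1, 'отличный': 2}              # invented vocabulary
print(data_preprocessing("Отличный <b>фильм</b>!"))  # -> 'отличный фильм'
tensor = preprocess_single_string("Отличный фильм!", seq_len=10, vocab_to_int=toy_vocab)
print(tensor)  # left-padded tensor of length 10, e.g. [0, 0, ..., 2, 1]
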
requirements.txt
ADDED
@@ -0,0 +1,115 @@
accelerate==0.26.1
altair==5.2.0
asttokens==2.4.1
attrs==23.2.0
blinker==1.7.0
cachetools==5.3.2
certifi==2023.11.17
charset-normalizer==3.3.2
click==8.1.7
comm==0.2.1
contourpy==1.2.0
cycler==0.12.1
debugpy==1.8.0
decorator==5.1.1
exceptiongroup==1.2.0
executing==2.0.1
filelock==3.13.1
fonttools==4.47.2
fsspec==2023.12.2
gensim==4.3.2
gitdb==4.0.11
GitPython==3.1.41
huggingface-hub==0.20.3
idna==3.6
imbalanced-learn==0.12.0
imblearn==0.0
importlib-metadata==7.0.1
ipykernel==6.29.0
ipython==8.21.0
jedi==0.19.1
Jinja2==3.1.3
joblib==1.3.2
jsonlines==4.0.0
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
jupyter_client==8.6.0
jupyter_core==5.7.1
kiwisolver==1.4.5
lightning-utilities==0.10.1
markdown-it-py==3.0.0
MarkupSafe==2.1.4
matplotlib==3.8.2
matplotlib-inline==0.1.6
mdurl==0.1.2
mpmath==1.3.0
nest-asyncio==1.6.0
networkx==3.2.1
nltk==3.8.1
numpy==1.26.3
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.19.3
nvidia-nvjitlink-cu12==12.3.101
nvidia-nvtx-cu12==12.1.105
packaging==23.2
pandas==2.2.0
parso==0.8.3
pexpect==4.9.0
pillow==10.2.0
platformdirs==4.2.0
prompt-toolkit==3.0.43
protobuf==4.25.2
psutil==5.9.8
ptyprocess==0.7.0
pure-eval==0.2.2
pyarrow==15.0.0
pydeck==0.8.1b0
Pygments==2.17.2
pyparsing==3.1.1
python-dateutil==2.8.2
pytz==2023.4
PyYAML==6.0.1
pyzmq==25.1.2
referencing==0.33.0
regex==2023.12.25
requests==2.31.0
rich==13.7.0
rpds-py==0.17.1
safetensors==0.4.2
scikit-learn==1.4.0
scipy==1.12.0
six==1.16.0
smart-open==6.4.0
smmap==5.0.1
stack-data==0.6.3
streamlit==1.30.0
sympy==1.12
tenacity==8.2.3
threadpoolctl==3.2.0
tokenizers==0.15.1
toml==0.10.2
toolz==0.12.1
torch==2.2.0
torchmetrics==1.3.0.post0
torchutils==0.0.4
tornado==6.4
tqdm==4.66.1
traitlets==5.14.1
transformers==4.37.2
triton==2.2.0
typing_extensions==4.9.0
tzdata==2023.4
tzlocal==5.2
urllib3==2.2.0
validators==0.22.0
watchdog==3.0.0
wcwidth==0.2.13
zipp==3.17.0