Spaces:
Paused
Paused
sudharshan106
committed on
Commit
•
f0f2cc2
1
Parent(s):
ee73e95
rprec
Browse files- data/dataarxivfinal.csv +0 -0
- embedding_model_comp.npz +3 -0
- final.py +150 -0
- requirements.txt +15 -0
data/dataarxivfinal.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
embedding_model_comp.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc564d5cc9d3072bdf184398c81b8d37e08128a35d922cf3a86dfd42b147240b
|
3 |
+
size 25339666
|
final.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import streamlit as st
import cohere
import numpy as np
import pandas as pd
from qdrant_client.http import models
import qdrant_client
import easynmt

# --- Configuration -----------------------------------------------------------
# "small" selects Cohere's small embedding model; it must match the model that
# produced embedding_model_comp.npz or query/corpus vectors won't be comparable.
model_type = "small"

cohere_api_key = st.secrets["COHERE_API_KEY"]
QDRANT_URL = st.secrets["QDRANT_URL"]
QDRANT_API_KEY = st.secrets["QDRANT_API_KEY"]

collection_name = "my_collection"
distance = models.Distance.COSINE


# --- Heavy resources ---------------------------------------------------------
# Streamlit re-executes this whole script on every widget interaction. Cache
# the translation model, dataset, embeddings and API clients so each is
# created exactly once per process instead of on every rerun.

@st.cache_resource
def _load_translation_model():
    """Load the multilingual translation model (large download on first run)."""
    return easynmt.EasyNMT('m2m_100_418M')  # alternative: mbart50_en2m


@st.cache_data
def _load_dataset():
    """Load the arXiv article metadata (one title/abstract per row)."""
    return pd.read_csv('data/dataarxivfinal.csv')


@st.cache_data
def _load_embeddings():
    """Load precomputed abstract embeddings (array stored under key 'a')."""
    return np.load("embedding_model_comp.npz")['a']


@st.cache_resource
def _make_clients():
    """Create the Cohere and Qdrant API clients from the app secrets."""
    co = cohere.Client(api_key=cohere_api_key)
    qc = qdrant_client.QdrantClient(
        url=QDRANT_URL,
        api_key=QDRANT_API_KEY,
    )
    return co, qc


model_translation = _load_translation_model()
ds = _load_dataset()
print(ds.shape)  # debug: confirm the dataset loaded with the expected row count
embeddings = _load_embeddings()
cohere_client, client = _make_clients()
# Create the Qdrant collection and upload the precomputed embeddings.
# Triggered manually from the sidebar so the (slow) upload only runs on demand.
button_for_upload = st.sidebar.button('Load')
if button_for_upload:
    with st.spinner("Loading Models"):
        # recreate_collection drops any existing collection with this name,
        # so pressing Load again performs a clean, idempotent re-upload.
        client.recreate_collection(
            collection_name=collection_name,
            vectors_config=models.VectorParams(
                size=embeddings.shape[1],
                distance=distance,
            ),
        )

        # Qdrant expects plain Python floats; ndarray.tolist() converts the
        # whole matrix in one C-level pass instead of per-row map(float, ...).
        vectors = embeddings.tolist()
        # Point ids are the row positions, matching the ds.iloc lookups used
        # later when rendering search results.
        ids = list(range(len(embeddings)))

        client.upload_collection(
            collection_name=collection_name,
            ids=ids,
            vectors=vectors,
            batch_size=128,
        )
# Sidebar mode switch: selects which of the four tools the main panel shows.
article_rec_type = st.sidebar.selectbox(
    "Recommend article type by",
    ("Article Name", "Article Content", "Article Translator", "Article Summarizer"),
)
def article_summarizer():
    """Summarize user-pasted text with Cohere's generation endpoint.

    Left column takes the input text; right column shows the generated
    summary once the Summarize button is pressed.
    """
    col1, col2 = st.columns(2)
    summarize_decision = st.button('Summarize')

    with col1:
        with st.expander("Input text"):
            prompt = st.text_area("Paste the sentence that needs to be Summarized")

    with col2:
        with st.expander("Summarized texts"):
            if summarize_decision:
                if not prompt.strip():
                    # Guard: the API rejects an empty prompt — tell the user
                    # instead of surfacing a raw API error.
                    st.warning("Please paste some text to summarize first.")
                    return
                response = cohere_client.generate(
                    model='xlarge',
                    prompt=prompt,
                    max_tokens=512,
                    temperature=0.6,   # mildly varied but mostly faithful output
                    k=0,               # disable top-k sampling
                    p=1,               # disable nucleus sampling
                    frequency_penalty=0,
                    presence_penalty=0,
                    stop_sequences=["--"],
                    truncate="end",    # clip over-long prompts at the end
                )
                # The first (and only requested) generation holds the summary.
                summary = response.generations[0].text
                st.write(summary)
# Display name -> ISO 639-1 code for the supported translation targets.
language_dict = {"Tamil": "ta", "Nepali": "ne", "Indonesian": "id", "Thai": "th",
                 "Spanish": "es", "Russian": "ru", "Turkish": "tr", "French": "fr"}


def article_translator():
    """Translate user-pasted text into a language chosen in the sidebar."""
    col1, col2 = st.columns(2)

    # Options come straight from language_dict (insertion order preserved),
    # so the menu and the code mapping can never drift apart.
    language = st.sidebar.selectbox(
        "Select Language",
        tuple(language_dict),
    )

    translate_decision = st.button('Translate')
    with col1:
        with st.expander("Input text"):
            text = st.text_area("Paste the sentence that needs to be Translated")

    with col2:
        with st.expander("Translated text"):
            if translate_decision:
                if not text.strip():
                    # Guard: avoid running the translation model on nothing.
                    st.warning("Please paste some text to translate first.")
                    return
                result = model_translation.translate(
                    text, target_lang=language_dict[language])
                st.write(result)
def article_name():
    """Recommend articles similar to a selected article.

    The chosen article's abstract is embedded with Cohere and used as the
    query vector for a Qdrant similarity search over the corpus.
    """
    title = st.selectbox('Article Name', options=tuple(ds['title'].values))
    top_k = st.slider("Number of recommendations", 1, 10, step=1)
    button = st.button('Predict')

    if button:
        # Use the selected article's abstract as the semantic query.
        query_to_ = ds[ds['title'] == title].head(1)['abstract'].values[0]
        query_vector = cohere_client.embed(
            [query_to_], model=model_type, truncate="RIGHT").embeddings[0]
        query_vector = list(map(float, query_vector))
        search_result = client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=top_k,
        )
        # Each hit's id is the dataframe row position assigned at upload time,
        # so it can be used directly with ds.iloc — no parallel index/score
        # lists needed.
        for hit in search_result:
            st.write(f"**{ds.iloc[hit.id]['title']}** score:{hit.score}")
def article_content():
    """Recommend articles semantically similar to free-form pasted text."""
    search_decision = st.button('Search')

    with st.expander("Input text"):
        query_to_ = st.text_area("Paste the Contents that need to be searched for")
        top_k = st.slider("Number of recommendations", 1, 10, step=1)

    if search_decision:
        if not query_to_.strip():
            # Guard: the embed endpoint rejects an empty document.
            st.warning("Please paste some text to search for first.")
            return
        query_vector = cohere_client.embed(
            [query_to_], model=model_type, truncate="RIGHT").embeddings[0]
        query_vector = list(map(float, query_vector))
        search_result = client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=top_k,
        )
        # Hit ids are dataframe row positions (assigned at upload time);
        # iterate hits directly instead of building parallel id/score lists.
        for hit in search_result:
            st.write(f"**{ds.iloc[hit.id]['title']}** score:{hit.score}")
# Route the main panel to the tool chosen in the sidebar. Anything not
# matched explicitly falls through to content-based search, mirroring the
# original if/elif/else chain.
_handlers = {
    'Article Name': article_name,
    'Article Translator': article_translator,
    "Article Summarizer": article_summarizer,
}
_handlers.get(article_rec_type, article_content)()
requirements.txt
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
cohere==3.10.0
|
2 |
+
EasyNMT==2.0.2
|
3 |
+
fasttext==0.9.2
|
4 |
+
nltk==3.8.1
|
5 |
+
numba==0.56.4
|
6 |
+
|
7 |
+
pandas==1.3.5
|
8 |
+
qdrant-client==1.0.5
|
9 |
+
regex==2022.10.31
|
10 |
+
sentencepiece==0.1.97
|
11 |
+
streamlit==1.20.0
|
12 |
+
tokenizers==0.13.2
|
13 |
+
torch==1.13.1
|
14 |
+
tqdm==4.65.0
|
15 |
+
transformers==4.27.1
|