Spaces:
Paused
Paused
File size: 5,307 Bytes
f0f2cc2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
import streamlit as st
import cohere
import numpy as np
import pandas as pd
from qdrant_client.http import models
# import warnings
# warnings.filterwarnings('ignore')
import qdrant_client
import easynmt
# from config import CONFIG
# ---------------------------------------------------------------------------
# Module-level setup: models, credentials, data and service clients.
# ---------------------------------------------------------------------------

# Translation model used by the "Article Translator" page
# (the author noted mbart50_en2m as an alternative checkpoint).
model_translation = easynmt.EasyNMT('m2m_100_418M')

# Model name passed to `cohere_client.embed` when embedding queries.
model_type = "small"

# Credentials are read from Streamlit's secrets store.
cohere_api_key = st.secrets["COHERE_API_KEY"]
QDRANT_URL = st.secrets["QDRANT_URL"]
QDRANT_API_KEY = st.secrets["QDRANT_API_KEY"]

# Article table; the pages below read its 'title' and 'abstract' columns.
ds = pd.read_csv('data/dataarxivfinal.csv')
print(ds.shape)

cohere_client = cohere.Client(api_key=cohere_api_key)

# Precomputed article embeddings stored under key 'a' of the archive.
# Reading inside a `with` block closes the underlying .npz file handle once
# the array has been loaded, instead of leaving it open for the process
# lifetime.
with np.load("embedding_model_comp.npz") as _embedding_archive:
    embeddings = _embedding_archive['a']

# Qdrant collection settings and client.
collection_name = "my_collection"
distance = models.Distance.COSINE
client = qdrant_client.QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
)
# Sidebar action: (re)create the Qdrant collection and upload the embeddings.
# NOTE(review): the source's indentation was lost; the whole upload is assumed
# to run inside the spinner, only when the button is pressed.
button_for_upload = st.sidebar.button('Load')
if button_for_upload:
    with st.spinner("Loading Models"):
        # The collection's vector size is the embedding dimension.
        collection_id = client.recreate_collection(
            collection_name=collection_name,
            vectors_config=models.VectorParams(
                size=embeddings.shape[1],
                distance=distance,
            ),
        )
        # One list of plain Python floats per embedding row.
        vectors = [[float(value) for value in row] for row in embeddings]
        # Point ids are the row positions 0 .. len(embeddings) - 1.
        ids = list(range(len(embeddings)))
        client.upload_collection(
            collection_name=collection_name,
            ids=ids,
            vectors=vectors,
            batch_size=128,
        )
# Sidebar selector whose value decides which page function is rendered.
article_rec_type = st.sidebar.selectbox(
    label="Recommend article type by",
    options=(
        "Article Name",
        "Article Content",
        "Article Translator",
        "Article Summarizer",
    ),
)
def article_summarizer():
    """Render the summarizer page.

    Shows a text area in the left column and, once the 'Summarize' button is
    pressed, writes the first generation returned by ``cohere_client.generate``
    into the right column. Blank input is not sent to the API; a warning is
    shown instead.

    Returns:
        None. All output goes to the Streamlit page.
    """
    col1, col2 = st.columns(2)
    summarize_decision = st.button('Summarize')

    with col1:
        with st.expander("Input text"):
            prompt = st.text_area("Paste the sentence that needs to be Summarized")

    with col2:
        with st.expander("Summarized texts"):
            if not summarize_decision:
                return

            # Guard against an empty/whitespace-only prompt: there is nothing
            # to summarize, so skip the remote call entirely.
            if not prompt or not prompt.strip():
                st.warning("Please enter some text to summarize.")
                return

            response = cohere_client.generate(
                model='xlarge',
                prompt=prompt,
                max_tokens=512,
                temperature=0.6,
                k=0,
                p=1,
                frequency_penalty=0,
                presence_penalty=0,
                stop_sequences=["--"],
                truncate="end",
            )
            # Only the first generation is displayed.
            st.write(response.generations[0].text)
# Display name -> language code passed as `target_lang` to the translator.
language_dict = {
    "Tamil": "ta",
    "Nepali": "ne",
    "Indonesian": "id",
    "Thai": "th",
    "Spanish": "es",
    "Russian": "ru",
    "Turkish": "tr",
    "French": "fr",
}
def article_translator():
    """Render the translator page.

    Lets the user pick a target language in the sidebar and paste text in the
    left column. When the 'Translate' button is pressed, the text is translated
    with ``model_translation`` and written into the right column. Blank input
    is not translated; a warning is shown instead.

    Returns:
        None. All output goes to the Streamlit page.
    """
    col1, col2 = st.columns(2)

    # Offer exactly the languages that `language_dict` can map to a code, so
    # the selectbox and the lookup below can never drift apart.
    language = st.sidebar.selectbox("Select Language", tuple(language_dict))
    translate_decision = st.button('Translate')

    with col1:
        with st.expander("Input text"):
            text = st.text_area("Paste the sentence that needs to be Translated")

    with col2:
        with st.expander("Translated text"):
            if not translate_decision:
                return

            # Nothing to translate: skip the model call.
            if not text or not text.strip():
                st.warning("Please enter some text to translate.")
                return

            result = model_translation.translate(
                text,
                target_lang=language_dict[language],
            )
            st.write(result)
def article_name():
    """Render the recommend-by-title page.

    The user picks an article title and a result count. On 'Predict', the
    abstract of the first row with that title is embedded with Cohere, the
    Qdrant collection is searched with that vector, and each hit's title and
    score are written to the page.

    Returns:
        None. All output goes to the Streamlit page.
    """
    chosen_title = st.selectbox('Article Name', options=tuple(ds['title'].values))
    limit = st.slider("Number of recommendations", 1, 10, step=1)
    if not st.button('Predict'):
        return

    # Abstract of the first row whose title matches the selection.
    matching_rows = ds[ds['title'] == chosen_title]
    abstract_text = matching_rows.head(1)['abstract'].values[0]

    embed_response = cohere_client.embed(
        [abstract_text], model=model_type, truncate="RIGHT"
    )
    query_vector = [float(component) for component in embed_response.embeddings[0]]

    hits = client.search(
        collection_name=collection_name,
        query_vector=query_vector,
        limit=limit,
    )
    hit_ids = [hit.id for hit in hits]
    hit_scores = [hit.score for hit in hits]

    # Hit ids are used as positional row indices into the article table.
    hit_titles = ds.iloc[hit_ids]['title']
    for hit_title, hit_score in zip(hit_titles, hit_scores):
        st.write(f"**{hit_title}** score:{hit_score}")
def article_content():
    """Render the recommend-by-content page.

    The user pastes free text and picks a result count. On 'Search', the text
    is embedded with Cohere, the Qdrant collection is searched with that
    vector, and each hit's title and score are written to the page. A blank
    query is not embedded or searched; a warning is shown instead.

    Returns:
        None. All output goes to the Streamlit page.
    """
    search_decision = st.button('Search')
    with st.expander("Input text"):
        query_to_ = st.text_area("Paste the Contents that need to be searched for")
    # NOTE(review): the source's indentation was lost; the slider is assumed to
    # sit outside the expander, as the text areas on the other pages do.
    top_k = st.slider("Number of recommendations", 1, 10, step=1)

    if not search_decision:
        return

    # An empty query carries no content to match against; skip the API calls.
    if not query_to_ or not query_to_.strip():
        st.warning("Please enter some text to search for.")
        return

    embed_response = cohere_client.embed(
        [query_to_], model=model_type, truncate="RIGHT"
    )
    query_vector = [float(component) for component in embed_response.embeddings[0]]

    search_result = client.search(
        collection_name=collection_name,
        query_vector=query_vector,
        limit=top_k,
    )
    similar_text_indices = [hit.id for hit in search_result]
    scores = [hit.score for hit in search_result]

    # Hit ids are used as positional row indices into the article table.
    titles = ds.iloc[similar_text_indices]['title']
    for title, score in zip(titles, scores):
        st.write(f"**{title}** score:{score}")
# Route the sidebar choice to its page function. Any value not listed here
# (i.e. "Article Content") falls through to the content-search page.
_page_renderers = {
    'Article Name': article_name,
    'Article Translator': article_translator,
    "Article Summarizer": article_summarizer,
}
_page_renderers.get(article_rec_type, article_content)()
|