Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,8 +2,7 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
import torch
|
4 |
from transformers import pipeline
|
5 |
-
|
6 |
-
from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering
|
7 |
import datetime
|
8 |
|
9 |
#df = pd.read_excel('discrepantes.xlsx', index_col='Unnamed: 0')
|
@@ -15,30 +14,29 @@ print(table_data.head())
|
|
15 |
def response(user_question, table_data):
|
16 |
a = datetime.datetime.now()
|
17 |
|
18 |
-
model_name = "
|
19 |
-
model =
|
20 |
-
tokenizer =
|
21 |
|
22 |
-
|
23 |
-
encoding = tokenizer(table=table_data, queries=[user_question], padding=True, return_tensors="pt", truncation=True)
|
24 |
|
25 |
-
|
26 |
-
outputs = model(**encoding)
|
27 |
|
28 |
-
#
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
35 |
)
|
36 |
|
37 |
-
|
38 |
-
answer_text = answer[0][0][0] if len(answer[0]) > 0 else "Não foi possível encontrar uma resposta"
|
39 |
|
40 |
query_result = {
|
41 |
-
"Resposta":
|
42 |
}
|
43 |
|
44 |
b = datetime.datetime.now()
|
@@ -46,6 +44,7 @@ def response(user_question, table_data):
|
|
46 |
|
47 |
return query_result
|
48 |
|
|
|
49 |
# Streamlit interface
|
50 |
st.markdown("""
|
51 |
<div style='display: flex; align-items: center;'>
|
@@ -65,15 +64,15 @@ user_question = st.text_input("Escreva sua questão aqui:", "")
|
|
65 |
|
66 |
if user_question:
|
67 |
# Add person emoji when typing question
|
68 |
-
st.session_state['history'].append(('
|
69 |
-
st.markdown(f"
|
70 |
|
71 |
# Generate the response
|
72 |
bot_response = response(user_question, table_data)
|
73 |
|
74 |
# Add robot emoji when generating response and align to the right
|
75 |
-
st.session_state['history'].append(('
|
76 |
-
st.markdown(f"<div style='text-align: right'
|
77 |
|
78 |
# Clear history button
|
79 |
if st.button("Limpar"):
|
@@ -81,7 +80,7 @@ if st.button("Limpar"):
|
|
81 |
|
82 |
# Display chat history
|
83 |
for sender, message in st.session_state['history']:
|
84 |
-
if sender == '
|
85 |
-
st.markdown(f"
|
86 |
-
elif sender == '
|
87 |
-
st.markdown(f"<div style='text-align: right'
|
|
|
2 |
import pandas as pd
|
3 |
import torch
|
4 |
from transformers import pipeline
|
5 |
+
from transformers import TapasTokenizer, TapexTokenizer, BartForConditionalGeneration
|
|
|
6 |
import datetime
|
7 |
|
8 |
#df = pd.read_excel('discrepantes.xlsx', index_col='Unnamed: 0')
|
|
|
14 |
def response(user_question, table_data):
|
15 |
a = datetime.datetime.now()
|
16 |
|
17 |
+
model_name = "microsoft/tapex-large-finetuned-wtq"
|
18 |
+
model = BartForConditionalGeneration.from_pretrained(model_name)
|
19 |
+
tokenizer = TapexTokenizer.from_pretrained(model_name)
|
20 |
|
21 |
+
queries = [user_question]
|
|
|
22 |
|
23 |
+
encoding = tokenizer(table=table_data, query=queries, padding=True, return_tensors="pt", truncation=True)
|
|
|
24 |
|
25 |
+
# Experiment with generation parameters
|
26 |
+
outputs = model.generate(
|
27 |
+
**encoding,
|
28 |
+
num_beams=5, # Beam search to generate more diverse responses
|
29 |
+
top_k=50, # Top-k sampling for diversity
|
30 |
+
top_p=0.95, # Nucleus sampling
|
31 |
+
temperature=0.7, # Temperature scaling (if supported by the model)
|
32 |
+
max_length=50, # Limit the length of the generated response
|
33 |
+
early_stopping=True # Stop generation when an end token is generated
|
34 |
)
|
35 |
|
36 |
+
ans = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
|
|
37 |
|
38 |
query_result = {
|
39 |
+
"Resposta": ans[0]
|
40 |
}
|
41 |
|
42 |
b = datetime.datetime.now()
|
|
|
44 |
|
45 |
return query_result
|
46 |
|
47 |
+
|
48 |
# Streamlit interface
|
49 |
st.markdown("""
|
50 |
<div style='display: flex; align-items: center;'>
|
|
|
64 |
|
65 |
if user_question:
    history = st.session_state['history']

    # Record and echo the user's message (person emoji prefix).
    # NOTE(review): the '??' literals look like mojibake of emoji characters
    # (likely a person emoji here) — confirm against the originally encoded file.
    history.append(('??', user_question))
    st.markdown(f"**?? {user_question}**")

    # Ask the table-QA model for an answer to this question.
    answer = response(user_question, table_data)

    # Record the bot's reply and render it right-aligned (robot emoji prefix).
    history.append(('??', answer))
    st.markdown(f"<div style='text-align: right'>**?? {answer}**</div>", unsafe_allow_html=True)
|
76 |
|
77 |
# Clear history button
|
78 |
if st.button("Limpar"):
|
|
|
80 |
|
81 |
# Display chat history
# Re-renders the whole conversation stored in st.session_state['history'] as
# (sender, message) pairs: user messages left-aligned, bot replies right-aligned.
# NOTE(review): both branches compare against the same '??' literal, so the
# elif can never run — these '??' look like mojibake of two distinct emoji
# (person vs robot). Confirm against the originally encoded source before
# changing either literal.
for sender, message in st.session_state['history']:
    if sender == '??':
        st.markdown(f"**?? {message}**")
    elif sender == '??':
        st.markdown(f"<div style='text-align: right'>**?? {message}**</div>", unsafe_allow_html=True)
|