Spaces:
Runtime error
Runtime error
import os
# HACK: the Space pins gradio 3.31.0 at runtime by shelling out to pip.
# NOTE(review): installing packages at import time is fragile — prefer
# pinning gradio==3.31.0 in requirements.txt instead.
os.system("pip uninstall -y gradio")
os.system("pip install gradio==3.31.0")
import numpy as np
from sentence_transformers import SentenceTransformer, models
import faiss
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
import openai
import pickle
import gradio as gr
import base64
from pathlib import Path
import pandas as pd
import gzip
# --- Configuration -------------------------------------------------------
# SECURITY FIX: the original file hard-coded an OpenAI API key here.
# A key committed to source control must be treated as leaked and revoked.
# Read the key from the environment (set OPENAI_API_KEY in the Space's
# secrets) instead of embedding it in the source.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

# Download the sentence tokenizer data used by sent_tokenize() below.
nltk.download('punkt')

# Sentence-embedding model used both for indexing chunks and encoding queries.
model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')

# Directory containing the pre-cleaned .txt documents to index.
directory = "cleaned_files"

# On-disk cache locations for the FAISS index and the (chunks, filenames)
# mapping pickled alongside it.
index_filename = "faiss.index"
mapping_filename = "mapping.pkl1"

# NOTE(review): this Textbox is rebound by the one created inside
# gr.Blocks further down; kept here to preserve the original behavior.
txt = gr.Textbox(
    label="Type your query here:",
    placeholder="What would you like to learn today?"
).style(container=True)
def apply_html(text, color):
    """Return *text* with any embedded <table> styled for Gradio rendering.

    If the text contains a <table>...</table> span, inline CSS is injected
    into the table/th/td tags of that span; otherwise the text is returned
    unchanged. The *color* argument is accepted for interface compatibility
    but is not used by this implementation.
    """
    # Guard clause: plain text (no table markup) passes through untouched.
    if "<table>" not in text or "</table>" not in text:
        return text

    start = text.index("<table>")
    end = text.index("</table>") + len("</table>")

    # Inject inline CSS into the table span so Gradio renders borders/padding.
    styled = (
        text[start:end]
        .replace("<table>", "<table style='border-collapse: collapse;'>")
        .replace("<th>", "<th style='border: 1px solid #ddd; padding: 8px; background-color: #f2f2f2;'>")
        .replace("<td>", "<td style='border: 1px solid #ddd; padding: 8px;'>")
    )
    # Splice the styled table back between the untouched prefix and suffix.
    return text[:start] + styled + text[end:]
# NOTE(review): inert string literal below is a superseded implementation
# of apply_html kept as commented-out code; safe to delete.
'''
def apply_html(text, color):
    return f'<b style="color:{color}; font-size: 15px; !important">{text}</b>'
'''
def apply_filelist_html(text, color):
    """Wrap *text* in a small (12px) bold tag tinted with *color*."""
    markup = '<b style="color:{}; font-size: 12px; !important">{}</b>'
    return markup.format(color, text)
# Build or load the FAISS search index.
# If a cached index and chunk/filename mapping exist on disk, load them;
# otherwise chunk every .txt file under `directory`, embed each chunk with
# the SentenceTransformer model, and persist both artifacts for next start-up.
if os.path.exists(index_filename) and os.path.exists(mapping_filename):
    # Load the index from disk
    index = faiss.read_index(index_filename)
    # Load the mapping from disk
    with open(mapping_filename, 'rb') as f:
        chunks, filenames = pickle.load(f)
else:
    # Lists to hold file names, corresponding embeddings and text chunks
    filenames = []
    embeddings = []
    chunks = []
    # Define chunk size and overlap
    chunk_size = 5  # Number of sentences per chunk
    overlap = 2  # Number of sentences shared between consecutive chunks
    # Iterate over files to create the index
    for filename in os.listdir(directory):
        if filename.endswith(".txt"):
            with open(os.path.join(directory, filename), 'r', encoding='utf-8') as file:
                text = file.read()
                # Split text into sentences
                sentences = sent_tokenize(text)
                # Group sentences into overlapping chunks; step = size - overlap
                for i in range(0, len(sentences), chunk_size-overlap):
                    chunk = ' '.join(sentences[i:i+chunk_size])
                    chunks.append(chunk)
                    # Compute BERT embedding and record the source file so a
                    # FAISS hit can be resolved back to chunk text + filename
                    embeddings.append(model.encode(chunk))
                    filenames.append(filename)
    # Convert list of embeddings to numpy array (FAISS expects a 2-D array)
    embeddings = np.array(embeddings)
    # Dimension of our vector space
    d = embeddings.shape[1]
    # Construct an exact (brute-force) L2-distance index
    index = faiss.IndexFlatL2(d)
    # Add vectors to the index
    index.add(embeddings)
    # Save the index to disk
    faiss.write_index(index, index_filename)
    # Save the mapping to disk so indices can be resolved on reload
    with open(mapping_filename, 'wb') as f:
        pickle.dump((chunks, filenames), f)
def add_text(history, text):
    """Append the user's query to the chat history.

    The query is wrapped in blue HTML markup via apply_html; the bot slot
    of the new entry is left as None until bot() fills it in. Returns the
    updated history together with the untouched query text.
    """
    if history is not None:
        entry = [apply_html(text, "blue"), None]
        history.append(entry)
    return history, text
def bot(query, history, fileListHistory, k=5):
    """Answer *query* with FAISS retrieval + GPT-3.5 and update both histories.

    Steps:
      1. Embed the query and retrieve the top-*k* chunks from the index.
      2. Append the referenced source-file links to *fileListHistory*.
      3. Ask gpt-3.5-turbo to answer using the retrieved context (up to
         3 attempts, sleeping 1s and retrying on "Request timed out").
      4. Write the answer — or a timeout notice — into the latest chat entry.

    Returns the updated (history, fileListHistory) pair.
    """
    # BUG FIX: `time` was used below but never imported at module level.
    import time

    print("QUERY : " + query)
    # Compute the query embedding; FAISS works with single precision.
    query_embedding = model.encode(query).astype('float32')
    # Search the index: D = distances, I = chunk indices, each shaped (1, k).
    D, I = index.search(np.array([query_embedding]), k)
    # Retrieve and join the top-k chunks into one context string.
    top_chunks = [chunks[I[0, i]] for i in range(I.shape[1])]
    context = '\n'.join(top_chunks)
    # Resolve chunk hits to source files and de-duplicate the list.
    top_filenames = list(set(filenames[I[0, i]] for i in range(I.shape[1])))
    print("Corresponding filenames: ", top_filenames)
    # Build clickable links to the original PDFs hosted on the Space.
    file_links = [f'<a href="https://huggingface.co/spaces/happiestminds/rybot/resolve/main/raw/{filename.replace(".txt", ".pdf")}" target="_blank">{filename.replace(".txt", ".pdf")}</a>' for filename in top_filenames]
    file_links_str = ', '.join(file_links)
    # Update file history with query and file links.
    fileListHistory.append([apply_filelist_html(f"QUERY: {query} | REFERENCES: {file_links_str}", "green"), None])

    # Build the chat prompt for the OpenAI API.
    prompt = f'''The following is a query from a user who is a mechanic. Use the context provided to respond to the user.
QUERY: {query}
CONTEXT: {context}
Respond to the point. Do not include terms like - (according to the context provided) in your response.'''
    messages = [{"role": "user", "content": prompt}]
    print(messages)

    response = None
    # Attempt the call up to 3 times; retry only on timeouts.
    for _attempt in range(3):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=messages,
                max_tokens=1000,
                stop=None,
                temperature=0,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
            )
            # Call succeeded — stop retrying.
            break
        except openai.OpenAIError as e:
            if str(e) == "Request timed out":
                time.sleep(1)
            else:
                # Any other API error: give up immediately.
                break

    if response is None:
        # BUG FIX: the original called response.text.strip() on None here,
        # raising AttributeError; surface the timeout message instead.
        timeout_msg = "Unfortunately, the connection to ChatGPT timed out. Please try after some time."
        print(timeout_msg)
        if history is not None and len(history) > 0:
            history[-1][1] = apply_html(timeout_msg, "black")
    else:
        # Extract and display the generated answer.
        answer = response['choices'][0]['message']['content'].strip()
        print("\nGPT RESPONSE:\n")
        print(answer)
        if history is not None and len(history) > 0:
            # Update the chat history with the bot's response.
            history[-1][1] = apply_html(answer, "black")
    return history, fileListHistory
# Load the logo and embed it as base64 so the header HTML needs no
# separately-served static file.
with open(Path("rybot_small.png"), "rb") as img_file:
    img_str = base64.b64encode(img_file.read()).decode()

# Static header markup injected at the top of the Gradio app.
# Doubled braces {{ }} escape literal CSS braces inside the f-string.
html_code = f'''
<!DOCTYPE html>
<html>
<head>
<style>
.center {{
display: flex;
justify-content: center;
align-items: center;
margin-top: -40px; /* adjust this value as per your requirement */
margin-bottom: 5px;
}}
.large-text {{
font-size: 40px;
font-family: Arial, Helvetica, sans-serif;
font-weight: 900 !important;
margin-left: 5px;
color: #5b5b5b !important;
}}
.image-container {{
display: inline-block;
vertical-align: middle;
height: 50px; /* Twice the font-size */
margin-bottom: 5px;
}}
</style>
</head>
<body>
<div class="center">
<img src="data:image/jpg;base64,{img_str}" alt="RyBOT image" class="image-container" />
<strong class="large-text">RyBOT</strong>
</div>
<br>
<div class="center">
<h3> [ "I'm smart but the humans have me running on a hamster wheel. Please forgive the slow responses." ] </h3>
</div>
</body>
</html>
'''

# Page-level CSS applied to the whole Gradio app.
css = """
.feedback textarea {background-color: #e9f0f7}
.gradio-container {background-color: #eeeeee}
"""
def clear_textbox():
    """Clear the query textbox (Gradio resets a component bound to None)."""
    message = "Calling CLEAR"
    print(message)
    return
# Assemble the Gradio UI (gradio 3.x API: .style(), color_map kwargs).
with gr.Blocks(theme=gr.themes.Soft(), css=css, title="RyBOT") as demo:
    gr.HTML(html_code)
    # Main conversation panel.
    chatbot = gr.Chatbot([], elem_id="chatbot", label="Chat", color_map=["blue","grey"]).style(height=450)
    # Secondary panel listing the source documents consulted per query.
    fileListBot = gr.Chatbot([], elem_id="fileListBot", label="References", color_map=["blue","grey"]).style(height=150)
    # Query input; rebinds the module-level `txt` created earlier.
    txt = gr.Textbox(
        label="Type your query here:",
        placeholder="What would you like to find today?"
    ).style(container=True)
    # Enter key: append user message -> generate answer -> clear the box.
    txt.submit(
        add_text,
        [chatbot, txt],
        [chatbot, txt]
    ).then(
        bot,
        [txt, chatbot, fileListBot],
        [chatbot, fileListBot]
    ).then(
        clear_textbox,
        inputs=None,
        outputs=[txt]
    )
    # The Send button mirrors the Enter-key pipeline above.
    btn = gr.Button(value="Send")
    btn.click(
        add_text,
        [chatbot, txt],
        [chatbot, txt],
    ).then(
        bot,
        [txt, chatbot, fileListBot],
        [chatbot, fileListBot]
    ).then(
        clear_textbox,
        inputs=None,
        outputs=[txt]
    )
demo.launch()