# NOTE(review): the following Hugging Face Spaces page chrome was scraped into
# this file (runtime status, file size, commit hashes, line-number gutter).
# It is not Python source and has been reduced to this comment so the file parses.
# -*- coding: utf-8 -*-
"""Untitled8.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1krY-kSVbf8NSdFeA5eZ_1vvYGLuuSv7I
"""
import os
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
import gradio as gr
# Step 5: Retrieve the OpenAI API key stored in the Space secret "tauhid".
openai_api_key = os.getenv("tauhid")
if not openai_api_key:
    # Fail fast with an actionable message; previously a missing secret fell
    # through to the os.environ assignment below and raised a cryptic
    # TypeError ("str expected, not NoneType").
    raise ValueError(
        "OpenAI API key not found: set the 'tauhid' secret for this Space."
    )
# Confirm presence only — never echo key material (even a prefix) to the logs.
print("API key retrieved: [FOUND]")
# Expose the key under the variable name the OpenAI client libraries expect.
os.environ["OPENAI_API_KEY"] = openai_api_key
# Create the embeddings client (reads OPENAI_API_KEY from the environment).
embeddings = OpenAIEmbeddings()
# Step 1: Load the system prompt that frames every model response.
prompt_path = "system_prompt.txt"  # Ensure this file is in the same directory
if not os.path.exists(prompt_path):
    raise FileNotFoundError(f"The file '{prompt_path}' is missing. Please upload it to the Space.")
# Read explicitly as UTF-8 so behavior does not depend on the host locale
# (the default encoding on some platforms is not UTF-8).
with open(prompt_path, "r", encoding="utf-8") as file:
    system_prompt = file.read()
# Step 2: Load the retrieval database (one text chunk per row, plus metadata
# columns consumed during preprocessing below).
csv_path = "retrievaldb.csv"  # Ensure this file is in the same directory
if not os.path.exists(csv_path):
    raise FileNotFoundError(f"The file '{csv_path}' is missing. Please upload it to the Space.")
# Parse the CSV into a DataFrame for row-wise iteration.
df = pd.read_csv(csv_path)
# Step 3: Preprocess the data — split each row's text into overlapping chunks
# and carry the row's descriptive columns along as per-chunk metadata, so the
# two lists stay index-aligned for FAISS.from_texts.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = []
metadatas = []
# Process each row to chunk text and attach metadata.
for _, row in df.iterrows():
    chunk_text = row.get("chunk_text", "")
    # pd.notna skips NaN cells; str() coerces non-string cells (e.g. a numeric
    # value read from the CSV) so split_text cannot crash on them.
    if pd.notna(chunk_text):
        for chunk in text_splitter.split_text(str(chunk_text)):
            texts.append(chunk)
            metadatas.append({
                "source": row.get("content_source", "Unknown Source"),
                "title": row.get("document_name", "Unknown Document"),
                "page": row.get("page_number", "N/A"),
                "topic": row.get("main_topic", "N/A"),
                "week": row.get("metadata", "N/A"),
            })
# Sanity check: every chunk must have exactly one metadata record.
if len(texts) != len(metadatas):
    raise ValueError("Mismatch between texts and metadata after preprocessing.")
# Step 4: Create the vector store from the preprocessed chunks.
# Reuse the OpenAIEmbeddings instance created during setup instead of
# constructing a second, identical client here.
vector_store = FAISS.from_texts(
    texts=texts,
    embedding=embeddings,
    metadatas=metadatas,
)
# Initialize the chat model used to answer questions.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",  # small, low-cost chat model
    temperature=0.7,           # allow some variation in phrasing
    api_key=openai_api_key,
)
# NOTE(review): a third OpenAIEmbeddings instance was previously created here
# with the same key; it was never used (the vector store already holds its own
# embeddings client) and has been removed.
# Step 6: Set up the RetrievalQA chain.
# Retrieve the five most similar chunks for each query.
retriever = vector_store.as_retriever(search_kwargs={"k": 5})
# "stuff" concatenates all retrieved chunks into one context window; source
# documents are omitted so callers get only the answer text back.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=False,
)
# Step 7: Define Query Function
def query_bradtgpt(user_input):
    """Answer ``user_input`` via the RetrievalQA chain.

    The system prompt is prepended to every query so the model stays in
    character; only the main answer string is returned (no source documents).
    """
    # Add system prompt dynamically to the query.
    full_prompt = f"""
{system_prompt}
User: {user_input}
Assistant:
"""
    # NOTE(review): calling the chain object directly is deprecated in newer
    # LangChain releases; migrate to qa_chain.invoke({...}) when upgrading.
    response = qa_chain({"query": full_prompt})
    return response["result"]  # Return the main answer only
# Step 8: Gradio Interface
def respond(message):
    """Gradio callback: forward the user's message to the QA pipeline."""
    return query_bradtgpt(message)
# Build the web UI: one text box in, one text box out.
question_box = gr.Textbox(
    label="Your question",
    placeholder="Ask BradGPT anything about CPSC 183!",
    lines=3,
)
answer_box = gr.Textbox(label="Response", lines=10)
demo = gr.Interface(
    fn=respond,
    inputs=question_box,
    outputs=answer_box,
    title="BradGPT",
    description="Ask BradGPT questions about CPSC 183 course readings or topics.",
    theme="monochrome",
)

if __name__ == "__main__":
    demo.launch()