Spaces:

RAHMAN00700
/

Chat-with-Multiple-Documents-Using-Streamlit-and-Watsonx

Running

App Files Community

RAHMAN00700 committed on Nov 16, 2024

Commit

0de8564

1 Parent(s): 123e33d

changes made in repo

Browse files

Files changed (6) hide show

app.py +3 -0
app1.py +0 -176
appcsvhtml.py +0 -220
appfinal.py +0 -193
appfinalokokok.py +0 -199
sample env.txt +2 -0

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import pandas as pd
 import json
 import xml.etree.ElementTree as ET
 import yaml
 from bs4 import BeautifulSoup
 from pptx import Presentation
 from docx import Document
@@ -124,6 +125,8 @@ def load_file(file_name, file_type):
     return None
 # Watsonx API setup
 watsonx_api_key =  os.getenv("WATSONX_API_KEY")
 watsonx_project_id = os.getenv("WATSONX_PROJECT_ID")

 import json
 import xml.etree.ElementTree as ET
 import yaml
+from dotenv import load_dotenv
 from bs4 import BeautifulSoup
 from pptx import Presentation
 from docx import Document
     return None
 # Watsonx API setup
+load_dotenv()
 watsonx_api_key =  os.getenv("WATSONX_API_KEY")
 watsonx_project_id = os.getenv("WATSONX_PROJECT_ID")

app1.py DELETED Viewed

@@ -1,176 +0,0 @@
-import os
-import tempfile
-from dotenv import load_dotenv
-import streamlit as st
-from langchain.document_loaders import PyPDFLoader, TextLoader
-from langchain.indexes import VectorstoreIndexCreator
-from langchain.chains import RetrievalQA
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.chains import LLMChain
-from langchain.prompts import PromptTemplate
-from ibm_watson_machine_learning.foundation_models import Model
-from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM
-from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
-from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods
-from pptx import Presentation
-from docx import Document
-# Load environment variables
-load_dotenv()
-# Watsonx API setup
-watsonx_api_key = os.getenv("API_KEY")
-watsonx_project_id = os.getenv("PROJECT_ID")
-watsonx_url = "https://us-south.ml.cloud.ibm.com"
-if not watsonx_api_key or not watsonx_project_id:
-    st.error("API Key or Project ID is not set. Please set them as environment variables.")
-# Custom loader for DOCX files
-class DocxLoader:
-    def __init__(self, file_path):
-        self.file_path = file_path
-    def load(self):
-        document = Document(self.file_path)
-        text_content = [para.text for para in document.paragraphs]
-        return " ".join(text_content)
-# Custom loader for PPTX files
-class PptxLoader:
-    def __init__(self, file_path):
-        self.file_path = file_path
-    def load(self):
-        presentation = Presentation(self.file_path)
-        text_content = []
-        for slide in presentation.slides:
-            for shape in slide.shapes:
-                if hasattr(shape, "text"):
-                    text_content.append(shape.text)
-        return " ".join(text_content)
-# Caching function to load various file types
-@st.cache_resource
-def load_file(uploaded_file, file_type):
-    loaders = []
-    # Save uploaded file to a temporary path
-    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_type}") as temp_file:
-        temp_file.write(uploaded_file.read())
-        temp_file_path = temp_file.name
-    if file_type == "pdf":
-        loaders = [PyPDFLoader(temp_file_path)]
-    elif file_type == "docx":
-        loader = DocxLoader(temp_file_path)
-        text = loader.load()
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_txt_file:
-            temp_txt_file.write(text.encode("utf-8"))
-            temp_txt_file_path = temp_txt_file.name
-        loaders = [TextLoader(temp_txt_file_path)]
-    elif file_type == "txt":
-        loaders = [TextLoader(temp_file_path)]
-    elif file_type == "pptx":
-        loader = PptxLoader(temp_file_path)
-        text = loader.load()
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_txt_file:
-            temp_txt_file.write(text.encode("utf-8"))
-            temp_txt_file_path = temp_txt_file.name
-        loaders = [TextLoader(temp_txt_file_path)]
-    else:
-        st.error("Unsupported file type.")
-        return None
-    # Create the index with the loaded documents
-    index = VectorstoreIndexCreator(
-        embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L12-v2"),
-        text_splitter=RecursiveCharacterTextSplitter(chunk_size=450, chunk_overlap=50)
-    ).from_loaders(loaders)
-    return index
-# Prompt template
-prompt_template = PromptTemplate(
-    input_variables=["context", "question"],
-    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
-I am a helpful assistant.
-<|eot_id|>
-{context}
-<|start_header_id|>user<|end_header_id|>
-{question}<|eot_id|>
-"""
-)
-# Sidebar settings
-with st.sidebar:
-    st.title("Watsonx RAG Demo")
-    model_name = st.selectbox("Model", ["meta-llama/llama-3-405b-instruct", "codellama/codellama-34b-instruct-hf", "ibm/granite-20b-multilingual"])
-    max_new_tokens = st.slider("Max output tokens", min_value=100, max_value=1000, value=300, step=100)
-    decoding_method = st.radio("Decoding Method", [DecodingMethods.GREEDY.value, DecodingMethods.SAMPLE.value])
-    st.info("Upload a PDF, DOCX, TXT, or PPTX file for RAG")
-    uploaded_file = st.file_uploader("Upload file", accept_multiple_files=False, type=["pdf", "docx", "txt", "pptx"])
-    if uploaded_file:
-        file_type = uploaded_file.name.split('.')[-1].lower()
-        index = load_file(uploaded_file, file_type)
-# Watsonx Model setup with UI feedback
-credentials = {
-    "url": watsonx_url,
-    "apikey": watsonx_api_key
-}
-parameters = {
-    GenParams.DECODING_METHOD: decoding_method,
-    GenParams.MAX_NEW_TOKENS: max_new_tokens,
-    GenParams.MIN_NEW_TOKENS: 1,
-    GenParams.TEMPERATURE: 0.7,
-    GenParams.TOP_K: 50,
-    GenParams.TOP_P: 1,
-    GenParams.REPETITION_PENALTY: 1.0
-}
-# Display setup status
-status_placeholder = st.empty()
-status_placeholder.markdown("**Setting up Watsonx...**")
-try:
-    model = WatsonxLLM(Model(model_name, credentials, parameters, project_id=watsonx_project_id))
-    status_placeholder.markdown(f"**Model [{model_name}] ready.**")
-except Exception as e:
-    st.error(f"Failed to initialize model: {str(e)}")
-# Chat History Setup
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-# Display chat messages from history on app rerun
-for message in st.session_state.messages:
-    st.chat_message(message["role"]).markdown(message["content"])
-# User Input
-prompt = st.chat_input("Ask your question here", disabled=False if model else True)
-# Process User Input
-if prompt:
-    st.chat_message("user").markdown(prompt)
-    if index:
-        rag_chain = RetrievalQA.from_chain_type(
-            llm=model,
-            chain_type="stuff",
-            retriever=index.vectorstore.as_retriever(),
-            chain_type_kwargs={"prompt": prompt_template},
-            verbose=True
-        )
-        response_text = rag_chain.run(prompt).strip()
-    else:
-        chain = LLMChain(llm=model, prompt=prompt_template)
-        response_text = chain.run(context="", question=prompt).strip("<|start_header_id|>assistant<|end_header_id|>").strip("<|eot_id|>")
-    st.session_state.messages.append({'role': 'user', 'content': prompt})
-    st.chat_message("assistant").markdown(response_text)
-    st.session_state.messages.append({'role': 'assistant', 'content': response_text})

appcsvhtml.py DELETED Viewed

@@ -1,220 +0,0 @@
-import os
-import streamlit as st
-import tempfile
-import pandas as pd
-import json
-import xml.etree.ElementTree as ET
-import yaml
-from bs4 import BeautifulSoup
-from pptx import Presentation
-from docx import Document
-from langchain.document_loaders import PyPDFLoader, TextLoader
-from langchain.indexes import VectorstoreIndexCreator
-from langchain.chains import RetrievalQA
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.chains import LLMChain
-from langchain.prompts import PromptTemplate
-from ibm_watson_machine_learning.foundation_models import Model
-from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM
-from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
-from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods
-# Initialize index to None
-index = None
-rag_chain = None  # Initialize rag_chain as None by default
-# Custom loader for DOCX files
-class DocxLoader:
-    def __init__(self, file_path):
-        self.file_path = file_path
-    def load(self):
-        document = Document(self.file_path)
-        text_content = [para.text for para in document.paragraphs]
-        return " ".join(text_content)
-# Custom loader for PPTX files
-class PptxLoader:
-    def __init__(self, file_path):
-        self.file_path = file_path
-    def load(self):
-        presentation = Presentation(self.file_path)
-        text_content = [shape.text for slide in presentation.slides for shape in slide.shapes if hasattr(shape, "text")]
-        return " ".join(text_content)
-# Custom loader for additional file types
-def load_csv(file_path):
-    df = pd.read_csv(file_path)
-    return df.to_string(index=False)
-def load_json(file_path):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-    return json.dumps(data, indent=2)
-def load_xml(file_path):
-    tree = ET.parse(file_path)
-    root = tree.getroot()
-    return ET.tostring(root, encoding="unicode")
-def load_yaml(file_path):
-    with open(file_path, 'r') as file:
-        data = yaml.safe_load(file)
-    return yaml.dump(data)
-def load_html(file_path):
-    with open(file_path, 'r', encoding='utf-8') as file:
-        soup = BeautifulSoup(file, 'html.parser')
-    return soup.get_text()
-# Caching function to load various file types
-@st.cache_resource
-def load_file(file_name, file_type):
-    loaders = []
-    if file_type == "pdf":
-        loaders = [PyPDFLoader(file_name)]
-    elif file_type == "docx":
-        loader = DocxLoader(file_name)
-        text = loader.load()
-    elif file_type == "pptx":
-        loader = PptxLoader(file_name)
-        text = loader.load()
-    elif file_type == "txt":
-        loaders = [TextLoader(file_name)]
-    elif file_type == "csv":
-        text = load_csv(file_name)
-    elif file_type == "json":
-        text = load_json(file_name)
-    elif file_type == "xml":
-        text = load_xml(file_name)
-    elif file_type == "yaml":
-        text = load_yaml(file_name)
-    elif file_type == "html":
-        text = load_html(file_name)
-    else:
-        st.error("Unsupported file type.")
-        return None
-    # Use TextLoader for intermediate text files from custom loaders
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
-        temp_file.write(text.encode("utf-8"))
-        temp_file_path = temp_file.name
-    loaders = [TextLoader(temp_file_path)]
-    index = VectorstoreIndexCreator(
-        embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L12-v2"),
-        text_splitter=RecursiveCharacterTextSplitter(chunk_size=450, chunk_overlap=50)
-    ).from_loaders(loaders)
-    return index
-# Watsonx API setup
-watsonx_api_key = os.getenv("WATSONX_API_KEY")
-watsonx_project_id = os.getenv("WATSONX_PROJECT_ID")
-if not watsonx_api_key or not watsonx_project_id:
-    st.error("API Key or Project ID is not set. Please set them as environment variables.")
-prompt_template_br = PromptTemplate(
-    input_variables=["context", "question"],
-    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
-I am a helpful assistant.
-<|eot_id|>
-{context}
-<|start_header_id|>user<|end_header_id|>
-{question}<|eot_id|>
-"""
-)
-with st.sidebar:
-    st.title("Watsonx RAG with Multiple docs")
-    watsonx_model = st.selectbox("Model", ["meta-llama/llama-3-405b-instruct", "codellama/codellama-34b-instruct-hf", "ibm/granite-20b-multilingual"])
-    max_new_tokens = st.slider("Max output tokens", min_value=100, max_value=4000, value=600, step=100)
-    decoding_method = st.radio("Decoding", (DecodingMethods.GREEDY.value, DecodingMethods.SAMPLE.value))
-    parameters = {
-        GenParams.DECODING_METHOD: decoding_method,
-        GenParams.MAX_NEW_TOKENS: max_new_tokens,
-        GenParams.MIN_NEW_TOKENS: 1,
-        GenParams.TEMPERATURE: 0,
-        GenParams.TOP_K: 50,
-        GenParams.TOP_P: 1,
-        GenParams.STOP_SEQUENCES: [],
-        GenParams.REPETITION_PENALTY: 1
-    }
-    st.info("Upload a file to use RAG")
-    uploaded_file = st.file_uploader("Upload file", accept_multiple_files=False, type=["pdf", "docx", "txt", "pptx", "csv", "json", "xml", "yaml", "html"])
-    if uploaded_file is not None:
-        bytes_data = uploaded_file.read()
-        st.write("Filename:", uploaded_file.name)
-        with open(uploaded_file.name, 'wb') as f:
-            f.write(bytes_data)
-        file_type = uploaded_file.name.split('.')[-1].lower()
-        index = load_file(uploaded_file.name, file_type)
-    model_name = watsonx_model
-st.info("Setting up Watsonx...")
-my_credentials = {
-    "url": "https://us-south.ml.cloud.ibm.com",
-    "apikey": watsonx_api_key
-}
-params = parameters
-project_id = watsonx_project_id
-space_id = None
-verify = False
-model = WatsonxLLM(model=Model(model_name, my_credentials, params, project_id, space_id, verify))
-if model:
-    st.info(f"Model {model_name} ready.")
-    chain = LLMChain(llm=model, prompt=prompt_template_br, verbose=True)
-if chain and index is not None:
-    rag_chain = RetrievalQA.from_chain_type(
-        llm=model,
-        chain_type="stuff",
-        retriever=index.vectorstore.as_retriever(),
-        chain_type_kwargs={"prompt": prompt_template_br},
-        return_source_documents=False,
-        verbose=True
-    )
-    st.info("Document-based retrieval is ready.")
-else:
-    st.warning("No document uploaded or chain setup issue.")
-# Chat loop
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-for message in st.session_state.messages:
-    st.chat_message(message["role"]).markdown(message["content"])
-prompt = st.chat_input("Ask your question here", disabled=False if chain else True)
-if prompt:
-    st.chat_message("user").markdown(prompt)
-    if rag_chain:
-        response_text = rag_chain.run(prompt).strip()
-    else:
-        response_text = chain.run(question=prompt, context="").strip()
-    st.session_state.messages.append({'role': 'User', 'content': prompt})
-    st.chat_message("assistant").markdown(response_text)
-    st.session_state.messages.append({'role': 'Assistant', 'content': response_text})
-# requirements.txt
-# Streamlit
-# pandas
-# beautifulsoup4
-# ibm-watson-machine-learning
-# python-pptx
-# python-docx
-# PyYAML
-# xml

appfinal.py DELETED Viewed

@@ -1,193 +0,0 @@
-import os
-from langchain.document_loaders import PyPDFLoader, TextLoader
-from langchain.indexes import VectorstoreIndexCreator
-from langchain.chains import RetrievalQA
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.chains import LLMChain
-from langchain.prompts import PromptTemplate
-import streamlit as st
-import tempfile
-from ibm_watson_machine_learning.foundation_models import Model
-from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM
-from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
-from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods
-from pptx import Presentation
-from docx import Document
-# Initialize index to None
-index = None
-# Custom loader for DOCX files
-class DocxLoader:
-    def __init__(self, file_path):
-        self.file_path = file_path
-    def load(self):
-        document = Document(self.file_path)
-        text_content = []
-        for para in document.paragraphs:
-            text_content.append(para.text)
-        return " ".join(text_content)
-# Custom loader for PPTX files
-class PptxLoader:
-    def __init__(self, file_path):
-        self.file_path = file_path
-    def load(self):
-        presentation = Presentation(self.file_path)
-        text_content = []
-        for slide in presentation.slides:
-            for shape in slide.shapes:
-                if hasattr(shape, "text"):
-                    text_content.append(shape.text)
-        return " ".join(text_content)
-# Caching function to load various file types
-@st.cache_resource
-def load_file(file_name, file_type):
-    loaders = []
-    if file_type == "pdf":
-        loaders = [PyPDFLoader(file_name)]
-    elif file_type == "docx":
-        loader = DocxLoader(file_name)
-        text = loader.load()
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
-            temp_file.write(text.encode("utf-8"))
-            temp_file_path = temp_file.name
-        loaders = [TextLoader(temp_file_path)]
-    elif file_type == "txt":
-        loaders = [TextLoader(file_name)]
-    elif file_type == "pptx":
-        loader = PptxLoader(file_name)
-        text = loader.load()
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
-            temp_file.write(text.encode("utf-8"))
-            temp_file_path = temp_file.name
-        loaders = [TextLoader(temp_file_path)]
-    else:
-        st.error("Unsupported file type.")
-        return None
-    index = VectorstoreIndexCreator(
-        embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L12-v2"),
-        text_splitter=RecursiveCharacterTextSplitter(chunk_size=450, chunk_overlap=50)
-    ).from_loaders(loaders)
-    return index
-def format_history():
-    return ""
-# Watsonx API setup using environment variables
-watsonx_api_key = os.getenv("WATSONX_API_KEY")
-watsonx_project_id = os.getenv("WATSONX_PROJECT_ID")
-if not watsonx_api_key or not watsonx_project_id:
-    st.error("API Key or Project ID is not set. Please set them as environment variables.")
-prompt_template_br = PromptTemplate(
-    input_variables=["context", "question"],
-    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
-I am a helpful assistant.
-<|eot_id|>
-{context}
-<|start_header_id|>user<|end_header_id|>
-{question}<|eot_id|>
-"""
-)
-with st.sidebar:
-    st.title("Watsonx RAG with Multiple docs")
-    watsonx_model = st.selectbox("Model", ["meta-llama/llama-3-405b-instruct", "codellama/codellama-34b-instruct-hf", "ibm/granite-20b-multilingual"])
-    max_new_tokens = st.slider("Max output tokens", min_value=100, max_value=4000, value=600, step=100)
-    decoding_method = st.radio("Decoding", (DecodingMethods.GREEDY.value, DecodingMethods.SAMPLE.value))
-    parameters = {
-        GenParams.DECODING_METHOD: decoding_method,
-        GenParams.MAX_NEW_TOKENS: max_new_tokens,
-        GenParams.MIN_NEW_TOKENS: 1,
-        GenParams.TEMPERATURE: 0,
-        GenParams.TOP_K: 50,
-        GenParams.TOP_P: 1,
-        GenParams.STOP_SEQUENCES: [],
-        GenParams.REPETITION_PENALTY: 1
-    }
-    st.info("Upload a PDF, DOCX, TXT, or PPTX file to use RAG")
-    uploaded_file = st.file_uploader("Upload file", accept_multiple_files=False, type=["pdf", "docx", "txt", "pptx"])
-    if uploaded_file is not None:
-        bytes_data = uploaded_file.read()
-        st.write("Filename:", uploaded_file.name)
-        with open(uploaded_file.name, 'wb') as f:
-            f.write(bytes_data)
-        file_type = uploaded_file.name.split('.')[-1].lower()
-        index = load_file(uploaded_file.name, file_type)
-    model_name = watsonx_model
-    def clear_messages():
-        st.session_state.messages = []
-    st.button('Clear messages', on_click=clear_messages)
-st.info("Setting up Watsonx...")
-my_credentials = {
-    "url": "https://us-south.ml.cloud.ibm.com",
-    "apikey": watsonx_api_key
-}
-params = parameters
-project_id = watsonx_project_id
-space_id = None
-verify = False
-model = WatsonxLLM(model=Model(model_name, my_credentials, params, project_id, space_id, verify))
-if model:
-    st.info(f"Model {model_name} ready.")
-    chain = LLMChain(llm=model, prompt=prompt_template_br, verbose=True)
-if chain:
-    st.info("Chat ready.")
-    if index:
-        rag_chain = RetrievalQA.from_chain_type(
-            llm=model,
-            chain_type="stuff",
-            retriever=index.vectorstore.as_retriever(),
-            chain_type_kwargs={"prompt": prompt_template_br},
-            return_source_documents=False,
-            verbose=True
-        )
-        st.info("Chat with document ready.")
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-for message in st.session_state.messages:
-    st.chat_message(message["role"]).markdown(message["content"])
-prompt = st.chat_input("Ask your question here", disabled=False if chain else True)
-if prompt:
-    st.chat_message("user").markdown(prompt)
-    response_text = None
-    if rag_chain:
-        response_text = rag_chain.run(prompt).strip()
-    if not response_text:
-        response = chain.run(question=prompt, context=format_history())
-        response_text = response.strip("<|start_header_id|>assistant<|end_header_id|>").strip("<|eot_id|>")
-    st.session_state.messages.append({'role': 'User', 'content': prompt })
-    st.chat_message("assistant").markdown(response_text)
-    st.session_state.messages.append({'role': 'Assistant', 'content': response_text })

appfinalokokok.py DELETED Viewed

@@ -1,199 +0,0 @@
-import os
-import streamlit as st
-import tempfile
-from pptx import Presentation
-from docx import Document
-from langchain.document_loaders import PyPDFLoader, TextLoader
-from langchain.indexes import VectorstoreIndexCreator
-from langchain.chains import RetrievalQA
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.chains import LLMChain
-from langchain.prompts import PromptTemplate
-from ibm_watson_machine_learning.foundation_models import Model
-from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM
-from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
-from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods
-# Initialize index to None
-index = None
-rag_chain = None  # Initialize rag_chain as None by default
-# Custom loader for DOCX files
-class DocxLoader:
-    def __init__(self, file_path):
-        self.file_path = file_path
-    def load(self):
-        document = Document(self.file_path)
-        text_content = []
-        for para in document.paragraphs:
-            text_content.append(para.text)
-        return " ".join(text_content)
-# Custom loader for PPTX files
-class PptxLoader:
-    def __init__(self, file_path):
-        self.file_path = file_path
-    def load(self):
-        presentation = Presentation(self.file_path)
-        text_content = []
-        for slide in presentation.slides:
-            for shape in slide.shapes:
-                if hasattr(shape, "text"):
-                    text_content.append(shape.text)
-        return " ".join(text_content)
-# Caching function to load various file types
-@st.cache_resource
-def load_file(file_name, file_type):
-    loaders = []
-    if file_type == "pdf":
-        loaders = [PyPDFLoader(file_name)]
-    elif file_type == "docx":
-        loader = DocxLoader(file_name)
-        text = loader.load()
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
-            temp_file.write(text.encode("utf-8"))
-            temp_file_path = temp_file.name
-        loaders = [TextLoader(temp_file_path)]
-    elif file_type == "txt":
-        loaders = [TextLoader(file_name)]
-    elif file_type == "pptx":
-        loader = PptxLoader(file_name)
-        text = loader.load()
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
-            temp_file.write(text.encode("utf-8"))
-            temp_file_path = temp_file.name
-        loaders = [TextLoader(temp_file_path)]
-    else:
-        st.error("Unsupported file type.")
-        return None
-    index = VectorstoreIndexCreator(
-        embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L12-v2"),
-        text_splitter=RecursiveCharacterTextSplitter(chunk_size=450, chunk_overlap=50)
-    ).from_loaders(loaders)
-    return index
-def format_history():
-    return ""
-# Watsonx API setup using environment variables
-watsonx_api_key = os.getenv("WATSONX_API_KEY")
-watsonx_project_id = os.getenv("WATSONX_PROJECT_ID")
-if not watsonx_api_key or not watsonx_project_id:
-    st.error("API Key or Project ID is not set. Please set them as environment variables.")
-prompt_template_br = PromptTemplate(
-    input_variables=["context", "question"],
-    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
-I am a helpful assistant.
-<|eot_id|>
-{context}
-<|start_header_id|>user<|end_header_id|>
-{question}<|eot_id|>
-"""
-)
-with st.sidebar:
-    st.title("Watsonx RAG with Multiple docs")
-    watsonx_model = st.selectbox("Model", ["meta-llama/llama-3-405b-instruct", "codellama/codellama-34b-instruct-hf", "ibm/granite-20b-multilingual"])
-    max_new_tokens = st.slider("Max output tokens", min_value=100, max_value=4000, value=600, step=100)
-    decoding_method = st.radio("Decoding", (DecodingMethods.GREEDY.value, DecodingMethods.SAMPLE.value))
-    parameters = {
-        GenParams.DECODING_METHOD: decoding_method,
-        GenParams.MAX_NEW_TOKENS: max_new_tokens,
-        GenParams.MIN_NEW_TOKENS: 1,
-        GenParams.TEMPERATURE: 0,
-        GenParams.TOP_K: 50,
-        GenParams.TOP_P: 1,
-        GenParams.STOP_SEQUENCES: [],
-        GenParams.REPETITION_PENALTY: 1
-    }
-    st.info("Upload a PDF, DOCX, TXT, or PPTX file to use RAG")
-    uploaded_file = st.file_uploader("Upload file", accept_multiple_files=False, type=["pdf", "docx", "txt", "pptx"])
-    if uploaded_file is not None:
-        bytes_data = uploaded_file.read()
-        st.write("Filename:", uploaded_file.name)
-        with open(uploaded_file.name, 'wb') as f:
-            f.write(bytes_data)
-        file_type = uploaded_file.name.split('.')[-1].lower()
-        index = load_file(uploaded_file.name, file_type)
-    model_name = watsonx_model
-    def clear_messages():
-        st.session_state.messages = []
-    st.button('Clear messages', on_click=clear_messages)
-st.info("Setting up Watsonx...")
-my_credentials = {
-    "url": "https://us-south.ml.cloud.ibm.com",
-    "apikey": watsonx_api_key
-}
-params = parameters
-project_id = watsonx_project_id
-space_id = None
-verify = False
-model = WatsonxLLM(model=Model(model_name, my_credentials, params, project_id, space_id, verify))
-if model:
-    st.info(f"Model {model_name} ready.")
-    chain = LLMChain(llm=model, prompt=prompt_template_br, verbose=True)
-if chain:
-    st.info("Chat ready.")
-    # Only create rag_chain if index is successfully created
-    if index is not None:
-        rag_chain = RetrievalQA.from_chain_type(
-            llm=model,
-            chain_type="stuff",
-            retriever=index.vectorstore.as_retriever(),
-            chain_type_kwargs={"prompt": prompt_template_br},
-            return_source_documents=False,
-            verbose=True
-        )
-        st.info("Document-based retrieval is ready.")
-    else:
-        st.warning("No document uploaded. Answering common queries without retrieval.")
-# Chat loop for handling queries
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-for message in st.session_state.messages:
-    st.chat_message(message["role"]).markdown(message["content"])
-prompt = st.chat_input("Ask your question here", disabled=False if chain else True)
-if prompt:
-    st.chat_message("user").markdown(prompt)
-    # Answer based on availability of rag_chain or chain
-    if rag_chain:
-        response_text = rag_chain.run(prompt).strip()
-    else:
-        # Use general model-based response if rag_chain is not available
-        response_text = chain.run(question=prompt, context=format_history()).strip("<|start_header_id|>assistant<|end_header_id|>").strip("<|eot_id|>")
-    # Store and display conversation
-    st.session_state.messages.append({'role': 'User', 'content': prompt})
-    st.chat_message("assistant").markdown(response_text)
-    st.session_state.messages.append({'role': 'Assistant', 'content': response_text})

sample env.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ WATSONX_API_KEY=<your_watsonx_api_key>
2	+ WATSONX_PROJECT_ID=<your_watsonx_project_id>