Waseem7711 committed on
Commit
429c15a
·
verified ·
1 Parent(s): 72e4b93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -30
app.py CHANGED
@@ -1,41 +1,93 @@
1
- from langchain_openai import ChatOpenAI
2
- from langchain_core.prompts import ChatPromptTemplate
3
- from langchain_core.output_parsers import StrOutputParser
4
- from langchain_community.llms import Ollama
5
  import streamlit as st
 
 
6
  import os
7
  from dotenv import load_dotenv
8
 
9
  # Load environment variables
10
  load_dotenv()
11
 
12
- # Set environment variables
13
- os.environ["LANGCHAIN_TRACING_V2"] = "true"
14
- os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
15
-
16
- # Prompt Template
17
- prompt = ChatPromptTemplate.from_messages(
18
- [
19
- ("system", "You are a helpful assistant. Please respond to the user queries"),
20
- ("user", "Question: {question}")
21
- ]
22
- )
23
 
24
  # Streamlit app setup
25
- st.title('Langchain Demo With LLAMA2 API')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  # User input
28
- input_text = st.text_input("Search the topic you want")
29
-
30
- # Ollama LLM (ensure the model is available, or access it through Hugging Face API)
31
- llm = Ollama(model="llama2")
32
- output_parser = StrOutputParser()
33
- chain = prompt | llm | output_parser
34
-
35
- # Display result when user inputs text
36
- if input_text:
37
- try:
38
- response = chain.invoke({"question": input_text})
39
- st.write(response)
40
- except Exception as e:
41
- st.error(f"Error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
 
 
3
  import streamlit as st
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
+ import torch
6
  import os
7
  from dotenv import load_dotenv
8
 
9
  # Load environment variables
10
  load_dotenv()
11
 
12
+ # Retrieve the Hugging Face API token from environment variables (required for private or gated models such as Llama 2)
13
+ HF_API_TOKEN = os.getenv("HF_API_TOKEN") # Ensure you set this in Hugging Face Secrets
 
 
 
 
 
 
 
 
 
14
 
15
  # Streamlit app setup
16
+ st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
17
+ st.write("This chatbot is powered by the Llama2 model. Ask me anything!")
18
+
19
+ @st.cache_resource
20
+ def load_model():
21
+ """
22
+ Load the tokenizer and model from Hugging Face.
23
+ This function is cached to prevent re-loading on every interaction.
24
+ """
25
+ tokenizer = AutoTokenizer.from_pretrained(
26
+ "meta-llama/Llama-2-7b-chat-hf",
27
+ use_auth_token=HF_API_TOKEN # Remove if the model is public
28
+ )
29
+ model = AutoModelForCausalLM.from_pretrained(
30
+ "meta-llama/Llama-2-7b-chat-hf",
31
+ torch_dtype=torch.float16, # Use float16 for reduced memory usage
32
+ device_map="auto",
33
+ use_auth_token=HF_API_TOKEN # Remove if the model is public
34
+ )
35
+ return tokenizer, model
36
+
37
+ # Load the model and tokenizer
38
+ tokenizer, model = load_model()
39
+
40
+ # Initialize session state for conversation history
41
+ if "conversation" not in st.session_state:
42
+ st.session_state.conversation = []
43
 
44
  # User input
45
+ user_input = st.text_input("You:", "")
46
+
47
+ if user_input:
48
+ st.session_state.conversation.append({"role": "user", "content": user_input})
49
+ with st.spinner("Generating response..."):
50
+ try:
51
+ # Prepare the conversation history for the model
52
+ conversation_text = ""
53
+ for message in st.session_state.conversation:
54
+ if message["role"] == "user":
55
+ conversation_text += f"User: {message['content']}\n"
56
+ elif message["role"] == "assistant":
57
+ conversation_text += f"Assistant: {message['content']}\n"
58
+
59
+ # Encode the input
60
+ inputs = tokenizer.encode(conversation_text + "Assistant:", return_tensors="pt").to(model.device)
61
+
62
+ # Generate a response
63
+ output = model.generate(
64
+ inputs,
65
+ max_length=1000,
66
+ temperature=0.7,
67
+ top_p=0.9,
68
+ do_sample=True,
69
+ eos_token_id=tokenizer.eos_token_id,
70
+ pad_token_id=tokenizer.eos_token_id # To avoid warnings
71
+ )
72
+
73
+ # Decode the response
74
+ response = tokenizer.decode(output[0], skip_special_tokens=True)
75
+
76
+ # Extract the assistant's reply
77
+ assistant_reply = response[len(conversation_text + "Assistant: "):].strip()
78
+
79
+ # Append the assistant's reply to the conversation history
80
+ st.session_state.conversation.append({"role": "assistant", "content": assistant_reply})
81
+
82
+ # Display the updated conversation
83
+ conversation_display = ""
84
+ for message in st.session_state.conversation:
85
+ if message["role"] == "user":
86
+ conversation_display += f"**You:** {message['content']}\n\n"
87
+ elif message["role"] == "assistant":
88
+ conversation_display += f"**Bot:** {message['content']}\n\n"
89
+
90
+ st.markdown(conversation_display)
91
+
92
+ except Exception as e:
93
+ st.error(f"An error occurred: {e}")