BroBro87 committed
Commit
fa83d85
1 Parent(s): f44efda

Create app.py

Files changed (1)
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
+ # Source document (a Google Doc, exported as PDF into the working directory):
+ # https://docs.google.com/document/d/1hY5ItC8Mewyk-90Q--CGr50wBbZBjPrkYu4NtiBVre4/edit?usp=sharing
+ # Note: inference takes 6-7 minutes per query.
+ import logging
+ import sys
+ import gradio as gr
+ from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
+ from llama_index.llms import LlamaCPP
+ from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+
+ # Set up logging to stdout.
+ logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+ logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
+
+ def configure_llama_model():
+     # Download a quantized Mistral-7B-Instruct GGUF model and run it via llama.cpp.
+     model_url = 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf'
+     llm = LlamaCPP(
+         model_url=model_url,
+         temperature=0.3,
+         max_new_tokens=256,
+         context_window=3900,
+         model_kwargs={"n_gpu_layers": -1},  # offload all layers to the GPU if one is available
+         messages_to_prompt=messages_to_prompt,
+         completion_to_prompt=completion_to_prompt,
+         verbose=True,
+     )
+     return llm
+
+ def configure_embeddings():
+     # Sentence-transformers model used to embed document chunks and queries.
+     embed_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+     return embed_model
+
+ def configure_service_context(llm, embed_model):
+     return ServiceContext.from_defaults(chunk_size=250, llm=llm, embed_model=embed_model)
+
+ def initialize_vector_store_index(data_path, service_context):
+     documents = SimpleDirectoryReader(data_path).load_data()
+     index = VectorStoreIndex.from_documents(documents, service_context=service_context)
+     return index
+
+ # Configure and initialize components.
+ llm = configure_llama_model()
+ embed_model = configure_embeddings()
+ service_context = configure_service_context(llm, embed_model)
+ index = initialize_vector_store_index("./", service_context)
+ query_engine = index.as_query_engine()
+
+ # Chat callback for Gradio. ChatInterface passes (message, history);
+ # the history is ignored, so each query is answered independently.
+ def get_response(message, history):
+     return str(query_engine.query(message))
+
+ gr.ChatInterface(get_response).launch(debug=True)
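
Usage note: launch() runs at module level, so executing the file starts the Gradio chat UI directly, as a Hugging Face Space expects. For a one-off sanity check without the UI, the final line can be swapped for a direct query (a minimal sketch; the sample question is hypothetical):

    # One-off query instead of launching the chat UI (sample question is hypothetical).
    print(str(query_engine.query("What are the key points of the document?")))

Given the 6-7 minute latency noted at the top of the file, older Gradio 3.x releases may also need request queueing enabled, e.g. gr.ChatInterface(get_response).queue().launch(debug=True); newer releases queue by default.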