hsienchen committed
Commit
9f22cbd
1 Parent(s): ed0628f
Files changed (3)
  1. README.md +3 -11
  2. app.py +52 -0
  3. requirements.txt +6 -0
README.md CHANGED
@@ -1,12 +1,4 @@
- ---
- title: Phi2
- emoji: ⚡
- colorFrom: indigo
- colorTo: blue
- sdk: gradio
- sdk_version: 4.14.0
- app_file: app.py
- pinned: false
- ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+ # Microsoft Phi2 RAG
+
+ [colab](https://colab.research.google.com/drive/1SRW-snwVzVASPR8h9AJEsaj-F0jmgeMw#scrollTo=gZyvBS0STcQX)
app.py ADDED
@@ -0,0 +1,52 @@
+ from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
+ from llama_index.llms import HuggingFaceLLM
+ import torch
+
+ documents = SimpleDirectoryReader("/content/Data").load_data()
+
+ from llama_index.prompts.prompts import SimpleInputPrompt
+
+ system_prompt = "You are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided."
+
+ # This will wrap the default prompts that are internal to llama-index
+ query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")
+
+
+
+ llm = HuggingFaceLLM(
+     context_window=4096,
+     max_new_tokens=256,
+     generate_kwargs={"temperature": 0.0, "do_sample": False},
+     system_prompt=system_prompt,
+     query_wrapper_prompt=query_wrapper_prompt,
+     tokenizer_name="microsoft/phi-2",
+     model_name="microsoft/phi-2",
+     device_map="cuda",
+     # bfloat16 reduces GPU memory usage on CUDA
+     model_kwargs={"torch_dtype": torch.bfloat16}
+ )
+
+ from llama_index.embeddings import HuggingFaceEmbedding
+
+ # loads BAAI/bge-small-en
+ # embed_model = HuggingFaceEmbedding()
+
+ # loads BAAI/bge-small-en-v1.5
+ embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
+
+ service_context = ServiceContext.from_defaults(
+     chunk_size=1024,
+     llm=llm,
+     embed_model=embed_model
+ )
+
+ index = VectorStoreIndex.from_documents(documents, service_context=service_context)
+
+ query_engine = index.as_query_engine()
+
+ def predict(message, history):
+     response = query_engine.query(message)
+     return str(response)
+
+ import gradio as gr
+ gr.ChatInterface(predict).launch(share=True)
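
Note: app.py uses the pre-0.10 llama-index layout (top-level `llama_index` imports, `ServiceContext`, `SimpleInputPrompt`). As an untested sketch only, the same pipeline on llama-index >= 0.10 might look like the following; the post-split packages `llama-index-llms-huggingface` and `llama-index-embeddings-huggingface` and the `Settings` object are assumptions about that newer layout, not part of this commit:

```python
# Untested sketch: port of app.py to llama-index >= 0.10, where the package
# was split and ServiceContext was replaced by global Settings.
# Assumes llama-index-llms-huggingface and llama-index-embeddings-huggingface
# are installed separately.
import torch
from llama_index.core import (
    PromptTemplate,
    Settings,
    SimpleDirectoryReader,
    VectorStoreIndex,
)
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Global defaults stand in for the old ServiceContext.
Settings.llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    system_prompt="You are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided.",
    query_wrapper_prompt=PromptTemplate("<|USER|>{query_str}<|ASSISTANT|>"),
    tokenizer_name="microsoft/phi-2",
    model_name="microsoft/phi-2",
    device_map="cuda",
    model_kwargs={"torch_dtype": torch.bfloat16},
)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.chunk_size = 1024

documents = SimpleDirectoryReader("/content/Data").load_data()
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
```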
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ pypdf
+ python-dotenv
+ llama-index
+ gradio
+ einops
+ accelerate
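
Two caveats against app.py: the top-level `from llama_index import ... ServiceContext` imports only resolve on llama-index versions before 0.10, and `HuggingFaceLLM`/`HuggingFaceEmbedding` need `torch` and `transformers` at runtime, which this requirements.txt does not list (a GPU Space image may already provide them). A pinned variant, offered as an assumption rather than part of the commit, could read:

```
pypdf
python-dotenv
llama-index<0.10
gradio
einops
accelerate
torch
transformers
```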