Warlord-K committed on
Commit cb10e0f
1 Parent(s): f31688f

Create app.py

Files changed (1)
  1. app.py +178 -0
app.py ADDED
@@ -0,0 +1,178 @@
+ model_name = "qwen:0.5b-chat"
+
+ import os
+
+ # Install lshw (used by the Ollama install script for GPU detection) and Ollama itself
+ os.system("sudo apt install -y lshw")
+ os.system("curl https://ollama.ai/install.sh | sh")
+
+ # Allow re-entrant event loops so asyncio can be driven from this script
+ import nest_asyncio
+ nest_asyncio.apply()
+
+ import asyncio
+
+ # Run Async Ollama
+ # Taken from: https://stackoverflow.com/questions/77697302/how-to-run-ollama-in-google-colab
+ # NB: You may need to set these environment variables to get CUDA working, depending on which backend you are running.
+ # Set environment variables for the NVIDIA / CUDA libraries
+ os.environ['PATH'] += ':/usr/local/cuda/bin'
+ # Set LD_LIBRARY_PATH to include both /usr/lib64-nvidia and CUDA lib directories
+ os.environ['LD_LIBRARY_PATH'] = '/usr/lib64-nvidia:/usr/local/cuda/lib64'
+
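+ # Helper: launch a subprocess and stream its stdout/stderr to the console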
+ async def run_process(cmd):
+     print('>>> starting', *cmd)
+     process = await asyncio.create_subprocess_exec(
+         *cmd,
+         stdout=asyncio.subprocess.PIPE,
+         stderr=asyncio.subprocess.PIPE
+     )
+
+     # define an async pipe function
+     async def pipe(lines):
+         async for line in lines:
+             print(line.decode().strip())
+
+     # echo stdout and stderr until the process exits
+     await asyncio.gather(
+         pipe(process.stdout),
+         pipe(process.stderr),
+     )
+
+ import threading
+
+ async def start_ollama_serve():
+     await run_process(['ollama', 'serve'])
+
+ def run_async_in_thread(loop, coro):
+     asyncio.set_event_loop(loop)
+     loop.run_until_complete(coro)
+     loop.close()
+
+ # Create a new event loop that will run in a new thread
+ new_loop = asyncio.new_event_loop()
+
+ # Start ollama serve in a separate thread so it doesn't block the rest of the script
+ thread = threading.Thread(target=run_async_in_thread, args=(new_loop, start_ollama_serve()))
+ thread.start()
+
+ # Load up model (give the Ollama server a moment to start before pulling)
+ import time
+ time.sleep(5)
+
+ os.system(f"ollama pull {model_name}")
+
+ # Download Data
+
+ os.system("wget -O data.txt https://drive.google.com/uc?id=1uMvEYq17LsvTkX8bU5Fq-2FcG16XbrAW")
+
+ # LlamaIndex (pre-0.10 import paths) and Gradio
+ import gradio as gr
+ from llama_index import (
+     SimpleDirectoryReader,
+     ServiceContext,
+     StorageContext,
+     VectorStoreIndex,
+     get_response_synthesizer,
+     load_index_from_storage,
+     set_global_service_context,
+ )
+ from llama_index.embeddings import HuggingFaceEmbedding
+ from llama_index.llms import Ollama
+ from llama_index.node_parser import HierarchicalNodeParser, get_leaf_nodes
+ from llama_index.indices.postprocessor import SentenceTransformerRerank
+ from llama_index.retrievers import AutoMergingRetriever
+ from llama_index.query_engine import RetrieverQueryEngine
+ from llama_index.chat_engine.condense_question import CondenseQuestionChatEngine
+
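+ # Auto-merging setup: documents are split into hierarchical chunks; only the leaf
+ # chunks are embedded, while parent nodes stay in the docstore so retrieved leaves
+ # can later be merged back into their parents.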
+ def build_automerging_index(
+     documents,
+     llm,
+     embed_model,
+     save_dir="merging_index",
+     chunk_sizes=None,
+ ):
+     chunk_sizes = chunk_sizes or [2048, 512, 128]
+     node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
+     nodes = node_parser.get_nodes_from_documents(documents)
+     leaf_nodes = get_leaf_nodes(nodes)
+     merging_context = ServiceContext.from_defaults(
+         llm=llm,
+         embed_model=embed_model,
+     )
+     set_global_service_context(merging_context)
+     storage_context = StorageContext.from_defaults()
+     storage_context.docstore.add_documents(nodes)
+
+     if not os.path.exists(save_dir):
+         automerging_index = VectorStoreIndex(
+             leaf_nodes, storage_context=storage_context, service_context=merging_context
+         )
+         automerging_index.storage_context.persist(persist_dir=save_dir)
+     else:
+         automerging_index = load_index_from_storage(
+             StorageContext.from_defaults(persist_dir=save_dir),
+             service_context=merging_context,
+         )
+     return automerging_index
+
+
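+ # Query engine: auto-merging retrieval, cross-encoder reranking, streaming synthesis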
+ def get_automerging_query_engine(
+     automerging_index,
+     similarity_top_k=5,
+     rerank_top_n=2,
+ ):
+     base_retriever = automerging_index.as_retriever(similarity_top_k=similarity_top_k)
+     retriever = AutoMergingRetriever(
+         base_retriever, automerging_index.storage_context, verbose=True
+     )
+     rerank = SentenceTransformerRerank(
+         top_n=rerank_top_n, model="BAAI/bge-reranker-base"
+     )
+     synth = get_response_synthesizer(streaming=True)
+     auto_merging_engine = RetrieverQueryEngine.from_args(
+         retriever, node_postprocessors=[rerank], response_synthesizer=synth
+     )
+     return auto_merging_engine
+
+
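+ # Wire everything together: local LLM served by Ollama, HuggingFace embeddings,
+ # the auto-merging index over data.txt, and a condense-question chat engine on top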
+ llm = Ollama(model=model_name, request_timeout=300.0)
+ embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
+
+ documents = SimpleDirectoryReader(
+     input_files=["data.txt"]
+ ).load_data()
+
+ automerging_index = build_automerging_index(
+     documents,
+     llm,
+     embed_model=embed_model,
+     save_dir="merging_index"
+ )
+
+ automerging_query_engine = get_automerging_query_engine(
+     automerging_index,
+ )
+ automerging_chat_engine = CondenseQuestionChatEngine.from_defaults(
+     query_engine=automerging_query_engine,
+ )
+
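+ # Gradio callback: stream the chat engine's answer back to the UI incrementally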
+ def chat(message, history):
+     res = automerging_chat_engine.stream_chat(message)
+     response = ""
+     for text in res.response_gen:
+         response += text
+         yield response
+
+ demo = gr.ChatInterface(chat)
+ demo.launch()