cnmoro commited on
Commit
2d3a993
1 Parent(s): 22c7926

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +130 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time, aiohttp, asyncio, json, os, multiprocessing
2
+ from minivectordb.embedding_model import EmbeddingModel
3
+ from minivectordb.vector_database import VectorDatabase
4
+ from text_util_en_pt.cleaner import structurize_text, detect_language, Language
5
+ from webtextcrawler.webtextcrawler import extract_text_from_url
6
+ from duckduckgo_search import DDGS
7
+ import gradio as gr
8
+
9
# OpenRouter API key, injected through the environment (None if unset).
openrouter_key = os.environ.get("OPENROUTER_KEY")

# Shared sentence-embedding model: e5 "small" variant, full-precision
# (quantized ONNX path explicitly disabled).
model = EmbeddingModel(use_quantized_onnx_model=False, e5_model_size='small')
11
+
12
def fetch_links(query, max_results=10):
    """Run a DuckDuckGo text search for *query* and return the result URLs.

    Returns a list of up to *max_results* href strings.
    """
    with DDGS() as search_client:
        hits = search_client.text(query, max_results=max_results)
        return [hit['href'] for hit in hits]
15
+
16
def fetch_texts(links):
    """Crawl every URL in *links* in parallel and return the page texts.

    Extraction runs in a process pool; pages that yield no text are
    dropped, and the rest are joined with newlines into one string.
    """
    with multiprocessing.Pool() as worker_pool:
        pages = worker_pool.map(extract_text_from_url, links)
    non_empty = [page for page in pages if page]
    return '\n'.join(non_empty)
20
+
21
def index_and_search(query, text):
    """Embed *text* sentence-by-sentence, index it, and retrieve the k=10
    sentences most similar to *query*.

    Returns a tuple ``(context, embedding_time, retrieval_time)`` where
    *context* is the matched sentences joined with newlines and the two
    times are wall-clock seconds for the indexing and lookup phases.
    """
    indexing_started = time.time()
    query_embedding = model.extract_embeddings(query)

    # Build a fresh in-memory vector index over the structurized sentences.
    vector_db = VectorDatabase()
    sentences = [chunk['sentence'] for chunk in structurize_text(text)]
    for position, sentence in enumerate(sentences):
        embedding = model.extract_embeddings(sentence)
        # IDs start at 1; the sentence itself rides along as metadata.
        vector_db.store_embedding(position + 1, embedding, {'sentence': sentence})

    embedding_time = time.time() - indexing_started

    # Retrieval phase; element [2] of the result tuple holds the metadata
    # dicts stored above.
    lookup_started = time.time()
    search_results = vector_db.find_most_similar(query_embedding, k=10)
    retrieval_time = time.time() - lookup_started

    best_sentences = [meta['sentence'] for meta in search_results[2]]
    return '\n'.join(best_sentences), embedding_time, retrieval_time
40
+
41
def retrieval_pipeline(query):
    """Full search -> crawl -> index -> retrieve pipeline for *query*.

    Returns ``(context, websearch_time, webcrawl_time, embedding_time,
    retrieval_time, links)`` — the retrieved context string, wall-clock
    seconds for each stage, and the list of URLs that were crawled.
    """
    stage_started = time.time()
    links = fetch_links(query)
    websearch_time = time.time() - stage_started

    stage_started = time.time()
    text = fetch_texts(links)
    webcrawl_time = time.time() - stage_started

    context, embedding_time, retrieval_time = index_and_search(query, text)

    return context, websearch_time, webcrawl_time, embedding_time, retrieval_time, links
53
+
54
async def predict(message, history):
    """Gradio streaming chat handler.

    Retrieves web context for *message* via retrieval_pipeline, streams an
    answer from the OpenRouter chat-completions API, and finally yields one
    last chunk with the visited links and per-stage timings appended.

    *history* is supplied by gr.ChatInterface but is intentionally unused:
    every question is answered from a fresh web search.
    """
    context, websearch_time, webcrawl_time, embedding_time, retrieval_time, links = retrieval_pipeline(message)

    # Build the prompt in the user's own language (pt-BR vs. English).
    if detect_language(message) == Language.ptbr:
        prompt = f"Contexto:\n\n{context}\n\nBaseado no contexto, responda: {message}"
    else:
        prompt = f"Context:\n\n{context}\n\nBased on the context, answer: {message}"

    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {"Content-Type": "application/json",
               "Authorization": f"Bearer {openrouter_key}"}
    # BUG FIX: the original list was missing the comma after the capybara
    # entry, so Python implicitly concatenated it with the zephyr id into a
    # single invalid model name — silently removing BOTH models from the
    # fallback route.
    body = {"stream": True,
            "models": [
                "mistralai/mistral-7b-instruct:free",
                "nousresearch/nous-capybara-7b:free",
                "huggingfaceh4/zephyr-7b-beta:free",
                "openchat/openchat-7b:free"
            ],
            "route": "fallback",  # try each model in order until one answers
            "max_tokens": 768,
            "messages": [
                {"role": "user", "content": prompt}
            ]}

    full_response = ""
    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=body) as response:

            buffer = ""  # holds incomplete SSE lines between network chunks
            async for chunk in response.content.iter_any():
                buffer += chunk.decode()
                while "\n" in buffer:  # process only complete lines
                    line, buffer = buffer.split("\n", 1)

                    if line.startswith("data: "):
                        event_data = line[len("data: "):]
                        if event_data != '[DONE]':
                            # Chat-style events carry text in
                            # choices[0].delta.content; completion-style
                            # events fall back to choices[0].text. Anything
                            # else (e.g. keep-alive comments) is logged.
                            try:
                                current_text = json.loads(event_data)['choices'][0]['delta']['content']
                                full_response += current_text
                                yield full_response
                                await asyncio.sleep(0.01)
                            except Exception:
                                try:
                                    current_text = json.loads(event_data)['choices'][0]['text']
                                    full_response += current_text
                                    yield full_response
                                    await asyncio.sleep(0.01)
                                except Exception as e:
                                    print(e)

    # Trailing metadata block: sources visited and per-stage timings.
    final_metadata_block = ""

    final_metadata_block += f"Links visited:\n"
    for link in links:
        final_metadata_block += f"{link}\n"
    final_metadata_block += f"\nWeb search time: {websearch_time:.4f} seconds\n"
    final_metadata_block += f"\nText extraction: {webcrawl_time:.4f} seconds\n"
    final_metadata_block += f"\nEmbedding time: {embedding_time:.4f} seconds\n"
    final_metadata_block += f"\nRetrieval from VectorDB time: {retrieval_time:.4f} seconds"

    yield f"{full_response}\n\n{final_metadata_block}"
116
+
117
# Wire the streaming handler into a Gradio chat UI and start serving it.
chat_ui = gr.ChatInterface(
    predict,
    title="AI Web Search",
    description="Ask any question, and I will try to answer it using web search !",
    retry_btn=None,
    undo_btn=None,
    examples=[
        'When did the first human land on the moon?',
        'Liquid vs solid vs gas ?',
        'What is the capital of France?',
        'Why does Brazil has a high tax rate?'
    ],
)
chat_ui.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ minivectordb
2
+ webtextcrawler
3
+ requests
4
+ aiohttp
5
+ gradio>=4.13.0
6
+ duckduckgo-search
7
+ text-util-en-pt