Omartificial-Intelligence-Space committed on
Commit
0b48057
·
verified ·
1 Parent(s): 2f622f3

upload rag.py

Browse files
Files changed (1) hide show
  1. rag.py +69 -0
rag.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Minimal retrieval-augmented generation (RAG) demo over one Arabic Wikipedia page.

The script runs top to bottom:

1. Load a SentenceTransformer embedding model with its output truncated to
   ``matryoshka_dim`` dimensions.
2. Fetch a Wikipedia article and split it into paragraph chunks.
3. Embed the chunks and the query, rank chunks by dot-product similarity,
   and keep the three best matches.
4. Build a prompt from those chunks and send it to an OpenAI chat model.

The OpenAI API key is read from the ``OPENAI_API_KEY`` environment variable.
"""
import os
import textwrap

import numpy as np
import openai
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from wikipediaapi import Wikipedia

# Embedding size requested from the model through ``truncate_dim``.
matryoshka_dim = 128
model = SentenceTransformer(
    "Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka",
    trust_remote_code=True,
    truncate_dim=matryoshka_dim,
)

# Fetch the article from the Arabic ('ar') Wikipedia.
wiki = Wikipedia('RAGBot/0.0', 'ar')
doc = wiki.page('جابر بن حيان').text
paragraphs = doc.split('\n\n')  # chunking: one chunk per blank-line-separated paragraph

# Show every chunk, wrapped to 100 columns, so the corpus can be inspected.
for p in paragraphs:
    wrapped_text = textwrap.fill(p, width=100)

    print("-----------------------------------------------------------------")
    print(wrapped_text)
    print("-----------------------------------------------------------------")

# Embeddings are requested normalized, so the dot product below acts as
# cosine similarity.
docs_embed = model.encode(paragraphs, normalize_embeddings=True)

query = "من هو جابر بن حيان؟"
query_embed = model.encode(query, normalize_embeddings=True)

# One similarity score per paragraph.
similarities = np.dot(docs_embed, query_embed.T)

# argsort is ascending: take the last three indices and reverse them so the
# best match comes first.
top_3_idx = np.argsort(similarities, axis=0)[-3:][::-1].tolist()

most_similar_documents = [paragraphs[idx] for idx in top_3_idx]

# Print the retrieved chunks and accumulate them into the prompt context.
CONTEXT = ""
for p in most_similar_documents:
    wrapped_text = textwrap.fill(p, width=100)

    print("-----------------------------------------------------------------")
    print(wrapped_text)
    print("-----------------------------------------------------------------")
    CONTEXT += wrapped_text + "\n\n"

prompt = f"""
use the following CONTEXT to answer the QUESTION at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

CONTEXT: {CONTEXT}
QUESTION: {query}
"""

# The original referenced an undefined ``userdata`` object here, which raised
# NameError when run as a script. Read the key from the environment instead.
# NOTE(review): if this is run in a notebook that stores the key as a secret,
# export it to OPENAI_API_KEY before running.
api_key = os.environ.get('OPENAI_API_KEY')
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this script."
    )

client = OpenAI(api_key=api_key)

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": prompt},
    ]
)

print(response.choices[0].message.content)