paulokewunmi commited on
Commit
fb2b8cf
1 Parent(s): 204b140

Add application file

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
app.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+
3
+ import gradio as gr
4
+ from src.document_utils import (
5
+ summarize,
6
+ question_answer,
7
+ generate_questions,
8
+ load_history,
9
+ load_science,
10
+ )
11
+ from src.wiki_search import cross_lingual_document_search, document_source
12
+ from src.theme import CustomTheme
13
+
14
+ max_search_results = 3
15
+
16
+
17
def reset_chatbot():
    """Return a Gradio update that sets the target component's value to "".

    Wired to ``input_document.change`` with the chatbot as output, so the
    displayed chat is blanked whenever the study document is edited.
    """
    cleared = gr.update(value="")
    return cleared
19
+
20
+
21
def get_user_input(input_question, history):
    """Queue the user's question as a new, still unanswered chat turn.

    Returns a pair: an empty string (bound to the question box, which clears
    it) and a new history list with ``[input_question, None]`` appended. The
    ``None`` slot is filled in by the answering step. The ``history`` list
    passed in is not modified.
    """
    pending_turn = [input_question, None]
    return "", history + [pending_turn]
23
+
24
+
25
def study_doc_qa_bot(input_document, history):
    """Answer the newest question in ``history`` using ``input_document``.

    The last entry of ``history`` is a ``[question, answer]`` pair whose
    answer slot is overwritten in place with the text returned by
    ``question_answer``; the same (mutated) list is returned.
    """
    history[-1][1] = question_answer(input_document, history)
    return history
29
+
30
def my_function(file_object):
    """Placeholder that accepts a file object and does nothing.

    NOTE(review): not referenced anywhere in the visible code - presumably a
    stub for handling the uploaded file; confirm before removing.
    """
    return None
32
+
33
+
34
+ custom_theme = CustomTheme()
35
+
36
+
37
+ with gr.Blocks(theme=custom_theme) as demo:
38
+ gr.HTML(
39
+ """<html><center><img src='file/logo/omowe_logo.png', alt='omowe.ai logo', width=820, height=312 /></center><br></html>"""
40
+ )
41
+
42
+ qa_bot_state = gr.State(value=[])
43
+
44
+ with gr.Tabs():
45
+
46
+ with gr.TabItem("Document Search"):
47
+ gr.HTML(
48
+ """<p style="text-align:center;font-size:24px;"><b>Search across a set of study materials in your own native language or even a mix of languages.</p>"""
49
+ )
50
+ gr.HTML(
51
+ """<p style="text-align:center; font-style:italic; font-size:16px;">Get started with a pre-indexed set of study materials spaning various subjects (History, Literature, Philosophy, Government etc) in 4 different languages.</p>"""
52
+ )
53
+
54
+ with gr.Row():
55
+ text_match = gr.CheckboxGroup(
56
+ ["Full Text Search"], label="find exact text in documents"
57
+ )
58
+
59
+ with gr.Row():
60
+ lang_choices = gr.CheckboxGroup(
61
+ [
62
+ "English",
63
+ "Yoruba",
64
+ "Igbo",
65
+ "Hausa",
66
+ ],
67
+ label="Filter results based on language",
68
+ )
69
+
70
+ with gr.Row():
71
+ with gr.Column():
72
+ user_query = gr.Text(
73
+ label="Enter query here",
74
+ placeholder="Search through all your documents",
75
+ )
76
+
77
+ num_search_results = gr.Slider(
78
+ 1,
79
+ max_search_results,
80
+ visible=False,
81
+ value=max_search_results,
82
+ step=1,
83
+ interactive=True,
84
+ label="How many search results to show:",
85
+ )
86
+
87
+ with gr.Row():
88
+
89
+ with gr.Column():
90
+ query_match_out_1 = gr.Textbox(
91
+ label= f"Search Result 1"
92
+ )
93
+
94
+ with gr.Column():
95
+ with gr.Accordion("Click to View Source", open=True):
96
+ translate_1 = gr.Button(
97
+ label="Go to Source",
98
+ value="Get Sources",
99
+ variant="primary",
100
+ )
101
+ translate_res_1 = gr.Textbox(
102
+ label=f"Source Url"
103
+ )
104
+
105
+ with gr.Row():
106
+ with gr.Column():
107
+ query_match_out_2 = gr.Textbox(label=f"Search Result 2")
108
+
109
+ with gr.Column():
110
+ with gr.Accordion("Click to View Source", open=False):
111
+ translate_res_2 = gr.Textbox(
112
+ label=f"Source Url"
113
+ )
114
+
115
+ with gr.Row():
116
+ with gr.Column():
117
+ query_match_out_3 = gr.Textbox(label=f"Search Result 3")
118
+
119
+ with gr.Column():
120
+ with gr.Accordion("Click to View Source", open=False):
121
+ translate_res_3 = gr.Textbox(
122
+ label=f"Source Url"
123
+ )
124
+
125
+ with gr.TabItem("Q&A"):
126
+ gr.HTML(
127
+ """<p style="text-align:center; font-size:16px;"><b>Looking to breeze through your study materials effortlessly? Simply upload your documents and fire away any questions you have!</p>"""
128
+ )
129
+
130
+ with gr.Row():
131
+ with gr.Column():
132
+ input_document = gr.Text(label="Copy your document here", lines=2)
133
+ input_document_pdf = gr.inputs.File(label="Uplaod file")
134
+
135
+
136
+ with gr.Column():
137
+ chatbot = gr.Chatbot(label="Chat History")
138
+ input_question = gr.Text(
139
+ label="Ask a question",
140
+ placeholder="Type a question here and hit enter.",
141
+ )
142
+ clear = gr.Button("Clear", variant="primary")
143
+
144
+ with gr.Row():
145
+ with gr.Accordion("Show example inputs I can load:", open=False):
146
+
147
+ example_2 = gr.Button(
148
+ "Load History of Nigeria", variant="primary"
149
+ )
150
+ example_1 = gr.Button(
151
+ "Load Science of Photosynthesis", variant="primary"
152
+ )
153
+
154
+ with gr.TabItem("Summarize"):
155
+ gr.HTML(
156
+ """<p style="text-align:center; font-size:24px;"><b> Get the most out of your study materials!</p>"""
157
+ )
158
+ gr.HTML(
159
+ """<p style="text-align:center; font-size:16px;"><b>You can easily upload your documents and generate quick summaries and practice questions in a flash.</p>"""
160
+ )
161
+
162
+ with gr.Row():
163
+ with gr.Column():
164
+ summary_input = gr.Text(label="Document", lines=5)
165
+ with gr.Column():
166
+ summary_output = gr.Text(label="Generated Summary", lines=5)
167
+ invisible_comp = gr.Text(label="Dummy Component", visible=False)
168
+ with gr.Row():
169
+ generate_summary = gr.Button("Generate Summary", variant="primary")
170
+ with gr.Row():
171
+ with gr.Column():
172
+ with gr.Accordion("Summary Settings", open=False):
173
+ summary_length = gr.Radio(
174
+ ["short", "medium", "long"],
175
+ label="Summary Length",
176
+ value="long",
177
+ )
178
+
179
+ summary_format = gr.Radio(
180
+ ["paragraph", "bullets"],
181
+ label="Summary Format",
182
+ value="bullets",
183
+ )
184
+ extractiveness = gr.Radio(
185
+ ["low", "medium", "high"],
186
+ label="Extractiveness",
187
+ info="Controls how close to the original text the summary is.",
188
+ visible=False,
189
+ value="high",
190
+ )
191
+ temperature = gr.Slider(
192
+ minimum=0,
193
+ maximum=5.0,
194
+ value=0.64,
195
+ step=0.1,
196
+ interactive=True,
197
+ visible=False,
198
+ label="Temperature",
199
+ info="Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output.",
200
+ )
201
+
202
+ with gr.Row():
203
+ generate_questions_btn = gr.Button("Generate Questions", variant="primary")
204
+ with gr.Row():
205
+ generate_output = gr.Text(label="Generated Questions", lines=5)
206
+ with gr.Row():
207
+ with gr.Accordion("Show example inputs I can load:", open=False):
208
+ example_4 = gr.Button(
209
+ "Load History of Nigeria", variant="primary"
210
+ )
211
+ example_3 = gr.Button(
212
+ "Load Science of Photosynthesis", variant="primary"
213
+ )
214
+
215
+ # fetch answer for submitted question corresponding to input document
216
+ input_question.submit(
217
+ get_user_input,
218
+ [input_question, chatbot],
219
+ [input_question, chatbot],
220
+ queue=False,
221
+ ).then(study_doc_qa_bot, [input_document, chatbot], chatbot)
222
+
223
+ # reset the chatbot Q&A history when input document changes
224
+ input_document.change(fn=reset_chatbot, inputs=[], outputs=chatbot)
225
+
226
+ # Loading examples on click for Q&A module
227
+ example_1.click(
228
+ load_history,
229
+ [],
230
+ [input_document, input_question],
231
+ queue=False,
232
+ )
233
+
234
+ example_2.click(
235
+ load_science,
236
+ [],
237
+ [input_document, input_question],
238
+ queue=False,
239
+ )
240
+
241
+ # Loading examples on click for Summarize module
242
+ example_3.click(
243
+ load_history,
244
+ [],
245
+ [summary_input, invisible_comp],
246
+ queue=False,
247
+ )
248
+
249
+ example_4.click(
250
+ load_science,
251
+ [],
252
+ [summary_input, invisible_comp],
253
+ queue=False,
254
+ )
255
+
256
+ # generate summary corresponding to document submitted by the user.
257
+ generate_summary.click(
258
+ summarize,
259
+ [summary_input, summary_length, summary_format, extractiveness, temperature],
260
+ [summary_output],
261
+ queue=False,
262
+ )
263
+
264
+ generate_questions_btn.click(
265
+ generate_questions,
266
+ [summary_input],
267
+ [generate_output],
268
+ queue=False,
269
+ )
270
+
271
+ # clear the chatbot Q&A history when this button is clicked by the user
272
+ clear.click(lambda: None, None, chatbot, queue=False)
273
+
274
+ # run search as user is typing the query
275
+ user_query.change(
276
+ cross_lingual_document_search,
277
+ [user_query, num_search_results, lang_choices, text_match],
278
+ [query_match_out_1, query_match_out_2, query_match_out_3],
279
+ queue=False,
280
+ )
281
+
282
+ # run search if user submits query
283
+ user_query.submit(
284
+ cross_lingual_document_search,
285
+ [user_query, num_search_results, lang_choices, text_match],
286
+ [query_match_out_1, query_match_out_2, query_match_out_3],
287
+ queue=False,
288
+ )
289
+
290
+ # fetch the source URLs for the current search results when the user clicks 'Get Sources'
291
+ translate_1.click(
292
+ document_source,
293
+ [user_query, num_search_results, lang_choices, text_match],
294
+ [translate_res_1,translate_res_2,translate_res_3],
295
+ queue=False,
296
+ )
297
+
298
+
299
+ if __name__ == "__main__":
300
+ demo.launch(debug=True)
logo/omowe_logo.png ADDED
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ cohere
2
+ qdrant_client
3
+ gradio
4
+ langchain
5
+ black
src/.DS_Store ADDED
Binary file (6.15 kB). View file
 
src/constants.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # name of cohere's summarization model
2
+ SUMMARIZATION_MODEL = "summarize-xlarge"
3
+
4
+ # whether to use multilingual embeddings to represent the documents or not
5
+ USE_MULTILINGUAL_EMBEDDING = True
6
+
7
+ # name of cohere's multilingual embedding model
8
+ MULTILINGUAL_EMBEDDING_MODEL = "multilingual-22-12"
9
+
10
+ # The name with which you want to create a collection in Qdrant
11
+ CREATE_QDRANT_COLLECTION_NAME = "wiki-embed"
12
+
13
+ # name of cohere's model which will be used for generating the translation of an input document
14
+ TEXT_GENERATION_MODEL = "command-xlarge-nightly"
15
+
16
+
17
+ EXAMPLES_FILE_PATH = "src/example.csv"
src/document_utils.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+ import pandas as pd
5
+ from typing import List
6
+ from dotenv import load_dotenv
7
+
8
+ import cohere
9
+ from langchain.embeddings.cohere import CohereEmbeddings
10
+ from langchain.llms import Cohere
11
+ from langchain.prompts import PromptTemplate
12
+ from langchain.vectorstores import Qdrant
13
+ from langchain.chains.question_answering import load_qa_chain
14
+
15
+ sys.path.append(os.path.abspath('..'))
16
+
17
+ from src.constants import SUMMARIZATION_MODEL, EXAMPLES_FILE_PATH
18
+
19
+
20
+ # load environment variables
21
+ CWD = os.path.dirname(os.path.abspath(__file__))
22
+ dotenv_path = os.path.join(os.path.dirname(CWD), ".env")
23
+ load_dotenv(dotenv_path)
24
+ # read the Qdrant and Cohere credentials from the environment
25
+ QDRANT_HOST = os.getenv("QDRANT_HOST")
26
+ QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
27
+ COHERE_API_KEY = os.getenv("COHERE_API_KEY")
28
+
29
+
30
def replace_text(text):
    """Strip a leading "The answer is " from ``text``; otherwise return it unchanged."""
    lead_in = "The answer is "
    if not text.startswith(lead_in):
        return text
    # Only the leading occurrence is dropped; later repeats are kept.
    return text[len(lead_in):]
34
+
35
+
36
def summarize(
    document: str,
    summary_length: str,
    summary_format: str,
    extractiveness: str = "high",
    temperature: float = 0.6,
) -> str:
    """
    Summarizes the input document with Cohere's summarize API.
    Args:
        document (`str`):
            The text supplied by the user that should be summarized.
        summary_length (`str`):
            Desired summary length: 'short', 'medium' or 'long'.
        summary_format (`str`):
            Layout of the summary: 'paragraph' or 'bullets'.
        extractiveness (`str`, *optional*, defaults to 'high'):
            'low', 'medium' or 'high'; how closely the summary should stick to the original text.
        temperature (`float`, *optional*, defaults to 0.6):
            Controls the randomness of the output. Lower values give more predictable output, higher values more creative output.
    Returns:
        generated_summary (`str`):
            The `summary` field of the response returned by the summarization model.
    """
    # A new client is created on every call, using the key read from the environment.
    client = cohere.Client(COHERE_API_KEY)
    request_options = {
        "text": document,
        "length": summary_length,
        "format": summary_format,
        "model": SUMMARIZATION_MODEL,
        "extractiveness": extractiveness,
        "temperature": temperature,
    }
    return client.summarize(**request_options).summary
71
+
72
+
73
def question_answer(input_document: str, history: List) -> str:
    """
    Generates an answer to the user's most recent question, using the input document as context.
    Args:
        input_document (`str`):
            The document given by the user from which the answer must be drawn.
        history (`List[List[str,str]]`):
            A list of [question, answer] pairs tracking the chat between the user and the model.
            The last pair holds the question that is answered by this call.
    Returns:
        answer (`str`):
            The generated answer for the latest question, or a short notice asking for a
            document when `input_document` is empty, whitespace-only or `None`.
    Raises:
        IndexError: if `history` is empty.
    """
    # The last element of `history` contains the most recent question asked by the user.
    question = history[-1][0]

    word_list = (input_document or "").split()
    if not word_list:
        # With no words there are no chunks to embed or index, so answer
        # directly instead of failing inside the embedding / vector-store calls.
        return "Please provide a document before asking a question."

    # Split the document into chunks of at most 256 whitespace-separated words.
    texts = [" ".join(word_list[k : k + 256]) for k in range(0, len(word_list), 256)]

    # Embed the chunks and index them in Qdrant so the relevant ones can be retrieved.
    embeddings = CohereEmbeddings(
        model="multilingual-22-12", cohere_api_key=COHERE_API_KEY
    )
    context_index = Qdrant.from_texts(
        texts, embeddings, url=QDRANT_HOST, api_key=QDRANT_API_KEY
    )

    prompt_template = (
        "Text: {context}\n"
        "Question: {question}\n"
        "Answer the question based on the text provided. If the text doesn't contain the answer, reply that the answer is not available."
    )
    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )

    # "stuff" chain: the retrieved chunks are placed into the single prompt above.
    chain = load_qa_chain(
        Cohere(
            model="command-xlarge-nightly", temperature=0, cohere_api_key=COHERE_API_KEY
        ),
        chain_type="stuff",
        prompt=PROMPT,
    )
    relevant_context = context_index.similarity_search(question)
    answer = chain.run(input_documents=relevant_context, question=question)

    # Tidy the raw generation: drop newlines, the "Answer:" label and a leading "The answer is ".
    answer = answer.replace("\n", "").replace("Answer:", "")
    return replace_text(answer)
122
+
123
def generate_questions(input_document: str) -> str:
    """
    Asks Cohere's generate API for five comprehension questions about the input document.
    Args:
        input_document (`str`):
            The text the questions should be about; it is interpolated into the prompt.
    Returns:
        generated_questions (`str`):
            The text of the first generation in the response.
    """
    client = cohere.Client(COHERE_API_KEY)
    prompt = f"Give me 5 different questions to test understanding of the following text provided. Here's the provided text: {input_document}. Now what is Questions 1 to 5 ?:"
    response = client.generate(prompt=prompt, max_tokens=200, temperature=0.55)
    return response.generations[0].text
132
+
133
+
134
def load_science():
    """Return the ``doc`` and ``question`` fields of the first row of the examples CSV.

    NOTE(review): in the bundled ``src/example.csv`` the first row is the
    Nigeria article rather than a science text, and ``app.py`` binds this
    loader to the "Load History of Nigeria" buttons, so the UI behaves as
    labelled. The function name looks swapped with ``load_history``; confirm
    before renaming, because the button wiring would have to change with it.
    """
    first_example = pd.read_csv(EXAMPLES_FILE_PATH).iloc[0]
    return first_example["doc"], first_example["question"]
139
+
140
+
141
def load_history():
    """Return the ``doc`` and ``question`` fields of the second row of the examples CSV.

    NOTE(review): in the bundled ``src/example.csv`` the second row is the
    photosynthesis article rather than a history text, and ``app.py`` binds
    this loader to the "Load Science of Photosynthesis" buttons, so the UI
    behaves as labelled. The function name looks swapped with
    ``load_science``; confirm before renaming, because the button wiring
    would have to change with it.
    """
    second_example = pd.read_csv(EXAMPLES_FILE_PATH).iloc[1]
    return second_example["doc"], second_example["question"]
146
+
147
+
148
if __name__ == "__main__":
    # Manual smoke test: answer one question about the bundled sample text.
    # Resolve the sample next to this module so the script works from any
    # working directory, and read it as UTF-8 (the text contains Greek letters).
    sample_path = os.path.join(CWD, "sample_text.txt")
    with open(sample_path, "r", encoding="utf-8") as file:
        text = file.read()

    # `question_answer` reads the question from history[-1][0], so the history
    # must be a list of [question, answer] pairs; a flat list of strings would
    # make it use only the first character of the question.
    answer = question_answer(text, [["Whats photosynthesis", None]])
    print(answer)
src/example.csv ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id,doc,question
2
+ 1,"
3
+ Nigeria (/naɪˈdʒɪəriə/ Listen ny-JEER-ee-ə),[a] officially the Federal Republic of Nigeria, is a country in West Africa. It is situated between the Sahel to the north and the Gulf of Guinea to the south in the Atlantic Ocean. It covers an area of 923,769 square kilometres (356,669 sq mi), and with a population of over 230 million, it is the most populous country in Africa, and the world's sixth-most populous country. Nigeria borders Niger in the north, Chad in the northeast, Cameroon in the east, and Benin in the west. Nigeria is a federal republic comprising 36 states and the Federal Capital Territory, where the capital, Abuja, is located. The largest city in Nigeria is Lagos, one of the largest metropolitan areas in the world and the second-largest in Africa.
4
+
5
+ Nigeria has been home to several indigenous pre-colonial states and kingdoms since the second millennium BC, with the Nok civilization in the 15th century BC marking the first internal unification in the country. The modern state originated with British colonialization in the 19th century, taking its present territorial shape with the merging of the Southern Nigeria Protectorate and Northern Nigeria Protectorate in 1914 by Lord Lugard. The British set up administrative and legal structures while practising indirect rule through traditional chiefdoms in the Nigeria region.[9] Nigeria became a formally independent federation on 1 October 1960. It experienced a civil war from 1967 to 1970, followed by a succession of military dictatorships and democratically elected civilian governments until achieving a stable democracy in the 1999 presidential election. The 2015 general election was the first time an incumbent president failed to be re-elected.[10]
6
+
7
+ Nigeria is a multinational state inhabited by more than 250 ethnic groups speaking 500 distinct languages, all identifying with a wide variety of cultures.[11][12][13] The three largest ethnic groups are the Hausa in the north, Yoruba in the west, and Igbo in the east, together constituting over 60% of the total population.[14] The official language is English, chosen to facilitate linguistic unity at the national level.[15] Nigeria's constitution ensures freedom of religion[16] and it is home to some of the world's largest Muslim and Christian populations.[17] Nigeria is divided roughly in half between Muslims, who live mostly in the north, and Christians, who live mostly in the south; indigenous religions, such as those native to the Igbo and Yoruba ethnicities, are in the minority.[18]
8
+
9
+ Nigeria is a regional power in Africa and a middle and emerging power in international affairs. Nigeria's economy is the largest in Africa, the 31st-largest in the world by nominal GDP, and 26th-largest by PPP. Nigeria is often referred to as the Giant of Africa owing to its large population and economy[19] and is considered to be an emerging market by the World Bank. However, the country ranks very low in the Human Development Index and remains one of the most corrupt nations in the world.[20][21] Nigeria is a founding member of the African Union and a member of many international organizations, including the United Nations, the Commonwealth of Nations, NAM,[22] the Economic Community of West African States, Organisation of Islamic Cooperation and OPEC. It is also a member of the informal MINT group of countries and is one of the Next Eleven economies.
10
+
11
+ Etymology
12
+ The name Nigeria was taken from the Niger River running through the country. This name was coined on 8 January 1897, by the British journalist Flora Shaw, who later married Frederick Lugard, a British colonial administrator. The neighbouring Republic of Niger takes its name from the same river. The origin of the name Niger, which originally applied to only the middle reaches of the Niger River, is uncertain. The word is likely an alteration of the Tuareg name egerew n-igerewen used by inhabitants along the middle reaches of the river around Timbuktu before 19th-century European colonialism.[23][24]
13
+
14
+ Administrative Division.
15
+ Nigeria is divided into thirty-six states and one Federal Capital Territory, which are further sub-divided into 774 local government areas. In some contexts, the states are aggregated into six geopolitical zones: North West, North East, North Central, South West, South East, and South South.[128][129]
16
+
17
+ Nigeria has five cities with a population of over a million (from largest to smallest): Lagos, Kano, Ibadan, Benin City and Port Harcourt. Lagos is the largest city in Africa, with a population of over 12 million in its urban area.[130]
18
+
19
+ The south of the country in particular is characterised by very strong urbanisation and a relatively large number of cities. According to an estimate from 2015,[131] there are 20 cities in Nigeria with more than 500,000 inhabitants, including ten cities with a population of one million.
20
+
21
+ ","how did nigeria get her name, and who named her"
22
+ 2,"
23
+ Photosynthesis is a process used by plants and other organisms to convert light energy into chemical energy that, through cellular respiration, can later be released to fuel the organism's activities. Some of this chemical energy is stored in carbohydrate molecules, such as sugars and starches, which are synthesized from carbon dioxide and water – hence the name photosynthesis, from the Greek phōs (φῶς), ""light"", and synthesis (σύνθεσις), ""putting together"".[1][2][3] Most plants, algae, and cyanobacteria perform photosynthesis; such organisms are called photoautotrophs. Photosynthesis is largely responsible for producing and maintaining the oxygen content of the Earth's atmosphere, and supplies most of the energy necessary for life on Earth.[4]
24
+
25
+ Although photosynthesis is performed differently by different species, the process always begins when energy from light is absorbed by proteins called reaction centers that contain green chlorophyll (and other colored) pigments/chromophores. In plants, these proteins are held inside organelles called chloroplasts, which are most abundant in leaf cells, while in bacteria they are embedded in the plasma membrane. In these light-dependent reactions, some energy is used to strip electrons from suitable substances, such as water, producing oxygen gas. The hydrogen freed by the splitting of water is used in the creation of two further compounds that serve as short-term stores of energy, enabling its transfer to drive other reactions: these compounds are reduced nicotinamide adenine dinucleotide phosphate (NADPH) and adenosine triphosphate (ATP), the ""energy currency"" of cells.
26
+
27
+ In plants, algae and cyanobacteria, sugars are synthesized by a subsequent sequence of light-independent reactions called the Calvin cycle. In the Calvin cycle, atmospheric carbon dioxide is incorporated into already existing organic carbon compounds, such as ribulose bisphosphate (RuBP).[5] Using the ATP and NADPH produced by the light-dependent reactions, the resulting compounds are then reduced and removed to form further carbohydrates, such as glucose. In other bacteria, different mechanisms such as the reverse Krebs cycle are used to achieve the same end.
28
+
29
+ The first photosynthetic organisms probably evolved early in the evolutionary history of life and most likely used reducing agents such as hydrogen or hydrogen sulfide, rather than water, as sources of electrons.[6] Cyanobacteria appeared later; the excess oxygen they produced contributed directly to the oxygenation of the Earth,[7] which rendered the evolution of complex life possible. Today, the average rate of energy capture by photosynthesis globally is approximately 130 terawatts,[8][9][10] which is about eight times the current power consumption of human civilization.[11] Photosynthetic organisms also convert around 100–115 billion tons (91–104 Pg petagrams, or billion metric tons), of carbon into biomass per year.[12][13] That plants receive some energy from light – in addition to air, soil, and water – was first discovered in 1779 by Jan Ingenhousz.
30
+
31
+ Photosynthesis is vital for climate processes, as it captures carbon dioxide from the air and then binds carbon in plants and further in soils and harvested products. Cereals alone are estimated to bind 3,825 Tg (teragrams) or 3.825 Pg (petagrams) of carbon dioxide every year, i.e. 3.825 billion metric tons.[14]
32
+
33
+ There are four main factors influencing photosynthesis[clarification needed] and several corollary factors. The four main are:[96] Light irradiance and wavelength, Water absorption, Carbon dioxide concentration,Temperature.
34
+ Total photosynthesis is limited by a range of environmental factors. These include the amount of light available, the amount of leaf area a plant has to capture light (shading by other plants is a major limitation of photosynthesis), the rate at which carbon dioxide can be supplied to the chloroplasts to support photosynthesis, the availability of water, and the availability of suitable temperatures for carrying out photosynthesis.[97]
35
+ ",What are the factors responsible for photosynthesis
src/sample_text.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Photosynthesis is a process used by plants and other organisms to convert light energy into chemical energy that,
2
+ through cellular respiration, can later be released to fuel the organism's activities. Some of this chemical energy
3
+ is stored in carbohydrate molecules, such as sugars and starches, which are synthesized from carbon dioxide and water – hence
4
+ the name photosynthesis, from the Greek phōs (φῶς), "light", and synthesis (σύνθεσις), "putting together".[1][2][3] Most plants,
5
+ algae, and cyanobacteria perform photosynthesis; such organisms are called photoautotrophs. Photosynthesis is largely responsible for producing and maintaining the oxygen content of the Earth's atmosphere, and supplies most of the energy necessary for life on Earth.[4]
6
+ Although photosynthesis is performed differently by different species, the process always begins when energy from light is
7
+ absorbed by proteins called reaction centers that contain green chlorophyll (and other colored) pigments/chromophores.
8
+ In plants, these proteins are held inside organelles called chloroplasts, which are most abundant in leaf cells,
9
+ while in bacteria they are embedded in the plasma membrane. In these light-dependent reactions, some energy is used
10
+ to strip electrons from suitable substances, such as water, producing oxygen gas. The hydrogen freed by the splitting
11
+ of water is used in the creation of two further compounds that serve as short-term stores of energy, enabling its
12
+ transfer to drive other reactions: these compounds are reduced nicotinamide adenine dinucleotide phosphate (NADPH) and
13
+ adenosine triphosphate (ATP), the "energy currency" of cells.
14
+
15
+ In plants, algae and cyanobacteria, sugars are synthesized by a subsequent sequence of light-independent
16
+ reactions called the Calvin cycle. In the Calvin cycle, atmospheric carbon dioxide is incorporated
17
+ into already existing organic carbon compounds, such as ribulose bisphosphate (RuBP).[5] Using the ATP and NADPH
18
+ produced by the light-dependent reactions, the resulting compounds are then reduced and removed to form further carbohydrates,
19
+ such as glucose. In other bacteria, different mechanisms such as the reverse Krebs cycle are used to achieve the same end.
src/theme.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from typing import Iterable
3
+ import gradio as gr
4
+ from gradio.themes.base import Base
5
+ from gradio.themes import Soft
6
+ from gradio.themes.utils import colors, fonts, sizes
7
+
8
+
9
class CustomTheme(Base):
    """Gradio theme built on ``Base`` with a blue/cyan palette and custom overrides.

    The constructor forwards the hue and size arguments to ``Base``, names
    the theme ``"custom_theme"`` and then applies a fixed set of style
    overrides through ``Base.set``.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.blue,
        secondary_hue: colors.Color | str = colors.cyan,
        neutral_hue: colors.Color | str = colors.zinc,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_md,
    ):
        """Create the theme; all arguments are keyword-only and passed on to ``Base``."""
        base_options = {
            "primary_hue": primary_hue,
            "secondary_hue": secondary_hue,
            "neutral_hue": neutral_hue,
            "spacing_size": spacing_size,
            "radius_size": radius_size,
            "text_size": text_size,
        }
        super().__init__(**base_options)
        self.name = "custom_theme"

        # Style overrides, grouped by the part of the UI they affect.
        color_overrides = {
            "background_fill_primary": "*neutral_50",
            "slider_color": "*primary_500",
            "slider_color_dark": "*primary_600",
        }
        shadow_overrides = {
            "shadow_drop": "0 1px 4px 0 rgb(0 0 0 / 0.1)",
            "shadow_drop_lg": "0 2px 5px 0 rgb(0 0 0 / 0.1)",
        }
        block_overrides = {
            "block_background_fill": "white",
            "block_label_padding": "*spacing_sm *spacing_md",
            "block_label_background_fill": "*primary_100",
            "block_label_background_fill_dark": "*primary_600",
            "block_label_radius": "*radius_md",
            "block_label_text_size": "*text_md",
            "block_label_text_weight": "600",
            "block_label_text_color": "*primary_500",
            "block_label_text_color_dark": "*white",
            "block_title_radius": "*block_label_radius",
            "block_title_padding": "*block_label_padding",
            "block_title_background_fill": "*block_label_background_fill",
            "block_title_text_weight": "600",
            "block_title_text_color": "*primary_500",
            "block_title_text_color_dark": "*white",
            "block_label_margin": "*spacing_md",
            "block_shadow": "*shadow_drop_lg",
        }
        input_overrides = {
            "input_border_color": "*neutral_50",
            "input_shadow": "*shadow_drop",
            "input_shadow_focus": "*shadow_drop_lg",
            "checkbox_shadow": "none",
        }
        button_overrides = {
            "shadow_spread": "6px",
            "button_shadow": "*shadow_drop_lg",
            "button_shadow_hover": "*shadow_drop_lg",
            "button_shadow_active": "*shadow_inset",
            "button_primary_background_fill": "linear-gradient(90deg, *primary_300, *secondary_400)",
            "button_primary_background_fill_hover": "linear-gradient(90deg, *primary_200, *secondary_300)",
            "button_primary_text_color": "white",
            "button_primary_background_fill_dark": "linear-gradient(90deg, *primary_600, *secondary_800)",
            "button_primary_background_fill_hover_dark": "*primary_500",
            "button_secondary_background_fill": "white",
            "button_secondary_background_fill_hover": "*neutral_100",
            "button_secondary_background_fill_hover_dark": "*primary_500",
            "button_secondary_text_color": "*neutral_800",
            "button_cancel_background_fill": "*button_secondary_background_fill",
            "button_cancel_background_fill_hover": "*button_secondary_background_fill_hover",
            "button_cancel_background_fill_hover_dark": "*button_secondary_background_fill_hover",
            "button_cancel_text_color": "*button_secondary_text_color",
        }
        checkbox_overrides = {
            "checkbox_label_shadow": "*shadow_drop_lg",
            "checkbox_label_background_fill_selected": "*primary_500",
            "checkbox_label_background_fill_selected_dark": "*primary_600",
            "checkbox_border_width": "1px",
            "checkbox_border_color": "*neutral_100",
            "checkbox_border_color_dark": "*neutral_600",
            "checkbox_background_color_selected": "*primary_600",
            "checkbox_background_color_selected_dark": "*primary_700",
            "checkbox_border_color_focus": "*primary_500",
            "checkbox_border_color_focus_dark": "*primary_600",
            "checkbox_border_color_selected": "*primary_600",
            "checkbox_border_color_selected_dark": "*primary_700",
            "checkbox_label_text_color_selected": "white",
        }
        border_overrides = {
            "block_border_width": "0px",
            "panel_border_width": "1px",
        }

        super().set(
            **color_overrides,
            **shadow_overrides,
            **block_overrides,
            **input_overrides,
            **button_overrides,
            **checkbox_overrides,
            **border_overrides,
        )
src/wiki_search.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cohere
3
+ from typing import List
4
+ from dotenv import load_dotenv
5
+
6
+ from qdrant_client import QdrantClient
7
+ from qdrant_client import models
8
+
9
# Fixed identifiers: the Cohere embedding model name and the Qdrant collection
# that searches are run against.
MODEL_NAME = "multilingual-22-12"
COLLECTION = "wiki-embed"

# Load the `.env` file located one directory above this module's directory.
# This has to happen before the environment variables below are read.
CWD = os.path.dirname(os.path.abspath(__file__))
dotenv_path = os.path.join(os.path.dirname(CWD), ".env")
load_dotenv(dotenv_path)

# Service settings taken from the environment (None when a variable is unset).
QDRANT_HOST = os.environ.get("QDRANT_HOST")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
COHERE_API_KEY = os.environ.get("COHERE_API_KEY")

# Module-level clients shared by every function in this file.
cohere_client = cohere.Client(COHERE_API_KEY)
qdrant_client = QdrantClient(
    url=QDRANT_HOST,
    api_key=QDRANT_API_KEY,
    prefer_grpc=True,
)
28
+
29
def embed_user_query(user_query):
    """Embed a single query string using the configured Cohere model.

    Args:
        user_query: The text to embed.

    Returns:
        A ``(query_embedding, user_query)`` tuple: the first embedding in the
        Cohere response, followed by the query text unchanged.
    """
    # The API takes a batch of texts; send a one-element batch and unwrap it.
    response = cohere_client.embed(model=MODEL_NAME, texts=[user_query])
    return response.embeddings[0], user_query
37
+
38
+
39
def search_wiki_for_query(
    query_embedding,
    num_results=3,
    user_query="",
    languages=None,
    match_text=None,
):
    """Run a filtered vector search against the Qdrant collection.

    Args:
        query_embedding: Embedding vector of the query.
        num_results: Maximum number of hits to return.
        user_query: Raw query text; only used for the full-text condition
            when ``match_text`` is truthy.
        languages: Optional iterable of language names ("English", "Yoruba",
            "Igbo", "Hausa"). ``None`` or empty means no language filter.
        match_text: Truthy flag; when set, hits must also contain
            ``user_query`` in their ``text`` payload field.

    Returns:
        The hits returned by ``qdrant_client.search``.

    Raises:
        KeyError: If ``languages`` contains a name that is not in the
            language mapping.
    """
    language_mapping = {
        "English": "en",
        "Yoruba": "yo",
        "Igbo": "ig",
        "Hausa": "ha",
        # Historical misspelling, kept so existing callers keep working.
        "Hause": "ha",
    }

    # The full-text condition is mandatory when requested. Putting it in the
    # same `should` list as the languages would OR it with them and widen the
    # result set instead of narrowing it.
    text_conditions = []
    if match_text:
        text_conditions.append(
            models.FieldCondition(
                key="text",
                match=models.MatchText(text=user_query),
            )
        )

    # A hit only needs to be in one of the requested languages, so these
    # conditions go in `should` (at least one must match).
    language_conditions = [
        models.FieldCondition(
            key="lang",
            match=models.MatchValue(value=language_mapping[lang]),
        )
        for lang in (languages or [])
    ]

    return qdrant_client.search(
        collection_name=COLLECTION,
        query_filter=models.Filter(
            must=text_conditions or None,
            should=language_conditions or None,
        ),
        search_params=models.SearchParams(hnsw_ef=128, exact=False),
        query_vector=query_embedding,
        limit=num_results,
    )
86
+
87
+
88
def cross_lingual_document_search(
    user_input: str, num_results: int, languages, text_match
) -> List:
    """Search the document collection and return the text of each hit.

    Embeds the query, runs the filtered search, and pads the output so the
    caller always receives at least ``num_results`` entries.

    Args:
        user_input (`str`):
            The query text the search is based on.
        num_results (`int`):
            The number of search results expected by the caller.
        languages (`List[str]`):
            Language names used to filter the search results.
        text_match (`bool`):
            Whether to additionally require a full-text match on the query.
    Returns:
        final_results (`List[str]`):
            The ``text`` payload of each hit, padded with empty strings up to
            ``num_results`` entries when fewer hits come back.
    """
    # create an embedding for the input query
    query_embedding, _ = embed_user_query(user_input)

    # retrieve search results
    hits = search_wiki_for_query(
        query_embedding,
        num_results,
        user_input,
        languages,
        text_match,
    )
    final_results = [hit.payload["text"] for hit in hits]

    # Pad with empty strings when fewer hits than requested were returned
    # (a non-positive multiplier yields an empty list, so this is a no-op
    # when enough results are present).
    final_results.extend([""] * (num_results - len(final_results)))
    return final_results
127
+
128
def document_source(
    user_input: str, num_results: int, languages, text_match
) -> List:
    """Search the document collection and return the source URL of each hit.

    Takes the same arguments as ``cross_lingual_document_search`` and runs
    the same embedding and search steps, but collects the ``url`` payload
    field instead of ``text``.

    Args:
        user_input (`str`):
            The query text the search is based on.
        num_results (`int`):
            The number of search results expected by the caller.
        languages (`List[str]`):
            Language names used to filter the search results.
        text_match (`bool`):
            Whether to additionally require a full-text match on the query.
    Returns:
        sources (`List[str]`):
            The ``url`` payload of each hit, padded with empty strings up to
            ``num_results`` entries when fewer hits come back.
    """
    query_embedding, _ = embed_user_query(user_input)

    # retrieve search results
    hits = search_wiki_for_query(
        query_embedding,
        num_results,
        user_input,
        languages,
        text_match,
    )
    sources = [hit.payload["url"] for hit in hits]

    # Pad `sources` with empty strings when fewer hits than `num_results`
    # were returned (no-op when enough results are present).
    sources.extend([""] * (num_results - len(sources)))
    return sources
150
+
151
+
152
def translate_search_result():
    """Unimplemented placeholder; takes no arguments and returns None.

    NOTE(review): the name suggests this is meant to translate search
    results, but no behavior is implemented yet.
    """
    return None
154
+
155
if __name__ == "__main__":
    # Manual smoke test: run a single Yoruba-filtered search without
    # full-text matching, then print the results and how many came back.
    result = cross_lingual_document_search(
        "Who is the president of Nigeria",
        num_results=3,
        languages=["Yoruba"],
        text_match=False,
    )
    print(result, len(result))