ajaynagotha committed on
Commit
3435406
·
verified ·
1 Parent(s): 6c61576

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -38
app.py CHANGED
@@ -1,53 +1,27 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
- from sentence_transformers import SentenceTransformer, util
4
- import torch
5
- from datasets import load_dataset
6
 
7
- # Load the model and tokenizer
8
  model_name = "google/flan-t5-xl"
9
  tokenizer = AutoTokenizer.from_pretrained(model_name)
10
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
11
 
12
- # Load the Gita dataset
13
- ds = load_dataset("knowrohit07/gita_dataset")
14
- chapters = ds['train']['Chapter']
15
- sentence_ranges = ds['train']['sentence_range']
16
- texts = ds['train']['Text']
17
-
18
- # Load a sentence transformer model for semantic search
19
- sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
20
-
21
- # Encode all texts for faster similarity search
22
- text_embeddings = sentence_model.encode(texts, convert_to_tensor=True)
23
-
24
- def find_relevant_texts(query, top_k=3):
25
- query_embedding = sentence_model.encode(query, convert_to_tensor=True)
26
- cos_scores = util.cos_sim(query_embedding, text_embeddings)[0]
27
- top_results = torch.topk(cos_scores, k=top_k)
28
-
29
- relevant_texts = []
30
- for score, idx in zip(top_results[0], top_results[1]):
31
- relevant_texts.append(f"Chapter {chapters[idx]}, Verses {sentence_ranges[idx]}: {texts[idx]}")
32
-
33
- return "\n\n".join(relevant_texts)
34
 
35
  def generate_response(question):
36
- relevant_texts = find_relevant_texts(question)
37
-
38
- prompt = f"""Based on the following excerpts from the Bhagavad Gita, answer the question.
39
-
40
- Relevant excerpts:
41
- {relevant_texts}
42
-
43
- Question: {question}
44
-
45
- Answer:"""
46
-
47
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids
48
  outputs = model.generate(input_ids, max_new_tokens=200, do_sample=True, temperature=0.7, top_p=0.95)
49
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
50
-
51
  return response
52
 
53
  iface = gr.Interface(
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 
 
3
 
 
4
  model_name = "google/flan-t5-xl"
5
  tokenizer = AutoTokenizer.from_pretrained(model_name)
6
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
7
 
8
+ gita_context = """
9
+ The Bhagavad Gita is a 700-verse Hindu scripture that is part of the Indian epic Mahabharata. It is a dialogue between Prince Arjuna and Lord Krishna, who serves as his charioteer. The Gita's core message includes:
10
+ 1. The immortality of the soul (Atman)
11
+ 2. The nature of action (Karma) and duty (Dharma)
12
+ 3. The importance of devotion (Bhakti)
13
+ 4. The pursuit of knowledge (Jnana) and wisdom
14
+ 5. Different types of Yoga: Karma Yoga, Bhakti Yoga, Jnana Yoga, and Raja Yoga
15
+ 6. The concept of detachment from the fruits of one's actions
16
+ 7. The divine nature of Krishna as an avatar of Vishnu
17
+ Key teachings include performing one's duty without attachment to results, the importance of self-realization, and the path to liberation (Moksha).
18
+ """
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  def generate_response(question):
21
+ prompt = f"Based on the following context about the Bhagavad Gita, answer the question.\n\nContext: {gita_context}\n\nQuestion: {question}\n\nAnswer:"
 
 
 
 
 
 
 
 
 
 
22
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids
23
  outputs = model.generate(input_ids, max_new_tokens=200, do_sample=True, temperature=0.7, top_p=0.95)
24
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
25
  return response
26
 
27
  iface = gr.Interface(