Kushwanth Chowday Kandala committed on
Commit
4997aeb
1 Parent(s): 9780a75

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -0
app.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Streamlit app bootstrap: loads a slice of English Wikipedia, deduplicates
# the article texts, and loads a sentence-embedding model on the best
# available device.
import os

import streamlit as st
import torch
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from streamlit_chat import message

# NOTE(review): everything below runs at module import time. Streamlit
# presumably re-executes this script on user interaction, which would repeat
# the dataset and model loading -- consider caching these; confirm against
# the Streamlit version in use.
dataset = load_dataset("wikipedia", "20220301.en", split="train[240000:290000]")

# Deduplicate article texts while preserving first-seen order.
# dict.fromkeys is used instead of set() because the iteration order of a
# set of strings changes between interpreter runs (hash randomization),
# which would make positional IDs derived from this list unstable.
wikidata = list(dict.fromkeys(record["text"] for record in dataset))

# Prefer the GPU when one is visible to torch; otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

if device != 'cuda':
    print(f"you are using {device}. This is much slower than using "
          "a CUDA-enabled GPU. If on colab you can chnage this by "
          "clicking Runtime > change runtime type > GPU.")

# Sentence-embedding model used to encode the Wikipedia passages.
model = SentenceTransformer("all-MiniLM-L6-v2", device=device)

# Creating an index (Pinecone vector database)