Papajams committed
Commit: c01403d
1 Parent(s): e27a1aa

Update app.py

Files changed (1)
  1. app.py +64 -38
app.py CHANGED
@@ -1,40 +1,66 @@
  import streamlit as st
  import requests
- from bs4 import BeautifulSoup
-
- def search_arxiv(search_query: str, start: int = 0):
-     base_url = 'http://export.arxiv.org/api/query?'
-     query = f'search_query={search_query}&start={start}&max_results=10'
-     response = requests.get(base_url + query)
-     feed = BeautifulSoup(response.content, 'html.parser')
-     entries = feed.find_all('entry')
-     articles = []
-     for entry in entries:
-         article = {}
-         article['title'] = entry.title.text
-         article['authors'] = [author.find('name').text for author in entry.find_all('author')]
-         article['abstract'] = entry.summary.text
-         articles.append(article)
-     return articles
-
- def get_paper_info(paper_id: str):
-     base_url = 'https://api.semanticscholar.org/v1/paper/'
-     response = requests.get(base_url + paper_id)
-     paper = response.json()
-     paper_info = {}
-     paper_info['title'] = paper['title']
-     paper_info['authors'] = [author['name'] for author in paper['authors']]
-     paper_info['abstract'] = paper['abstract']
-     paper_info['fieldsOfStudy'] = paper['fieldsOfStudy']
-     return paper_info
-
- st.title('Scientific Data Substantiator')
- search_query = st.text_input("Enter your search term")
-
- if search_query:
-     articles = search_arxiv(search_query)
-     for article in articles:
-         st.write("Title: ", article['title'])
-         st.write("Authors: ", ", ".join(article['authors']))
-         st.write("Abstract: ", article['abstract'])
-         st.write("-----")
  import streamlit as st
  import requests
+ import torch
+ from transformers import AutoTokenizer, AutoModel
+
+ # Load the SciBERT pre-trained model and tokenizer
+ model_name = "allenai/scibert_scivocab_uncased"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModel.from_pretrained(model_name)
+
+ def calculate_similarity(claim, document):
+     # Tokenize the claim and the document separately
+     claim_inputs = tokenizer(claim, return_tensors='pt', padding=True, truncation=True)
+     document_inputs = tokenizer(document, return_tensors='pt', padding=True, truncation=True)
+
+     # Generate pooled SciBERT embeddings for the claim and the document
+     with torch.no_grad():
+         claim_embeddings = model(**claim_inputs).pooler_output
+         document_embeddings = model(**document_inputs).pooler_output
+
+     # Compute cosine similarity between the two embeddings
+     similarity = torch.cosine_similarity(claim_embeddings, document_embeddings).item()
+
+     return similarity
+
+ def search_papers(user_input):
+     # Implement your code to fetch search results from the desired source (e.g., arXiv, Semantic Scholar)
+     # ...
+
+     # For the purpose of this example, we'll use dummy data
+     search_results = [
+         {
+             'title': 'Paper 1 Title',
+             'abstract': 'Paper 1 Abstract',
+             'authors': ['Author 1', 'Author 2'],
+             'url': 'https://example.com/paper1'
+         },
+         {
+             'title': 'Paper 2 Title',
+             'abstract': 'Paper 2 Abstract',
+             'authors': ['Author 3', 'Author 4'],
+             'url': 'https://example.com/paper2'
+         },
+         {
+             'title': 'Paper 3 Title',
+             'abstract': 'Paper 3 Abstract',
+             'authors': ['Author 5', 'Author 6'],
+             'url': 'https://example.com/paper3'
+         }
+     ]
+
+     return search_results
+
+ st.title('The Substantiator')
+
+ user_input = st.text_input('Input your claim')
+
+ if st.button('Substantiate'):
+     search_results = search_papers(user_input)
+     if search_results is not None and len(search_results) > 0:
+         for result in search_results:
+             st.write(result["title"])
+             st.write(result["abstract"])
+             st.write("Authors: ", ", ".join(result["authors"]))
+             similarity = calculate_similarity(user_input, result["abstract"])
+             st.write("Similarity Score: ", similarity)
+             st.write("-----")
+     else:
+         st.write("No results found.")
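Note that the new search_papers still returns dummy data behind its "Implement your code to fetch search results" placeholder. A minimal sketch of how it could be wired up, reusing the arXiv Atom API approach from the removed search_arxiv (the endpoint and parsing follow the old code; the exact wiring is an assumption, not part of this commit):

import requests
from bs4 import BeautifulSoup

def search_papers(user_input):
    # Hypothetical wiring: query the arXiv Atom API as the removed
    # search_arxiv did; the commit itself ships dummy data instead.
    response = requests.get(
        'http://export.arxiv.org/api/query',
        params={'search_query': f'all:{user_input}', 'start': 0, 'max_results': 10},
    )
    feed = BeautifulSoup(response.content, 'html.parser')

    search_results = []
    for entry in feed.find_all('entry'):
        search_results.append({
            'title': entry.title.text.strip(),
            'abstract': entry.summary.text.strip(),
            'authors': [a.find('name').text for a in entry.find_all('author')],
            'url': entry.find('id').text,  # arXiv puts the abstract-page URL in <id>
        })
    return search_results

The returned dicts keep the same keys as the dummy data, so the Streamlit loop and the calculate_similarity call below it would work unchanged.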