File size: 4,619 Bytes
b281633
 
 
 
3da5b11
b281633
 
 
59feffc
 
a08e894
59feffc
 
 
a08e894
b40a7cf
a08e894
59feffc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b281633
 
 
 
 
 
 
 
 
 
 
 
 
e1f11d0
b281633
 
 
 
 
 
e1f11d0
b281633
 
 
 
24c9f0c
 
b281633
 
 
 
 
 
 
 
 
 
 
2a551bf
 
 
 
 
 
 
 
b78d1fc
27c4510
 
 
1a0b379
 
 
 
01e2624
 
 
b281633
 
94d28e2
b6bdeb4
a08e894
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from transformers import pipeline
import wikipedia
import random
import gradio as gr
import csv
# Checkpoint name handed to the pipeline as both the model and the tokenizer.
model_name = "deepset/electra-base-squad2"
# Module-level question-answering pipeline; built once at import time and
# reused for every request.
nlp = pipeline(
    task='question-answering',
    model=model_name,
    tokenizer=model_name,
)

# dataset save ------------------------------------
import huggingface_hub
import os
from huggingface_hub import Repository, hf_hub_download, upload_file
from datetime import datetime
# Hub dataset repo (awacke1/WikipediaSearch) and the CSV file inside it that
# saved rows are appended to.
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/WikipediaSearch"
DATASET_REPO_ID = "awacke1/WikipediaSearch"
DATA_FILENAME = "WikipediaSearch.csv"
# Path of the CSV inside the local "data" directory.
DATA_FILE = os.path.join("data", DATA_FILENAME)
# Access token read from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Best-effort download of the CSV via hub download; any failure is reduced to
# the "file not found" message below and start-up continues.
# NOTE(review): DATA_DIRNAME is not defined anywhere in the visible file, so
# evaluating the arguments raises NameError before hf_hub_download is ever
# called. The bare `except:` swallows that error, which means this download
# currently never runs.
# NOTE(review): before defining DATA_DIRNAME, confirm how a pre-populated
# directory interacts with the Repository clone into "data" performed later in
# this file, and whether `repo_type` / `force_filename` match the installed
# huggingface_hub version -- TODO confirm.
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        cache_dir=DATA_DIRNAME,
        force_filename=DATA_FILENAME
    )
except:
    print("file not found")
def AIMemory(title: str, story: str) -> str:
    """Append one row to the dataset CSV and push the local repo to the Hub.

    Nothing is written or pushed unless both ``title`` and ``story`` are
    truthy. The row is appended to ``DATA_FILE`` with the columns ``title``,
    ``story`` and ``time`` (the current local time as a string). No header
    row is written by this function.

    Args:
        title: Value stored in the ``title`` column.
        story: Value stored in the ``story`` column.

    Returns:
        Always the empty string.
    """
    if not (title and story):
        return ""
    # newline="" is what the csv module expects from the file object so that
    # it controls line endings itself (otherwise rows get "\r\r\n" on
    # platforms that translate newlines). UTF-8 is set explicitly so that
    # non-ASCII article text does not depend on the locale's default encoding.
    with open(DATA_FILE, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["title", "story", "time"])
        writer.writerow({"title": title, "story": story, "time": str(datetime.now())})
    # Publish the appended row through the module-level `repo` object.
    repo.push_to_hub()
    return ""
# Local clone of the dataset repo in the "data" directory; AIMemory appends to
# the CSV inside it and pushes through this object.
repo = Repository(
    clone_from=DATASET_REPO_URL,
    local_dir="data",
    use_auth_token=HF_TOKEN,
)
# dataset save ------------------------------------


def _disambiguation_candidates(options, topic):
    """Return the usable titles from a disambiguation error, in random order."""
    usable = [
        option for option in options
        if 'disambiguation' not in option
        and 'All pages' not in option
        and option != topic
    ]
    random.shuffle(usable)
    return usable


def get_wiki_article(topic):
    """Look up ``topic`` on Wikipedia and return the matching article.

    The top search hit is loaded as a page. If that title is ambiguous, the
    options offered by the disambiguation error are filtered (dropping
    entries that mention 'disambiguation' or 'All pages', or that equal
    ``topic``) and tried in random order until one loads.

    Args:
        topic: Free-text topic to search for.

    Returns:
        A ``(content, url)`` tuple taken from the loaded page.

    Raises:
        IndexError: If the search returns no result, or if no usable
            disambiguation option can be loaded. IndexError is kept because
            it is the type the previous implementation surfaced in these
            situations.
    """
    try:
        hits = wikipedia.search(topic, results=1)
    except wikipedia.exceptions.DisambiguationError as err:
        # Kept from the original implementation.
        # NOTE(review): confirm whether search() can actually raise this.
        hits = _disambiguation_candidates(err.options, topic)
    if not hits:
        raise IndexError(f"no Wikipedia article found for topic {topic!r}")

    try:
        page = wikipedia.page(hits[0])
    except wikipedia.exceptions.DisambiguationError as err:
        page = None
        # A randomly chosen option may itself be ambiguous or missing, so keep
        # trying the remaining options instead of failing on the first one.
        for candidate in _disambiguation_candidates(err.options, topic):
            try:
                page = wikipedia.page(candidate)
            except (wikipedia.exceptions.DisambiguationError,
                    wikipedia.exceptions.PageError):
                continue
            break
        if page is None:
            raise IndexError(
                f"no usable Wikipedia article found for ambiguous topic {topic!r}"
            ) from err
        # Preserved side effect: the original stored the resolved page only on
        # this disambiguation path.
        # NOTE(review): this passes the whole article text as the "title"
        # column -- confirm that is intended.
        AIMemory(page.content, page.url)
    return page.content, page.url

def get_answer(topic, question):
    """Answer ``question`` using the Wikipedia article found for ``topic``.

    The article text is used as the context for the module-level ``nlp``
    pipeline, and the answer together with the article URL is recorded via
    ``AIMemory``.

    Returns:
        A 3-tuple: the answer text, the article URL, and a dict mapping
        ``'confidence'`` to the pipeline's score.
    """
    article_text, article_url = get_wiki_article(topic)
    result = nlp({'question': question, 'context': article_text})
    AIMemory(result['answer'], article_url)
    confidence = {'confidence': result['score']}
    return result['answer'], article_url, confidence


# Input widgets, in the order of get_answer's parameters: topic, then question.
inputs = [
    gr.inputs.Textbox(label="Topic", lines=2),
    gr.inputs.Textbox(label="Question", lines=2),
]
# Output widgets, in the order of get_answer's return values:
# answer text, article URL, confidence dict.
outputs = [
    gr.outputs.Textbox(label="Answer", type='str'),
    gr.outputs.Textbox(label="Wikipedia Reference Article", type='str'),
    gr.outputs.Label(
        label="Confidence in answer (assuming the correct wikipedia article)",
        type="confidences",
    ),
]

# Heading and sub-heading text passed to the interface.
title = "AI Wikipedia Search"
description = "Contextual Question and Answer"
# NOTE: assigned but not passed on; the Interface call at the bottom of the
# file supplies its own article text.
article = ""
# Example rows offered in the UI, each a [topic, question] pair.
examples = [
    ["Health and fitness", "What is the DSM-IV?"],
    ["Technology and applied sciences", "List of military strategies and concepts?"],
    ["Culture and the arts", "What films are considered the best?"],
    ["Health and fitness", "What are the types of psychotherapies?"],
    ["Health and fitness", "What are macronutrients?"],
    ["Health and fitness", "What are micronutrients?"],
    ["Health and fitness", "What are Nootropics?"],
    ["Health and fitness", "What is the timeline of psychology?"],
    ["Cicero", "What quotes did Marcus Tullius Cicero make?"],
    ["Alzheimers", "What causes alzheimers?"],
    ["Neuropathy", "With neuropathy and neuro-muskoskeletal issues, and what are the treatments available?"],
    ["Chemotherapy", "What are possible care options for patients in chemotherapy?"],
    ["Health", "What is mindfulness and how does it affect health?"],
    ["Medicine", "In medicine what is the Hippocratic Oath?"],
    ["Insurance", "What is Medicare?"],
    ["Financial Services", "Does Medicaid offer financial assistance?"],
    ["Ontology", "Why is an anthology different than ontology?"],
    ["Taxonomy", "What is a biology taxonomy?"],
    ["Pharmacy", "What does a pharmacist do?"],
]

# Build the web UI around get_answer and start serving it
# (no public share link, request queue disabled).
gr.Interface(
    fn=get_answer,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=examples,
    article="Saved dataset: https://huggingface.co/datasets/awacke1/WikipediaSearch stores search and the result url.  List of topics is at https://en.wikipedia.org/wiki/Wikipedia:Contents/Lists and wikipedia library docs are here: https://pypi.org/project/wikipedia/",
    flagging_options=["strongly related", "related", "neutral", "unrelated", "strongly unrelated"],
).launch(share=False, enable_queue=False)