Spaces:

biodatlab
/

NBDT-Recommendation-Engine

Runtime error

App Files Files Community

atrytone committed on Jun 29, 2023

Commit

f9f623d

•

1 Parent(s): 90369c2

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -205

app.py DELETED Viewed

@@ -1,205 +0,0 @@
-import gradio as gr
-from langchain.vectorstores import FAISS
-from langchain.embeddings import HuggingFaceEmbeddings
-from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig, pipeline
-import textwrap
-import torch
-prompt = 'BEGINNING OF CONVERSATION: USER: \
-I will provide you with two abstracts, I intend to use the author of the second to review the first. Tell me in a few words why or why not the second author is a good fit to review the first paper.\n\
-Abstract To Be Reviewed: '
-tokenizer = LlamaTokenizer.from_pretrained("samwit/koala-7b")
-base_model = LlamaForCausalLM.from_pretrained(
-    "samwit/koala-7b",
-    load_in_8bit=True,
-    device=-1,
-    device_map='auto',
-)
-pipe = pipeline(
-    "text-generation",
-    model=base_model,
-    tokenizer=tokenizer,
-    max_length=1024,
-    temperature=0.7,
-    top_p=0.95,
-    repetition_penalty=1.15,
-    device=-1
-)
-def wrap_text_preserve_newlines(text, width=110):
-    # Split the input text into lines based on newline characters
-    lines = text.split('\n')
-    # Wrap each line individually
-    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
-    # Join the wrapped lines back together using newline characters
-    wrapped_text = '\n'.join(wrapped_lines)
-    return wrapped_text
-def create_miread_embed(sents, bundle):
-    tokenizer = bundle[0]
-    model = bundle[1]
-    model.cpu()
-    tokens = tokenizer(sents,
-                       max_length=512,
-                       padding=True,
-                       truncation=True,
-                       return_tensors="pt"
-                       )
-    device = torch.device('cpu')
-    tokens = tokens.to(device)
-    with torch.no_grad():
-        out = model.bert(**tokens)
-        feature = out.last_hidden_state[:, 0, :]
-    return feature.cpu()
-def get_matches(query, k):
-    matches = vecdb.similarity_search_with_score(query, k=k)
-    return matches
-def inference(query,k=30):
-    matches = get_matches(query,k)
-    j_bucket = {}
-    n_table = []
-    a_table = []
-    r_table = []
-    scores = [round(match[1].item(),3) for match in matches]
-    min_score = min(scores)
-    max_score = max(scores)
-    normaliser = lambda x: round(1 - (x-min_score)/max_score,3)
-    for i,match in enumerate(matches):
-        doc = match[0]
-        score = normaliser(round(match[1].item(),3))
-        title = doc.metadata['title']
-        author = eval(doc.metadata['authors'])[0]
-        date = doc.metadata.get('date','None')
-        link = doc.metadata.get('link','None')
-        submitter = doc.metadata.get('submitter','None')
-        journal = doc.metadata.get('journal','None')
-        # For journals
-        if journal not in j_bucket:
-            j_bucket[journal] = score
-        else:
-            j_bucket[journal] += score
-        # For authors
-        record = [i+1,
-                  score,
-                  author,
-                  title,
-                  link,
-                  date]
-        n_table.append(record)
-        # For abstracts
-        record = [i+1,
-                  title,
-                  author,
-                  submitter,
-                  journal,
-                  date,
-                  link,
-                  score
-        ]
-        a_table.append(record)
-        # For reviewer
-        output = pipe(prompt + query + '\n Candidate Abstract: ' + candidate + '\n')
-        r_record = [i+1,
-                    score,
-                    author,
-                    title,
-                    output[0]['generated_text'],
-                    link,
-                    date]
-        r_table.append(r_record)
-    j_table = sorted([[journal,score] for journal,score in j_bucket.items()],key= lambda x : x[1],reverse=True)
-    j_table = [[i+1,item[0],item[1]] for i,item in enumerate(j_table)]
-    j_output= gr.Dataframe.update(value=j_table,visible=True)
-    n_output= gr.Dataframe.update(value=n_table,visible=True)
-    a_output = gr.Dataframe.update(value=a_table,visible=True)
-    r_output = gr.Dataframe.update(value=r_table,visible=True)
-    return [a_output,j_output,n_output,r_output]
-model_name = "biodatlab/MIReAD-Neuro"
-model_kwargs = {'device': 'cpu'}
-encode_kwargs = {'normalize_embeddings': False}
-faiss_embedder = HuggingFaceEmbeddings(
-    model_name=model_name,
-    model_kwargs=model_kwargs,
-    encode_kwargs=encode_kwargs
-)
-vecdb = FAISS.load_local("faiss_index", faiss_embedder)
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# NBDT Recommendation Engine for Editors")
-    gr.Markdown("NBDT Recommendation Engine for Editors is a tool for neuroscience authors/abstracts/journalsrecommendation built for NBDT journal editors. \
-    It aims to help an editor to find similar reviewers, abstracts, and journals to a given submitted abstract.\
-    To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below and click \"Find Matches\".\
-    Then, you can hover to authors/abstracts/journals tab to find a suggested list.\
-    The data in our current demo is selected from 2018 to 2022. We will update the data monthly for an up-to-date publications.")
-    abst = gr.Textbox(label="Abstract",lines=10)
-    k = gr.Slider(1,100,step=1,value=50,label="Number of matches to consider")
-    action_btn = gr.Button(value="Find Matches")
-    with gr.Tab("Authors"):
-        n_output = gr.Dataframe(
-            headers=['No.','Score','Name','Title','Link','Date'],
-            datatype=['number','number','str','str','str','str'],
-            col_count=(6, "fixed"),
-            wrap=True,
-            visible=False
-        )
-    with gr.Tab("Abstracts"):
-        a_output = gr.Dataframe(
-            headers=['No.','Title','Author','Corresponding Author','Journal','Date','Link','Score'],
-            datatype=['number','str','str','str','str','str','str','number'],
-            col_count=(8,"fixed"),
-            wrap=True,
-            visible=False
-        )
-    with gr.Tab("Journals"):
-        j_output = gr.Dataframe(
-            headers=['No.','Name','Score'],
-            datatype=['number','str','number'],
-            col_count=(3, "fixed"),
-            wrap=True,
-            visible=False
-        )
-    with gr.Tab("Reviewers New"):
-      r_output = gr.Dataframe(
-          headers=['No.','Score','Name','Title','Reasoning','Link','Date'],
-          datatype=['number','number','str','str','str','str','str'],
-          col_count=(7,"fixed"),
-          wrap=True,
-          visible=False
-      )
-    action_btn.click(fn=inference,
-                     inputs=[
-                            abst,
-                            k,
-                            # modes,
-                            ],
-                     outputs=[a_output,j_output,n_output,r_output],
-                     api_name="neurojane")
-demo.launch(debug=True)