Spaces:
Runtime error
Runtime error
import os

import gradio as gr
import pandas as pd
import torch
import torch.nn as nn
import transformers
from transformers import AutoConfig, AutoModel, AutoTokenizer, GenerationConfig, LlamaForCausalLM, LlamaTokenizer
from tridentmodel import classification
# Module-level setup: runs once at import time and defines the globals `auth_token`,
# `tokenizer` and `model`; generateresponse() reads `tokenizer` and `model`.
# The token comes from the AUTH_TOKEN_SECRET environment variable (None when unset)
# and is passed as `use_auth_token` to both from_pretrained calls.
auth_token = os.environ.get("AUTH_TOKEN_SECRET")
tokenizer = LlamaTokenizer.from_pretrained("Claimed/capybara", use_auth_token=auth_token)
# Weights are requested in 8-bit with automatic device placement.
model = LlamaForCausalLM.from_pretrained(
    "Claimed/capybara", use_auth_token=auth_token,
    load_in_8bit=True,
    device_map="auto") #low_cpu_mem_usage=True)
# Explicit placement is left disabled; device_map="auto" above handles it.
#model = model.to('cuda')
def broad_scope_class_predictor(class_embeddings, abstract_embedding, N=5, Sensitivity='Medium'):
    """
    Rank pre-computed class embeddings by cosine similarity to one abstract embedding.

    :param class_embeddings: dataframe of class embeddings; column 0 is read as the class
        name and column 2 as the saved embedding
    :param abstract_embedding: a single abstract embedding (torch tensor)
    :param N: N highest matching classes to return, from highest to lowest, default is 5
    :param Sensitivity: 'High', 'Medium' or 'Low'. Accepted for backward compatibility only:
        it used to select a threshold for a check whose result was never returned, so it
        has no influence on the returned dataframe.
    :return: HighestSimilarity: dataframe with columns 'Class Name' and 'Score' holding the
        N most similar classes, sorted from highest to lowest score
    """
    cos = torch.nn.CosineSimilarity(dim=1)
    # Loop-invariant: make sure the query is a plain CPU tensor once, up front.
    query = abstract_embedding.detach().cpu()

    class_names = []
    scores = []
    for class_name, saved_embedding in zip(class_embeddings.iloc[:, 0], class_embeddings.iloc[:, 2]):
        # NOTE(review): convert_saved_embeddings is neither defined nor imported in this
        # file; presumably it is meant to come from tridentmodel.classification - confirm.
        embedding = convert_saved_embeddings(saved_embedding)
        score = cos(query, embedding).numpy().tolist()
        class_names.append(class_name)
        scores.append(score[0])

    # Build the frame once instead of appending row by row; the explicit float dtype
    # keeps nlargest() usable even when there are no classes at all.
    predictions = pd.DataFrame({
        'Class Name': class_names,
        'Score': pd.Series(scores, dtype='float64'),
    })
    HighestSimilarity = predictions.nlargest(N, ['Score'])
    return HighestSimilarity


def _mean_pooling(model_output, attention_mask):
    """Average the token embeddings of each sequence, ignoring padded positions."""
    token_embeddings = model_output[0]  # first element of the model output: per-token embeddings
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    summed = torch.sum(token_embeddings * mask, dim=1)
    # Clamp so an all-padding row cannot cause a division by zero.
    token_counts = torch.clamp(mask.sum(dim=1), min=1e-9)
    return summed / token_counts


def sentence_embedder(sentences, model_path):
    """
    Embed text with the encoder stored at ``model_path``.

    Defined at module level so that classifier() can call it.

    :param sentences: a string or list of strings to embed
    :param model_path: path or hub id of the tokenizer and (TensorFlow-format) model weights
    :return: tensor of mean-pooled sentence embeddings, one row per input
        (the original author's note says (1, 384) for a single input - TODO confirm)
    """
    # Local names deliberately differ from the module-level `tokenizer` / `model`
    # (the Llama pair) so the two are never confused.
    embed_tokenizer = AutoTokenizer.from_pretrained(model_path)
    embed_model = AutoModel.from_pretrained(model_path, from_tf=True)
    encoded_input = embed_tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
    # Compute token embeddings without tracking gradients.
    with torch.no_grad():
        model_output = embed_model(**encoded_input)
    sentence_embeddings = _mean_pooling(model_output, encoded_input['attention_mask'])
    return sentence_embeddings
def add_text(history, text):
    """Append the user's message as a new, not-yet-answered chat turn.

    Returns a new history list (the input list is left untouched) together with an
    empty string, which the caller routes back into the textbox.
    """
    pending_turn = (text, None)
    extended_history = history + [pending_turn]
    return extended_history, ""
def add_file(history, file):
    """Append an uploaded file as a new, not-yet-answered chat turn.

    The file is recorded as a one-element tuple holding ``file.name``. A new list is
    returned; the input history is not modified.
    """
    file_turn = ((file.name,), None)
    return history + [file_turn]
def bot(history):
    """Fill in the reply slot of the latest chat turn with a fixed placeholder answer.

    The last turn is modified in place and the same history object is returned.
    """
    latest_turn = history[-1]
    latest_turn[1] = "**That's cool!**"
    return history
########## LOADING PRE-COMPUTED EMBEDDINGS ##########
# Read once at import time. classifier() hands this frame to
# broad_scope_class_predictor(), which reads column 0 as the class name and
# column 2 as the saved embedding.
class_embeddings = pd.read_csv('Embeddings/MainClassEmbeddings.csv')
def classifier(userin):
    """Return the ten classes whose embeddings best match the user's text.

    The text is cleaned with classification.clean_data, embedded with the 'Model_bert'
    encoder and scored against the module-level ``class_embeddings`` frame.
    """
    cleaned_text = classification.clean_data(userin, type='String')
    query_embedding = sentence_embedder(cleaned_text, 'Model_bert')
    return broad_scope_class_predictor(class_embeddings, query_embedding, 10, Sensitivity='High')
def generateresponse(history):
    """
    Generate the model's reply to the latest user message and store it in the history.

    The last entry of ``history`` is read for the user's text, wrapped in the
    instruction/response prompt, and passed to the module-level ``model``. The decoded
    text following the prompt's response marker is written into the reply slot of that
    last entry (in place) prefixed with "Response: ".

    :param history: chat history; the last item must be a mutable [user_text, reply] pair
    :return: the same history object with the reply slot of the last item filled in
    """
    marker = '### Response:'
    user = history[-1][0]
    PROMPT = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{user}
### Response:"""
    inputs = tokenizer(
        PROMPT,
        return_tensors="pt",
    )
    # The model is placed with device_map="auto", so follow its device rather than
    # assuming CUDA.
    input_ids = inputs["input_ids"].to(model.device)
    generation_config = GenerationConfig(
        temperature=0.6,
        top_p=0.95,
        repetition_penalty=1.15,
    )
    print("Generating...")
    # Per-step scores are not requested: only the generated sequences are used.
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        max_new_tokens=256,
    )
    # Decode each sequence once and reuse the text for both logging and the reply.
    output = [tokenizer.decode(s) for s in generation_output.sequences]
    for decoded in output:
        print(decoded)
    # The decoded text starts with the prompt. If the user's own text contains the
    # marker, skip those occurrences so the reply is taken after the prompt's final
    # marker rather than from inside the prompt.
    reply_index = str(user).count(marker) + 1
    outputs = output[0].split(marker)[reply_index]
    response = f"Response: {outputs}"
    history[-1][1] = response
    print(history)
    return history
# UI theme: Gradio's Base theme with an indigo primary hue and the prose text size
# overridden to '*text_sm'. Passed to gr.Blocks when the interface is built.
theme = gr.themes.Base(
    primary_hue="indigo",
).set(
    prose_text_size='*text_sm'
)
# Interface layout. Only the "CPC Search Tool" tab and the chatbot have event handlers;
# the other tabs currently declare their input/output boxes without any listener.
with gr.Blocks(title='Claimed', theme=theme) as demo:
    gr.Markdown("""
# CLAIMED - A GENERATIVE TOOLKIT FOR PATENT ATTORNEYS
The patenting process can by incredibly time-consuming and expensive. We're on a mission to change that.
Welcome to our demo! We've trained Meta's Llama on over 200k entries, with a focus on tasks related to the intellectual property domain.
Please note that this is for research purposes and shouldn't be used commercially.
None of the outputs of this model, taken in part or in its entirety, constitutes legal advice. If you are seeking protection for you intellectual property, consult a registered patent/trademark attorney.
""")
    with gr.Tab("Claim Drafter"):
        gr.Markdown("""
Use this tool to expand your idea into the technical language of a patent claim.
""")
        with gr.Row(scale=1, min_width=600):
            text1 = gr.Textbox(label="Input",
                               placeholder='Type in your idea here!')
            text2 = gr.Textbox(label="Output")
    with gr.Tab("Description Generator"):
        gr.Markdown("""
Use this tool to expand your patent claim into a description. You can also use this tool to generate abstracts and give you ideas about the benefit of an invention by changing the settings in the dropdown menu.
""")
        gr.Dropdown(["Generate Description", "Generate Abstract", "Benefits of the invention"], label='Choose Generation Type Here')
        with gr.Row(scale=1, min_width=600):
            text1 = gr.Textbox(label="Input",
                               placeholder='Type in your idea here!')
            text2 = gr.Textbox(label="Output")
    with gr.Tab("Knowledge Graph"):
        gr.Markdown("""
Use this tool to generate a knowledge graph of your invention. This will help highlight the links between features.
""")
        with gr.Row(scale=1, min_width=600):
            text1 = gr.Textbox(label="Input",
                               placeholder='Type in your idea here!')
            text2 = gr.Textbox(label="Output")
    with gr.Tab("Prosecution Ideator"):
        gr.Markdown("""
Use this tool to generate ideas for how to overcome objections to novelty and inventive step. Outputs are in the problem-solution format.
""")
        with gr.Row(scale=1, min_width=600):
            text1 = gr.Textbox(label="Input",
                               placeholder='Type in your idea here!')
            text2 = gr.Textbox(label="Output")
    # with gr.Tab("Claimed Infill"):
    #     gr.Markdown("""
    #     Below is our
    #     Example input: A device to help the visually impaired using proprioception.
    #     Output:
    #     """)
    #     with gr.Row(scale=1, min_width=600):
    #         text1 = gr.Textbox(label="Input",
    #                            placeholder='Type in your idea here!')
    #         text2 = gr.Textbox(label="Output")
    with gr.Tab("CPC Search Tool"):
        gr.Markdown("""
Use this tool to classify your invention according to the Cooperative Patent Classification system.
Click on the link to initiate either an Espacenet or Google Patents classification search using the generated classifications. You can specify which you would like using the dropdown menu.
""")
        gr.Dropdown(["Google Patent Search", "Espacenet Patent Search"], label='Choose Search Type Here')
        with gr.Row(scale=1, min_width=600):
            userin = gr.Textbox(label="Input",
                                placeholder='Type in your Claim/Description/Abstract Here')
            output = gr.Textbox(label="Output")
        with gr.Row():
            classify_btn = gr.Button("Classify")
        classify_btn.click(fn=classifier, inputs=[userin] , outputs=output)
    gr.Markdown("""
# THE CHATBOT
Do you want a bit more freedom over the outputs you generate? No worries, you can use a chatbot version of our model below. You can ask it anything.
If you're concerned about a particular output, hit the flag button and we will use that information to improve the model.
""")
    chatbot = gr.Chatbot([], elem_id="Claimed Assistant").style(height=500)
    with gr.Row():
        with gr.Column(scale=0.85):
            txt = gr.Textbox(
                show_label=False,
                placeholder="Enter text and submit",
            ).style(container=False)
        with gr.Column(scale=0.15, min_width=0):
            btn = gr.Button("Submit")
    # Pressing Enter in the textbox and clicking "Submit" must do the same thing:
    # first record the user's turn (and clear the textbox), then generate the reply.
    txt.submit(add_text, [chatbot, txt], [chatbot, txt]).then(
        generateresponse, chatbot, chatbot)
    btn.click(add_text, [chatbot, txt], [chatbot, txt]).then(
        generateresponse, chatbot, chatbot)
    gr.Markdown("""
# HAVE AN IDEA? GET IT CLAIMED
In the future, we are looking to expand our model's capabilities further to assist in a range of IP related tasks.
If you are interested in using a more powerful model that we have trained, or if you have any suggestions of features you would like to see us add, please get in touch!
As far as data is concerned, you have nothing to worry about! We don't store any of your inputs to use for further training, we're not OpenAI.
""")
demo.launch()