fl399 committed on
Commit 4cfb376
1 Parent(s): fd2e81d

Update app.py

Files changed (1)
  1. app.py +48 -23
app.py CHANGED
@@ -112,9 +112,18 @@ if torch.__version__ >= "2":
     model = torch.compile(model)
 
 
+## FLAN-UL2
+TOKEN = os.environ.get("API_TOKEN", None)
+API_URL = "https://api-inference.huggingface.co/models/google/flan-ul2"
+headers = {"Authorization": f"Bearer {TOKEN}"}
+def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
 def evaluate(
     table,
     question,
+    llm="alpaca-lora",
     input=None,
     temperature=0.1,
     top_p=0.75,
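The new `query` helper returns `response.json()` unchecked, but the hosted Inference API can answer with an error object (for instance while the model is still loading) instead of a list of generations. A more defensive variant, shown here as a sketch with a hypothetical `query_with_retry` name and retry policy that are not part of this commit, could look like:

```python
import os
import time

import requests

API_URL = "https://api-inference.huggingface.co/models/google/flan-ul2"
HEADERS = {"Authorization": f"Bearer {os.environ.get('API_TOKEN', '')}"}

def query_with_retry(payload: dict, retries: int = 3, backoff: float = 5.0) -> dict:
    """POST to the Inference API, retrying while the model is loading."""
    data = {"error": "no attempts made"}
    for attempt in range(retries):
        response = requests.post(API_URL, headers=HEADERS, json=payload)
        data = response.json()
        # Error payloads are dicts like {"error": ..., "estimated_time": ...};
        # successful text-generation calls return a list of generations.
        if isinstance(data, list):
            return data[0]  # e.g. {"generated_text": "..."}
        time.sleep(backoff * (attempt + 1))
    raise RuntimeError(f"Inference API request failed: {data}")
```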
@@ -124,26 +133,34 @@ def evaluate(
     **kwargs,
 ):
     prompt = _TEMPLATE + "\n" + _add_markup(table) + "\n" + "Q: " + question + "\n" + "A:"
-    inputs = tokenizer(prompt, return_tensors="pt")
-    input_ids = inputs["input_ids"].to(device)
-    generation_config = GenerationConfig(
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k,
-        num_beams=num_beams,
-        **kwargs,
-    )
-    with torch.no_grad():
-        generation_output = model.generate(
-            input_ids=input_ids,
-            generation_config=generation_config,
-            return_dict_in_generate=True,
-            output_scores=True,
-            max_new_tokens=max_new_tokens,
-        )
-    s = generation_output.sequences[0]
-    output = tokenizer.decode(s)
-    #return output.split("A:")[-1].strip()
+    if llm == "alpaca-lora":
+        inputs = tokenizer(prompt, return_tensors="pt")
+        input_ids = inputs["input_ids"].to(device)
+        generation_config = GenerationConfig(
+            temperature=temperature,
+            top_p=top_p,
+            top_k=top_k,
+            num_beams=num_beams,
+            **kwargs,
+        )
+        with torch.no_grad():
+            generation_output = model.generate(
+                input_ids=input_ids,
+                generation_config=generation_config,
+                return_dict_in_generate=True,
+                output_scores=True,
+                max_new_tokens=max_new_tokens,
+            )
+        s = generation_output.sequences[0]
+        output = tokenizer.decode(s)
+    elif llm == "flan-ul2":
+        output = query({
+            "inputs": prompt
+        })[0]["generated_text"]
+
+    else:
+        raise RuntimeError(f"No such LLM: {llm}")
+
     return output
 
 
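Both backends consume the same prompt string: the few-shot `_TEMPLATE`, the table passed through `_add_markup`, and the question in `Q: ... A:` form. The snippet below illustrates the assembled format with simplified stand-ins for `_TEMPLATE` and `_add_markup`, whose real definitions live earlier in `app.py` and are not shown in this diff:

```python
# Simplified stand-ins; the real _TEMPLATE holds worked Q/A examples and
# the real _add_markup annotates the table before prompting.
_TEMPLATE = "Read the table below to answer the following questions."

def _add_markup(table: str) -> str:
    return table

table = "Year | Sales\n2020 | 100\n2021 | 150\n2022 | 225"
question = "Which year had the highest sales?"

prompt = _TEMPLATE + "\n" + _add_markup(table) + "\n" + "Q: " + question + "\n" + "A:"
print(prompt)
# Read the table below to answer the following questions.
# Year | Sales
# 2020 | 100
# 2021 | 150
# 2022 | 225
# Q: Which year had the highest sales?
# A:
```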
@@ -151,23 +168,31 @@ def evaluate(
 model_deplot = Pix2StructForConditionalGeneration.from_pretrained("google/deplot", torch_dtype=torch.bfloat16).to(0)
 processor_deplot = Pix2StructProcessor.from_pretrained("google/deplot")
 
-def process_document(image, question):
+def process_document(llm, image, question):
     # image = Image.open(image)
     inputs = processor_deplot(images=image, text="Generate the underlying data table for the figure below:", return_tensors="pt").to(0, torch.bfloat16)
     predictions = model_deplot.generate(**inputs, max_new_tokens=512)
     table = processor_deplot.decode(predictions[0], skip_special_tokens=True).replace("<0x0A>", "\n")
 
     # send prompt+table to LLM
-    res = evaluate(table, question)
+    res = evaluate(table, question, llm=llm)
     #return res + "\n\n" + res.split("A:")[-1]
-    return [table, res.split("A:")[-1]]
+    if llm == "alpaca-lora":
+        return [table, res.split("A:")[-1]]
+    else:
+        return [table, res]
 
 description = "Demo for DePlot+LLM for QA and summarisation. [DePlot](https://arxiv.org/abs/2212.10505) is an image-to-text model that converts plots and charts into a textual sequence. The sequence then is used to prompt LLM for chain-of-thought reasoning. The current underlying LLM is [alpaca-lora](https://huggingface.co/spaces/tloen/alpaca-lora). To use it, simply upload your image and type a question or instruction and click 'submit', or click one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2212.10505' target='_blank'>DePlot: One-shot visual language reasoning by plot-to-table translation</a></p>"
 
 demo = gr.Interface(
     fn=process_document,
-    inputs=["image", "text"],
+    inputs=[
+        gr.Dropdown(
+            ["alpaca-lora", "flan-ul2"], label="LLM", info="Will add more LLMs later!"
+        ),
+        "image",
+        "text"],
     outputs=[
         gr.inputs.Textbox(
             lines=8,
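On the vision side, `process_document` runs DePlot once per image to linearize the chart into a table before any LLM is involved. The core of that step in isolation, as a CPU-only sketch (the Space itself loads the model in bfloat16 on GPU, and `chart.png` is a placeholder path):

```python
from PIL import Image
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor

model = Pix2StructForConditionalGeneration.from_pretrained("google/deplot")
processor = Pix2StructProcessor.from_pretrained("google/deplot")

image = Image.open("chart.png")
inputs = processor(
    images=image,
    text="Generate the underlying data table for the figure below:",
    return_tensors="pt",
)
predictions = model.generate(**inputs, max_new_tokens=512)
# DePlot emits "<0x0A>" byte tokens for newlines; map them back.
table = processor.decode(predictions[0], skip_special_tokens=True).replace("<0x0A>", "\n")
print(table)
```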
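One behavioural difference between the two backends drives the branching in `process_document`: `tokenizer.decode` on the alpaca-lora side returns the prompt together with the completion, so only the text after the final `A:` marker is the answer, whereas the Inference API already returns just the completion. The post-processing rule in isolation, with a hypothetical helper name:

```python
def extract_answer(decoded: str) -> str:
    """Keep only the completion that follows the echoed prompt.

    Decoder-only models such as alpaca-lora echo the prompt, so the answer
    is whatever follows the last "A:" marker; FLAN-UL2 responses from the
    Inference API arrive prompt-free and need no stripping.
    """
    return decoded.split("A:")[-1].strip()

assert extract_answer("Q: Which year had the highest sales?\nA: 2022") == "2022"
```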
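The dropdown becomes the first positional input, which is why `process_document` now takes `llm` as its first parameter. A stripped-down, self-contained sketch of the same wiring, using a dummy handler and the current `gr.Textbox` API in place of the deprecated `gr.inputs.Textbox` still used in the file:

```python
import gradio as gr

def process_document(llm: str, image, question: str):
    # Dummy stand-in for DePlot + LLM: echo what would be dispatched.
    table = "Year | Sales\n2020 | 100\n2021 | 150"
    return [table, f"({llm}) would answer: {question!r}"]

demo = gr.Interface(
    fn=process_document,
    inputs=[
        gr.Dropdown(["alpaca-lora", "flan-ul2"], label="LLM"),
        "image",
        "text",
    ],
    outputs=[
        gr.Textbox(lines=8, label="Table"),
        gr.Textbox(lines=4, label="Answer"),
    ],
)

if __name__ == "__main__":
    demo.launch()
```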