king007 committed on
Commit
3d5690d
1 Parent(s): c0d5863

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -88
app.py CHANGED
@@ -1,88 +1,21 @@
1
- import gradio as gr
2
- from transformers import (
3
- AutoModelForSeq2SeqLM,
4
- AutoModelForTableQuestionAnswering,
5
- AutoTokenizer,
6
- pipeline,
7
- TapexTokenizer,
8
- BartForConditionalGeneration
9
- )
10
- import pandas as pd
11
- import json
12
-
13
- # model_tapex = "microsoft/tapex-large-finetuned-wtq"
14
- # tokenizer_tapex = AutoTokenizer.from_pretrained(model_tapex)
15
- # model_tapex = AutoModelForSeq2SeqLM.from_pretrained(model_tapex)
16
- # pipe_tapex = pipeline(
17
- # "table-question-answering", model=model_tapex, tokenizer=tokenizer_tapex
18
- # )
19
-
20
- #new
21
# TAPEX: the tokenizer and the BART-based generator are loaded explicitly from
# the same checkpoint, so the name is spelled out only once.
_TAPEX_CHECKPOINT = "microsoft/tapex-large-finetuned-wtq"
tokenizer = TapexTokenizer.from_pretrained(_TAPEX_CHECKPOINT)
model = BartForConditionalGeneration.from_pretrained(_TAPEX_CHECKPOINT)

# TAPAS: two table-question-answering pipelines, one per fine-tuned checkpoint.
# An earlier variant loaded AutoTokenizer / AutoModelForTableQuestionAnswering
# by hand and passed them to pipeline(); here pipeline() is given the
# checkpoint name directly instead.
pipe_tapas = pipeline(
    task="table-question-answering",
    model="google/tapas-large-finetuned-wtq",
)
pipe_tapas2 = pipeline(
    task="table-question-answering",
    model="google/tapas-large-finetuned-wikisql-supervised",
)
35
-
36
-
37
-
38
-
39
def _first_cell(answer):
    """Return the first cell of a TAPAS pipeline answer, or "" if none was selected."""
    cells = answer["cells"]
    return cells[0] if cells else ""


def process2(query, csv_dataStr):
    """Answer ``query`` against a JSON-encoded table with TAPEX and both TAPAS models.

    Args:
        query: Natural-language question about the table.
        csv_dataStr: JSON text mapping each column name to its list of cell
            values, e.g.
            ``{"Actors": ["Brad Pitt", "George Clooney"], "Number of movies": ["87", "69"]}``.

    Returns:
        A ``(result_tapex, result_tapas, result_tapas2)`` tuple. The TAPEX
        entry is the decoded generation; each TAPAS entry is the first cell
        the pipeline selected, or ``""`` when it selected none.

    Raises:
        json.JSONDecodeError: If ``csv_dataStr`` is not valid JSON.
    """
    table = pd.DataFrame.from_dict(json.loads(csv_dataStr))

    # Microsoft TAPEX: encode table + question, generate, decode the first sequence.
    encoding = tokenizer(table=table, query=query, return_tensors="pt")
    outputs = model.generate(**encoding)
    result_tapex = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    # The TAPAS tokenizer only accepts text cells, while JSON input may carry
    # numbers; hand TAPAS a stringified copy and leave the TAPEX input as-is.
    tapas_table = table.astype(str)

    # Google TAPAS (WTQ checkpoint); an empty selection no longer raises IndexError.
    result_tapas = _first_cell(pipe_tapas(table=tapas_table, query=query))
    # Google TAPAS (WikiSQL-supervised checkpoint).
    result_tapas2 = _first_cell(pipe_tapas2(table=tapas_table, query=query))

    return result_tapex, result_tapas, result_tapas2
52
-
53
-
54
# Input widgets: the question and the table, both entered as free text.
# (A CSV file upload and a row-count slider were considered but are not wired in.)
query_text = gr.Text(label="")
input_data = gr.Text(label="")

# Output widgets: one answer box per model, in the order process2 returns them.
answer_text_tapex = gr.Text(label="")
answer_text_tapas = gr.Text(label="")
answer_text_tapas2 = gr.Text(label="")

# Markdown shown above the form; adjacent literals concatenate into one string.
description = (
    "This Space lets you ask questions on CSV documents with Microsoft [TAPEX-Large](https://huggingface.co/microsoft/tapex-large-finetuned-wtq) and Google [TAPAS-Large](https://huggingface.co/google/tapas-large-finetuned-wtq). "
    "Both have been fine-tuned on the [WikiTableQuestions](https://huggingface.co/datasets/wikitablequestions) dataset. \n\n"
    "A sample file with football statistics is available in the repository: \n\n"
    "* Which team has the most wins? Answer: Manchester City FC\n"
    "* Which team has the most wins: Chelsea, Liverpool or Everton? Answer: Liverpool\n"
    "* Which teams have scored less than 40 goals? Answer: Cardiff City FC, Fulham FC, Brighton & Hove Albion FC, Huddersfield Town FC\n"
    "* What is the average number of wins? Answer: 16 (rounded)\n\n"
    "You can also upload your own CSV file. Please note that maximum sequence length for both models is 1024 tokens, "
    "so you may need to limit the number of rows in your CSV file. Chunking is not implemented yet."
)

# Wire the widgets to process2; no example rows are provided and flagging is off.
iface = gr.Interface(
    fn=process2,
    inputs=[query_text, input_data],
    outputs=[answer_text_tapex, answer_text_tapas, answer_text_tapas2],
    description=description,
    examples=[],
    theme="huggingface",
    layout="vertical",
    allow_flagging="never",
)

iface.launch()
 
1
+ import torch
2
+ from transformers import GPT2Tokenizer, GPT2LMHeadModel, pipeline
3
def _tidy_prompt(text):
    """Collapse runs of spaces to a single space and strip trailing commas."""
    text = str(text)
    # Repeat until stable so runs longer than two spaces are collapsed as well.
    while '  ' in text:
        text = text.replace('  ', ' ')
    return text.rstrip(',')


# The distilgpt2 tokenizer gets an explicit [PAD] token before it is paired
# with the fine-tuned prompt-generation model.
tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model = GPT2LMHeadModel.from_pretrained('FredZhang7/anime-anything-promptgen-v2')

prompt = r'1girl, genshin'

# Text-generation pipeline around the fine-tuned model.
nlp = pipeline('text-generation', model=model, tokenizer=tokenizer)

# Draw 10 candidates with top-k sampling (do_sample=True, top_k=4). This is
# not contrastive search, which would need penalty_alpha and no sampling.
# early_stopping is omitted because it only affects beam search.
outs = nlp(
    prompt,
    max_length=76,
    num_return_sequences=10,
    do_sample=True,
    repetition_penalty=1.2,
    temperature=0.7,
    top_k=4,
)

print('\nInput:\n' + 100 * '-')
print('\033[96m' + prompt + '\033[0m')
print('\nOutput:\n' + 100 * '-')

# Clean every candidate first; join() needs all entries to be plain strings.
outs = [_tidy_prompt(out['generated_text']) for out in outs]
print('\033[92m' + '\n\n'.join(outs) + '\033[0m\n')