thomasgauthier committed on
Commit
10c1f9c
1 Parent(s): ac86214

will this work?

Browse files
Files changed (28) hide show
  1. dataset_adapters/3e6e51bb55f0c510628fc0e07baaeec1a162a7b0ef863165b0891efb92ed0101.py +2 -0
  2. dataset_adapters/4d52bd9e40bac418bcc390a42ffaf0c0c1e85370628381af2608ddcbfb3a679b.py +29 -0
  3. dataset_adapters/7a329ccea52693be98470e74ada5768849ba2523454c19d1f2d84b60221c156e.py +32 -0
  4. dataset_adapters/87522c634adeee86c404df5141f0a2b983dff4bdde32f7d475db4cefa1dc2520.py +83 -0
  5. dataset_adapters/952b489de97f366fb44523b27fb3f0069050635fc6ada37e63997201324b3c41.py +22 -0
  6. dataset_adapters/CollectiveCognitionchats-data-2023-10-16.py +11 -0
  7. dataset_adapters/LDJnrLessWrong-Amplify-Instruct.py +9 -0
  8. dataset_adapters/LDJnrPuffin.py +13 -0
  9. dataset_adapters/LDJnrPure-Dove.py +18 -0
  10. dataset_adapters/WizardLMWizardLM_evol_instruct_70k.py +13 -0
  11. dataset_adapters/WizardLMWizardLM_evol_instruct_70k_2.py +14 -0
  12. dataset_adapters/WizardLMWizardLM_evol_instruct_70k_fubnh.py +23 -0
  13. dataset_adapters/WizardLMWizardLM_evol_instruct_V2_196k.py +16 -0
  14. dataset_adapters/e46a55643ce19efc8adfe855f6ff7a2a3e93a60ea42b1897f4c705919e6f821a.py +16 -0
  15. dataset_adapters/ed2b4cf199998dfb4690d6ae767d25dca1256ccd97729b257db3a37206a72969.py +38 -0
  16. dataset_adapters/ed2b4cf199998dfb4690d6ae767d25dca1256ccd97729b257db3a37206a72969_bp.py +22 -0
  17. main.py +373 -8
  18. requirements.txt +8 -7
  19. static/dist/assets/index-34528448.js +0 -0
  20. static/dist/assets/index-6ff09fc9.css +1 -0
  21. static/dist/fonts/icomoon.eot +0 -0
  22. static/dist/fonts/icomoon.svg +11 -0
  23. static/dist/fonts/icomoon.ttf +0 -0
  24. static/dist/fonts/icomoon.woff +0 -0
  25. static/dist/index.html +15 -0
  26. static/index.html +0 -36
  27. static/script.js +0 -21
  28. static/style.css +0 -45
dataset_adapters/3e6e51bb55f0c510628fc0e07baaeec1a162a7b0ef863165b0891efb92ed0101.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
def transform_data(data):
    """Return the sample unchanged.

    Identity adapter: the very same object that was passed in is handed
    back, without copying.
    """
    sample = data
    return sample
dataset_adapters/4d52bd9e40bac418bcc390a42ffaf0c0c1e85370628381af2608ddcbfb3a679b.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def transform_data(data):
    """Turn a ``messages`` list into a flat list of ShareGPT-style turns.

    Each message is read through its ``role`` and ``content`` keys:

    * ``system`` messages are moved to the very front of the result,
    * ``assistant`` messages become ``gpt`` turns,
    * every other role becomes a ``human`` turn; a non-empty ``input``
      message is glued onto the preceding ``human`` turn (separated by a
      blank line) when there is one.

    If the resulting list opens with a ``gpt`` turn, an empty ``human``
    turn is put in front of it.

    Returns:
        A list of ``{'from': ..., 'value': ...}`` dicts (not wrapped in a
        ``conversations`` dict).
    """
    turns = []

    for entry in data.get('messages', []):
        role = entry['role']
        text = entry['content']

        if role == 'system':
            # System text always goes first, whatever its position was.
            turns.insert(0, {'from': 'system', 'value': text})
            continue

        if role == 'assistant':
            turns.append({'from': 'gpt', 'value': text})
            continue

        # Anything else is treated as the human side of the exchange.
        extends_previous = (
            role == 'input'
            and text
            and turns
            and turns[-1]['from'] == 'human'
        )
        if extends_previous:
            turns[-1]['value'] += '\n\n' + text
        else:
            turns.append({'from': 'human', 'value': text})

    # Never let the model open the conversation.
    if turns and turns[0]['from'] == 'gpt':
        turns.insert(0, {'from': 'human', 'value': ''})

    return turns
dataset_adapters/7a329ccea52693be98470e74ada5768849ba2523454c19d1f2d84b60221c156e.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def transform_data(data):
    """Convert a ``role_1`` / ``message_1`` / ``message_2`` record to ShareGPT turns.

    Keys are visited in a fixed order so the prompt side always precedes
    the answer: ``role_1``, then ``instruction`` (only when present),
    then ``message_1``, then ``message_2``. Missing or empty keys are
    skipped.

    * ``role_1`` is reduced to a readable label: the text after the last
      ``.`` with underscores turned into spaces, capitalised.
    * ``instruction`` is used verbatim; a non-empty ``input`` is appended
      to it after a single space.
    * ``message_1`` / ``message_2`` are used verbatim.

    Every value is stripped of surrounding whitespace.

    Returns:
        ``{'conversations': [{'from': ..., 'value': ...}, ...]}`` where
        ``message_2`` is a ``gpt`` turn and everything else ``human``.
    """
    role_mapping = {
        'role_1': 'human',
        'message_1': 'human',
        'message_2': 'gpt',
    }

    conv_order = ['role_1', 'message_1', 'message_2']
    if 'instruction' in data:
        # The instruction must come before the first message.
        conv_order.insert(conv_order.index('message_1'), 'instruction')

    conversation = []
    for key in conv_order:
        raw = data.get(key)
        if not raw:
            continue

        if key == 'role_1':
            # Label clean-up applies to the role name only. Running it on
            # free text such as the instruction would keep just the part
            # after the last '.', i.e. drop most (or all) of the text.
            msg_value = raw.split('.')[-1].replace('_', ' ').capitalize()
        else:
            msg_value = raw

        if key == 'instruction' and data.get('input'):
            msg_value += ' ' + data['input']

        conversation.append({
            'from': role_mapping.get(key, 'human'),
            'value': msg_value.strip(),
        })

    return {'conversations': conversation}
dataset_adapters/87522c634adeee86c404df5141f0a2b983dff4bdde32f7d475db4cefa1dc2520.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # def transform_data(data):
2
+ # conversations = []
3
+
4
+ # # start with instruction or input
5
+ # if "instruction" in data:
6
+ # conversation = {}
7
+ # conversation["from"] = "system"
8
+ # conversation["value"] = data["instruction"]
9
+ # conversations.append(conversation)
10
+
11
+ # if "input" in data and data["input"].strip() != "":
12
+ # if conversations:
13
+ # # Concat the input at the end of the first message
14
+ # conversations[0]["value"] += "\n" + data["input"]
15
+ # else:
16
+ # conversation = {}
17
+ # conversation["from"] = "human"
18
+ # conversation["value"] = data["input"]
19
+ # conversations.append(conversation)
20
+
21
+ # # finalize with "output"
22
+ # if "output" in data:
23
+ # conversation = {}
24
+ # conversation["from"] = "gpt"
25
+ # conversation["value"] = data["output"]
26
+ # conversations.append(conversation)
27
+
28
+ # return {"conversations": conversations}
29
+
30
+
31
+ # def transform_data(data):
32
+ # # Initialize the final result list
33
+ # result = []
34
+
35
+ # # Process "instruction"
36
+ # if "instruction" in data and data["instruction"]:
37
+ # result.append({
38
+ # 'from': 'system',
39
+ # 'value': data["instruction"]
40
+ # })
41
+
42
+ # # Process "input"
43
+ # if "input" in data and data["input"]:
44
+ # # If "instruction" has already been added
45
+ # if result:
46
+ # # Add "input" to the end of the first message
47
+ # result[0]['value'] += '\n' + data["input"]
48
+ # else:
49
+ # # If there's no "instruction", add "input" as a separate message
50
+ # result.append({
51
+ # 'from': 'human',
52
+ # 'value': data["input"]
53
+ # })
54
+
55
+ # # Process "output"
56
+ # if "output" in data and data["output"]:
57
+ # result.append({
58
+ # 'from': 'gpt',
59
+ # 'value': data["output"]
60
+ # })
61
+
62
+ # return { 'conversations': result }
63
+
64
+
65
def transform_data(data):
    """Convert an ``instruction`` / ``input`` / ``output`` record to ShareGPT turns.

    * A non-empty ``instruction`` becomes a ``system`` turn.
    * A non-empty ``input`` is appended to that turn after one space; when
      there is no instruction it becomes a ``human`` turn of its own.
    * A non-empty ``output`` becomes the closing ``gpt`` turn.

    Returns:
        ``{'conversations': [...]}`` with the turns in the order above.
    """
    instruction = data.get('instruction')
    user_input = data.get('input')
    answer = data.get('output')

    turns = []

    if instruction:
        turns.append({'from': 'system', 'value': instruction})

    if user_input:
        if instruction:
            # Extend the instruction turn that was just added.
            turns[-1]['value'] += ' ' + user_input
        else:
            turns.append({'from': 'human', 'value': user_input})

    if answer:
        turns.append({'from': 'gpt', 'value': answer})

    return {'conversations': turns}
dataset_adapters/952b489de97f366fb44523b27fb3f0069050635fc6ada37e63997201324b3c41.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def transform_data(data):
    """Convert a ``system`` / ``instruction`` / ``input`` / ``response`` record.

    The result lists, in order: the ``system`` text (when truthy), one
    ``human`` turn made of the instruction followed by the non-empty input
    (joined by a single space when both are present), and the ``response``
    as a ``gpt`` turn whenever that key exists.

    Returns:
        ``{'conversations': [...]}``.
    """
    turns = []

    system_text = data.get('system')
    if system_text:
        turns.append({'from': 'system', 'value': system_text})

    # Build the human prompt: instruction first, then the optional input.
    prompt = '' + data['instruction'] if 'instruction' in data else ''
    extra = data.get('input')
    if extra:
        prompt += (' ' + extra) if prompt else extra

    if prompt:
        turns.append({'from': 'human', 'value': prompt})

    if 'response' in data:
        turns.append({'from': 'gpt', 'value': data['response']})

    return {'conversations': turns}
dataset_adapters/CollectiveCognitionchats-data-2023-10-16.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
def transform_data(data):
    """Wrap a titled conversation record into ``{'id', 'conversations'}``.

    The ``id`` is derived from the record's ``title``: the first 8 bytes
    of its SHA-256 digest read as an unsigned integer. Equal titles
    therefore always get the same id. (The builtin ``id()`` would return
    the memory address of the string object, which is unrelated to the
    title's content and differs between objects and runs.)

    When the first turn comes from ``assistant``, a ``system`` turn with
    the value ``'START'`` is placed in front of it. All original turns are
    then copied over unchanged. An empty ``conversations`` list is allowed
    and yields an empty result list.

    Raises:
        KeyError: if ``title`` or ``conversations`` is missing.
    """
    import hashlib

    title_digest = hashlib.sha256(str(data['title']).encode('utf-8')).digest()
    new_data = {
        'id': int.from_bytes(title_digest[:8], 'big'),
        'conversations': [],
    }

    turns = data['conversations']

    # Guard on emptiness first: indexing [0] on an empty list would raise.
    if turns and turns[0]['from'] == 'assistant':
        new_data['conversations'].append({'from': 'system', 'value': 'START'})

    new_data['conversations'].extend(turns)

    return new_data
dataset_adapters/LDJnrLessWrong-Amplify-Instruct.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
def transform_data(data):
    """Flatten ``conversation`` input/output pairs into ShareGPT turns.

    Each item of ``data["conversation"]`` contributes a ``human`` turn
    (its ``input``) followed by a ``gpt`` turn (its ``output``).

    The ``id`` is the first 8 bytes of the SHA-256 digest of
    ``data["source"]`` read as an unsigned integer. The builtin ``hash()``
    is not used because string hashes are salted per interpreter process,
    so the same source would get a different id on every run.

    Returns:
        ``{"id": int, "conversations": [...]}``.

    Raises:
        KeyError: if ``source``, ``conversation``, or an item's ``input`` /
            ``output`` is missing.
    """
    import hashlib

    source_digest = hashlib.sha256(str(data["source"]).encode("utf-8")).digest()

    conversations = []
    for item in data["conversation"]:
        conversations.append({"from": "human", "value": item["input"]})
        conversations.append({"from": "gpt", "value": item["output"]})

    return {
        "id": int.from_bytes(source_digest[:8], "big"),
        "conversations": conversations,
    }
dataset_adapters/LDJnrPuffin.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def transform_data(data):
    """Group ``human`` / ``gpt`` turns into ``{'input', 'output'}`` pairs.

    Every ``human`` turn opens a new pair holding its value as ``input``;
    a ``gpt`` turn fills in the ``output`` of the pair that is currently
    open. Turns from any other speaker are ignored.

    A ``gpt`` turn that has no open pair to attach to (it comes first, or
    follows another ``gpt`` turn) gets a pair of its own with an empty
    ``input``, so it neither raises ``IndexError`` nor overwrites the
    previous answer.

    NOTE(review): the result uses ``input`` / ``output`` keys rather than
    the ``from`` / ``value`` turn shape — confirm this is what the
    consumer of this adapter expects.

    Returns:
        ``{'id': data['id'], 'conversations': [pair, ...]}``.
    """
    pairs = []

    for turn in data['conversations']:
        speaker = turn['from']
        if speaker == 'human':
            pairs.append({'input': turn['value']})
        elif speaker == 'gpt':
            if not pairs or 'output' in pairs[-1]:
                # No unanswered human turn to attach this answer to.
                pairs.append({'input': ''})
            pairs[-1]['output'] = turn['value']

    return {
        'id': data['id'],
        'conversations': pairs,
    }
dataset_adapters/LDJnrPure-Dove.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def transform_data(data):
    """Flatten ``conversation`` input/output pairs into ShareGPT turns.

    Each item of ``data['conversation']`` (an absent key counts as an
    empty list) contributes a ``human`` turn and a ``gpt`` turn; a missing
    ``input`` or ``output`` becomes an empty string.

    The ``id`` is the first 8 bytes of the SHA-256 digest of
    ``data['source']`` read as an unsigned integer. The builtin ``hash()``
    is not used because string hashes are salted per interpreter process,
    so the same source would get a different id on every run.

    Returns:
        ``{"id": int, "conversations": [...]}``.

    Raises:
        KeyError: if ``source`` is missing.
    """
    import hashlib

    conversations = []
    for item in data.get('conversation', []):
        conversations.append({"from": "human", "value": item.get("input", "")})
        conversations.append({"from": "gpt", "value": item.get("output", "")})

    source_digest = hashlib.sha256(str(data['source']).encode('utf-8')).digest()

    return {
        "id": int.from_bytes(source_digest[:8], 'big'),
        "conversations": conversations,
    }
dataset_adapters/WizardLMWizardLM_evol_instruct_70k.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def transform_data(data):
    """Convert an ``instruction`` / ``output`` record to ShareGPT turns.

    The ``instruction`` (when the key exists) becomes a ``human`` turn and
    the ``output`` (when the key exists) a ``gpt`` turn, in that order.
    The ``id`` is the fixed placeholder ``1``.

    Returns:
        ``{'id': 1, 'conversations': [...]}``.
    """
    speakers = (('instruction', 'human'), ('output', 'gpt'))
    turns = [
        {'from': speaker, 'value': data[field]}
        for field, speaker in speakers
        if field in data
    ]
    return {'id': 1, 'conversations': turns}
dataset_adapters/WizardLMWizardLM_evol_instruct_70k_2.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def transform_data(data):
    """Convert an ``instruction`` / ``output`` record to ShareGPT turns.

    The ``instruction`` (when the key exists) becomes a ``human`` turn and
    the ``output`` (when the key exists) a ``gpt`` turn. The instruction is
    always emitted first, regardless of the key order of ``data``, so the
    human side opens the conversation. All other keys are ignored.

    The ``id`` is the fixed placeholder ``1``.

    Returns:
        ``{'id': 1, 'conversations': [...]}``.
    """
    conversations = []

    # Explicit order instead of iterating data.items(): dict order would
    # let 'output' come out before 'instruction'.
    if 'instruction' in data:
        conversations.append({'from': 'human', 'value': data['instruction']})
    if 'output' in data:
        conversations.append({'from': 'gpt', 'value': data['output']})

    return {'id': 1, 'conversations': conversations}
dataset_adapters/WizardLMWizardLM_evol_instruct_70k_fubnh.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def transform_data(data):
    """Convert an ``instruction`` / ``output`` record to ShareGPT turns.

    A truthy ``instruction`` becomes a ``human`` turn and a truthy
    ``output`` a ``gpt`` turn, in that order. The record's ``id`` is
    carried over, defaulting to ``0`` when absent.

    Returns:
        ``{"id": ..., "conversations": [...]}``.
    """
    turns = []
    for field, speaker in (("instruction", "human"), ("output", "gpt")):
        text = data.get(field, None)
        if text:
            turns.append({"from": speaker, "value": text})

    return {"id": data.get("id", 0), "conversations": turns}
dataset_adapters/WizardLMWizardLM_evol_instruct_V2_196k.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+
3
def transform_data(data):
    """Keep only the ``human`` / ``gpt`` / ``system`` turns of a conversation.

    The speaker of each turn is compared case-insensitively and written
    out in lower case, so that a turn accepted by the filter (for example
    ``'Human'``) also carries one of the three canonical speaker names in
    the result. Turns from any other speaker, or with a missing / ``None``
    speaker, are dropped. A missing ``value`` becomes an empty string.

    The ``id`` is a random integer between 1 and 1000000 inclusive.

    Returns:
        ``{'id': int, 'conversations': [...]}``.
    """
    allowed_speakers = ('human', 'gpt', 'system')

    transformed_data = {
        'id': random.randint(1, 1000000),  # random placeholder id
        'conversations': [],
    }

    for conversation in data.get('conversations', []):
        # `or ''` guards against an explicit None speaker.
        speaker = (conversation.get('from') or '').lower()
        if speaker in allowed_speakers:
            transformed_data['conversations'].append({
                'from': speaker,
                'value': conversation.get('value', ''),
            })

    return transformed_data
dataset_adapters/e46a55643ce19efc8adfe855f6ff7a2a3e93a60ea42b1897f4c705919e6f821a.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def transform_data(data):
    """Label the strings in ``data["data"]`` by their position.

    Odd positions are ``gpt`` turns. Even positions are ``human`` turns,
    except when the even position is the last element of the list (no
    reply follows it), in which case it is labelled ``system``.

    Returns:
        ``{'conversations': [...]}`` with one turn per input string, in
        the original order.
    """
    messages = data["data"]
    last_position = len(messages) - 1

    turns = []
    for position, text in enumerate(messages):
        if position % 2:
            speaker = 'gpt'
        elif position < last_position:
            # A reply follows, so this is a human prompt.
            speaker = 'human'
        else:
            # Nothing follows this message.
            speaker = 'system'
        turns.append({'from': speaker, 'value': text})

    return {'conversations': turns}
dataset_adapters/ed2b4cf199998dfb4690d6ae767d25dca1256ccd97729b257db3a37206a72969.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def transform_data(data):
    """Convert a ``prompt`` + ``messages`` record to ShareGPT turns.

    The result is built in this order:

    1. every ``system`` message,
    2. ``user`` / ``assistant`` messages paired up positionally (first
       user with first assistant, and so on), human turn first,
    3. any ``user`` messages left over once the assistant messages run out.

    A non-blank ``prompt`` is prepended to the first user message,
    separated by a blank line. Assistant messages beyond the number of
    user messages are not emitted.

    The input record is left untouched: the prompt is merged into a local
    copy of the text, not written back into ``data['messages']``.

    Raises:
        KeyError: if ``messages`` is missing, or a message lacks ``role``
            or ``content``.
    """
    messages = data['messages']

    conversations = [
        {'from': 'system', 'value': msg['content']}
        for msg in messages
        if msg['role'] == 'system'
    ]

    # Work on the texts only, so the caller's message dicts are never mutated.
    human_texts = [msg['content'] for msg in messages if msg['role'] == 'user']
    gpt_texts = [msg['content'] for msg in messages if msg['role'] == 'assistant']

    prompt = data.get('prompt') or ''
    if human_texts and prompt.strip():
        human_texts[0] = prompt + '\n\n' + human_texts[0]

    for human_text, gpt_text in zip(human_texts, gpt_texts):
        conversations.append({'from': 'human', 'value': human_text})
        conversations.append({'from': 'gpt', 'value': gpt_text})

    # User messages that have no assistant counterpart.
    for human_text in human_texts[len(gpt_texts):]:
        conversations.append({'from': 'human', 'value': human_text})

    return {'conversations': conversations}
dataset_adapters/ed2b4cf199998dfb4690d6ae767d25dca1256ccd97729b257db3a37206a72969_bp.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def transform_data(data):
    """Build a single system / human / gpt exchange from ``messages``.

    Only the first message of each role is used: the first ``system``
    message, then the first ``user`` message (as ``human``), then the
    first ``assistant`` message (as ``gpt``). Roles that do not occur are
    left out. A missing ``messages`` key counts as an empty list.

    Returns:
        ``{'conversations': [...]}`` with at most three turns.
    """
    messages = data.get('messages', [])
    role_to_speaker = (
        ('system', 'system'),
        ('user', 'human'),
        ('assistant', 'gpt'),
    )

    turns = []
    for role, speaker in role_to_speaker:
        first_match = next((msg for msg in messages if msg.get('role') == role), None)
        if first_match:
            turns.append({'from': speaker, 'value': first_match['content']})

    return {'conversations': turns}
main.py CHANGED
@@ -1,20 +1,385 @@
1
  from fastapi import FastAPI
2
  from fastapi.staticfiles import StaticFiles
3
  from fastapi.responses import FileResponse
4
-
5
- from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  app = FastAPI()
8
 
9
- pipe_flan = pipeline("text2text-generation", model="google/flan-t5-small")
10
 
11
- @app.get("/infer_t5")
12
- def t5(input):
13
- output = pipe_flan(input)
14
- return {"output": output[0]["generated_text"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- app.mount("/", StaticFiles(directory="static", html=True), name="static")
17
 
 
 
 
18
  @app.get("/")
19
  def index() -> FileResponse:
20
  return FileResponse(path="/app/static/index.html", media_type="text/html")
 
1
import asyncio
import hashlib
import importlib.util
import json
import os
import random
import sys
from pprint import pprint

import jsonschema
import numpy as np
from datasets import load_dataset
from fastapi import BackgroundTasks, FastAPI, HTTPException, Query
from fastapi.responses import FileResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from genson import SchemaBuilder
from openai import OpenAI
from pathvalidate import sanitize_filename
from starlette.concurrency import run_in_threadpool

# import aiosqlite
from utils import extract_code
23
 
24
  app = FastAPI()
25
 
 
26
 
27
+ # DATABASE_FILE = "samples.db"
28
+
29
+
30
# OpenAI-compatible client pointed at the OpenRouter endpoint. The API key is
# read from the OPENROUTER_KEY environment variable (None when it is unset).
client = OpenAI(
    api_key=os.getenv('OPENROUTER_KEY'),
    base_url="https://openrouter.ai/api/v1",
)
34
+
35
+
36
+ # async def setup_database():
37
+ # async with aiosqlite.connect(DATABASE_FILE) as db:
38
+ # await db.execute("""
39
+ # CREATE TABLE IF NOT EXISTS samples (
40
+ # hash TEXT PRIMARY KEY,
41
+ # data TEXT NOT NULL,
42
+ # dataset TEXT NOT NULL
43
+ # )
44
+ # """)
45
+ # await db.commit()
46
+
47
+ # async def insert_sample(hash: str, data: str, dataset: str):
48
+ # async with aiosqlite.connect(DATABASE_FILE) as db:
49
+ # # Check if a record with the same hash already exists
50
+ # cursor = await db.execute("SELECT COUNT(*) FROM samples WHERE hash = ?", (hash,))
51
+ # count = await cursor.fetchone()
52
+
53
+ # if count[0] == 0:
54
+ # # Insert the new record since it doesn't exist
55
+ # await db.execute("INSERT INTO samples (hash, data, dataset) VALUES (?, ?, ?)", (hash, data, dataset))
56
+ # await db.commit()
57
+ # else:
58
+ # # A record with the same hash already exists
59
+ # print("Record with the same hash already exists in the database.")
60
+
61
+ # async def get_sample_by_hash(hash: str):
62
+ # async with aiosqlite.connect(DATABASE_FILE) as db:
63
+ # cursor = await db.execute("SELECT data, dataset FROM samples WHERE hash = ?", (hash,))
64
+ # row = await cursor.fetchone()
65
+ # return row
66
+
67
def is_sharegpt(sample):
    """Tell whether ``sample`` already has the ShareGPT conversation shape.

    The expected shape is an object with a required ``conversations``
    array whose items are objects with a required string ``from`` (one of
    ``human``, ``gpt``, ``system``) and a required string ``value``.

    Returns:
        ``True`` when ``jsonschema`` validation passes, ``False`` when it
        raises ``ValidationError``.
    """
    turn_schema = {
        'type': 'object',
        'properties': {
            'from': {'type': 'string', 'enum': ['human', 'gpt', 'system']},
            'value': {'type': 'string'},
        },
        'required': ['from', 'value'],
    }
    sharegpt_schema = {
        '$schema': 'http://json-schema.org/schema#',
        'type': 'object',
        'properties': {
            'conversations': {'type': 'array', 'items': turn_schema},
        },
        'required': ['conversations'],
    }

    try:
        jsonschema.validate(instance=sample, schema=sharegpt_schema)
    except jsonschema.exceptions.ValidationError:
        return False
    return True
74
+
75
def sha256(string):
    """Return the hexadecimal SHA-256 digest of ``string`` encoded as UTF-8."""
    encoded = string.encode('utf-8')
    return hashlib.sha256(encoded).hexdigest()
82
+
83
def get_adapter_name(sample):
    """Derive the adapter name for ``sample`` from the shape of its data.

    A JSON schema is inferred from the sample with genson's
    ``SchemaBuilder``; the name is the SHA-256 hex digest of that schema
    serialised with ``json.dumps``.
    """
    schema_builder = SchemaBuilder()
    schema_builder.add_object(sample)
    serialized_schema = json.dumps(schema_builder.to_schema())
    return sha256(serialized_schema)
89
+
90
def has_adapter(sample):
    """Tell whether an adapter module exists for the shape of ``sample``.

    The adapter is looked up as ``dataset_adapters.<adapter name>`` through
    the import system.

    Returns:
        ``True`` when a module spec is found, ``False`` otherwise —
        including when the ``dataset_adapters`` package itself cannot be
        located.
    """
    adapter_name = get_adapter_name(sample)
    module_name = f"dataset_adapters.{adapter_name}"

    # Adapter files are written while the server is running; the import
    # finders cache directory listings, so refresh them before looking.
    importlib.invalidate_caches()

    try:
        module_spec = importlib.util.find_spec(module_name)
    except ModuleNotFoundError:
        # find_spec imports the parent package and raises when it is missing.
        return False

    return module_spec is not None
100
+
101
def auto_tranform(sample):
    """Convert ``sample`` with the adapter matching its data shape.

    The adapter is created first (via ``create_adapter``) when
    ``has_adapter`` reports none. It is then loaded from
    ``dataset_adapters/<adapter name>.py``, registered in ``sys.modules``,
    and its ``transform_data`` function is applied to the sample.

    NOTE(review): loading the adapter executes whatever Python code is in
    that file, including code that ``create_adapter`` obtained from a
    language model — there is no sandboxing here.

    Returns:
        The adapter's result; a bare list is wrapped as
        ``{'conversations': <list>}``.
    """
    adapter_name = get_adapter_name(sample)
    if not has_adapter(sample):
        create_adapter(sample, adapter_name)

    qualified_name = f"dataset_adapters.{adapter_name}"
    adapter_path = f"dataset_adapters/{adapter_name}.py"

    adapter_spec = importlib.util.spec_from_file_location(qualified_name, adapter_path)
    adapter_module = importlib.util.module_from_spec(adapter_spec)
    sys.modules[qualified_name] = adapter_module
    adapter_spec.loader.exec_module(adapter_module)

    converted = adapter_module.transform_data(sample)

    # Some adapters return just the list of turns.
    return {'conversations': converted} if isinstance(converted, list) else converted
120
+
121
+
122
+
123
+
124
+ # def create_adapter(sample, adapter_name):
125
+ # builder = SchemaBuilder()
126
+ # builder.add_object(sample)
127
+ # schema = builder.to_schema()
128
+
129
+ # code_string = """def transform_data(data):
130
+ # raise Exception('')"""
131
+
132
# NOTE: these two statements are the tail of the commented-out
# create_adapter stub above. Left live at module level they refer to
# adapter_name and code_string, which are not defined there, so they are
# commented out like the rest of the stub.
#     with open(f"dataset_adapters/{adapter_name}.py", 'w') as file:
#         file.write(code_string)
+
135
+
136
def create_adapter(sample, adapter_name):
    """Ask the language model for an adapter and save it to disk.

    A JSON schema is inferred from ``sample`` with genson. The schema and
    the sample are embedded in a prompt asking for a
    ``def transform_data(data):`` function that converts such data to the
    ShareGPT conversation shape. The code block is pulled out of the reply
    with ``extract_code`` and written to
    ``dataset_adapters/<adapter_name>.py``.

    NOTE(review): the saved file is model-generated code that is later
    executed when the adapter is loaded; it is not inspected here.

    Args:
        sample: One record of the dataset to adapt.
        adapter_name: File stem for the adapter module.

    Raises:
        RuntimeError: if the reply is empty or contains no extractable
            code block.
    """
    builder = SchemaBuilder()
    builder.add_object(sample)
    schema = builder.to_schema()

    prompt = f"""Make me minimal and efficient python code to convert data in the shape of

initial data shape
==========➡️📑📐==========
```jsonschema
{schema}
```
==========➡️📑📐==========

to equivalent data in the form

final data shape
==========⬇️📑📐==========
```jsonschema
{{'$schema': 'http://json-schema.org/schema#', 'type': 'object', 'properties': {{'conversations': {{'type': 'array', 'items': {{'type': 'object', 'properties': {{'from': {{ 'type': 'string', 'enum': ['human', 'gpt', 'system'] }}, 'value': {{'type': 'string'}}}}, 'required': ['from', 'value']}}}}}}, 'required': ['conversations']}}
```
==========⬇️📑📐==========

the data to transform is
```json
{sample}
```


Inside the data to transform, `input` and `instruction` is usually associated with `"from" : "human"` while `output` is usually associated with `"from" : "gpt"`

For transforming the data you shall use python. Make robust and elegant python code that will do the transformation


your code will contain a function `def transform_data(data):` that does the transformation and outputs the newly shaped data. Only the data, no schema. Your code snippet will include only the function signature and body. I know how to call it. You won't need to import anything, I will take care of parsing and dumping json. You work with dicts. Remember to be careful if you iterate over the data because I want the output conversation to always start with the prompt. In other words, always process "input" before "output" and "instruction" before "output". Such heuristics are very important. If there is "instruction" and "input" and the "input" is not empty, concat the input at the end of the first message. If the data contains no "system" message, human always speaks first. If it contains a "system" message, the "system" message is first, then human, then gpt, then alternating if needed

"human" ALWAYS SPEAKS BEFORE "gpt", if you suspect your code makes "gpt speak first, fix it

MOST IMPORTANT IS THAT YOU look at the initial data shape (➡️📑📐) to ground your transformation into final data shape (⬇️📑📐)

Your output should contain only the code for `def transform_data(data):`, signature and body. Put the code inside markdown code block"""

    system_prompt = """You are ChatGPT, a large language model trained by OpenAI, based on the GPT-4 architecture.
Knowledge cutoff: 2023-04
Current date: 2023-11-05

Image input capabilities: Enabled"""

    response = client.chat.completions.create(
        model="openai/gpt-4-1106-preview",  # Optional (user controls the default)
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
    )

    val = response.choices[0].message.content
    # The reply content can be empty; only try to extract code from real text.
    code_string = extract_code(val) if val else None

    if code_string is None:
        raise RuntimeError(
            f"no code block found in the model reply for adapter {adapter_name!r}"
        )

    # Explicit encoding: the generated code may contain non-ASCII text and
    # the platform default encoding is not necessarily UTF-8.
    with open(f"dataset_adapters/{adapter_name}.py", 'w', encoding='utf-8') as file:
        file.write(code_string)
238
+
239
+
240
@app.get("/sample")
async def get_sample(hash: str = Query(..., alias="hash")):
    """Return a previously stored sample, converted to the ShareGPT shape.

    The sample is looked up by hash through ``get_sample_by_hash``. That
    helper currently exists only as commented-out code in this module, so
    it is resolved at request time and the endpoint answers 501 while it
    is unavailable, instead of failing with an unhandled ``NameError``.

    Raises:
        HTTPException: 501 when sample storage is not available, 404 when
            no sample is stored under ``hash``.
    """
    lookup = globals().get("get_sample_by_hash")
    if lookup is None:
        raise HTTPException(status_code=501, detail="Sample storage is not enabled")

    res = await lookup(hash)
    if res is None:
        raise HTTPException(status_code=404, detail="Item not found")

    data, dataset = res
    sample = auto_tranform(json.loads(data))
    return {'sample': sample, 'dataset': dataset}
248
+
249
@app.get("/random-sample-stream")
async def get_random_sample(background_tasks: BackgroundTasks, dataset_name: str = Query(..., alias="dataset-name"), index: str = Query(None, alias="index")):
    """Stream the steps of fetching and converting one dataset row.

    The response is a ``text/event-stream`` whose events are JSON objects
    with a ``status`` field:

    * ``grab_sample`` — emitted first;
    * ``creating_adapter`` — no adapter exists yet for the row's shape;
    * ``done`` — carries the converted row in ``data`` and the row offset
      (as a string) in ``index``;
    * ``error`` — carries a ``message``; the stream ends after it.

    The row is read from the Hugging Face datasets-server: the first split
    listed for the ``default`` config is used, at offset ``index`` when
    given, otherwise at a random offset within the split.

    Args:
        background_tasks: Unused; kept for interface compatibility.
        dataset_name: Dataset identifier (query parameter ``dataset-name``).
        index: Optional row offset (query parameter ``index``).
    """
    import requests

    api_root = "https://datasets-server.huggingface.co"
    # Send the Authorization header only when a token is actually configured.
    token = os.environ.get('HF_TOKEN')
    headers = {"Authorization": f"Bearer {token}"} if token else {}

    def fetch_json(endpoint, params):
        """GET a datasets-server endpoint and return the decoded JSON body."""
        # `params` lets requests URL-encode the dataset name.
        response = requests.get(f"{api_root}/{endpoint}", headers=headers, params=params, timeout=30)
        if response.status_code != 200:
            raise Exception("hugging face api error")
        return response.json()

    def sse(payload):
        """Format one server-sent event."""
        return f"data: {json.dumps(payload)}\n\n"

    def event_stream():
        yield sse({'status': 'grab_sample'})

        try:
            info = fetch_json("info", {"dataset": dataset_name})
            splits = info['dataset_info']['default']['splits']
            split = next(iter(splits.values()))

            num_samples = split['num_examples']
            split_name = split['name']

            # randrange excludes the upper bound: valid offsets are
            # 0 .. num_samples - 1.
            idx = random.randrange(num_samples) if index is None else int(index)

            rows = fetch_json("rows", {
                "dataset": dataset_name,
                "config": "default",
                # Same split the row count was taken from.
                "split": split_name,
                "offset": idx,
                "length": 1,
            })
            random_sample = rows['rows'][0]['row']
        except Exception as e:
            message = e.message if hasattr(e, 'message') else str(e)
            print("error : ", message)
            yield sse({'status': 'error', 'message': message})
            # Nothing to convert: stop here instead of touching unset names.
            return

        transformed_data = random_sample

        if not is_sharegpt(random_sample):
            try:
                if not has_adapter(random_sample):
                    yield sse({'status': 'creating_adapter'})

                transformed_data = auto_tranform(random_sample)
            except Exception as e:
                print("error : ", e.message if hasattr(e, 'message') else e)
                yield sse({'status': 'error', 'message': str(e)})
                return

        yield sse({'status': 'done', 'data': transformed_data, 'index': str(idx)})

    return StreamingResponse(event_stream(), media_type="text/event-stream")
338
+
339
+
340
+
341
@app.get("/random-sample")
async def get_random_sample(dataset_name: str = Query(..., alias="dataset-name")):
    """Return one converted sample picked from the start of a dataset.

    The dataset is opened in streaming mode. The first split whose name
    contains ``train`` is used, falling back to the first available split.
    One sample is chosen at random among the first 100 and converted with
    the adapter named after the (sanitised) dataset name, which is created
    on demand.

    Raises:
        HTTPException: 404 when the dataset is not found, has no split or
            has no samples; 500 for any other failure.
    """
    from itertools import islice

    try:
        dataset = load_dataset(dataset_name, streaming=True)
        split_names = list(dataset.keys())
        if not split_names:
            raise HTTPException(status_code=404, detail="Dataset has no splits")

        train_splits = [key for key in split_names if "train" in key]
        split_name = train_splits[0] if train_splits else split_names[0]
        dataset = load_dataset(dataset_name, split=split_name, streaming=True)

        buffer_size = 100  # Define a reasonable buffer size
        samples_buffer = list(islice(dataset, buffer_size))
        if not samples_buffer:
            raise HTTPException(status_code=404, detail="Dataset has no samples")

        random_sample = random.choice(samples_buffer)

        sanitized = sanitize_filename(dataset_name)
        module_name = f"dataset_adapters.{sanitized}"
        module_spec = importlib.util.find_spec(module_name)

        if module_spec is None:
            create_adapter(random_sample, sanitized)

        spec = importlib.util.spec_from_file_location(module_name, f"dataset_adapters/{sanitized}.py")
        dynamic_module = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = dynamic_module
        spec.loader.exec_module(dynamic_module)

        # Use the function from the dynamically imported module
        return dynamic_module.transform_data(random_sample)

    except HTTPException:
        # Keep the status chosen above; do not let it be rewrapped as 500.
        raise
    except FileNotFoundError as e:
        raise HTTPException(status_code=404, detail="Dataset not found") from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
377
+
378
 
 
379
 
380
+ # @app.on_event("startup")
381
+ # async def startup_event():
382
+ # await setup_database()
383
@app.get("/")
def index() -> FileResponse:
    """Serve the front-end entry page."""
    # The page now lives in the built bundle under static/dist; the old
    # static/index.html is removed by this change.
    # NOTE(review): the page's assets under static/dist are not served by
    # any route visible in this module — confirm a static mount exists.
    return FileResponse(path="/app/static/dist/index.html", media_type="text/html")
requirements.txt CHANGED
@@ -1,7 +1,8 @@
1
- fastapi==0.74.*
2
- requests==2.27.*
3
- sentencepiece==0.1.*
4
- torch==1.11.*
5
- transformers==4.*
6
- uvicorn[standard]==0.17.*
7
-
 
 
1
+ fastapi==0.104.0
2
+ starlette==0.27.0
3
+ datasets==2.14.5
4
+ genson==1.2.2
5
+ pathvalidate==3.2.0
6
+ openai==1.3.3
7
+ jsonschema==4.17.3
8
+ numpy==1.22.0
static/dist/assets/index-34528448.js ADDED
The diff for this file is too large to render. See raw diff
 
static/dist/assets/index-6ff09fc9.css ADDED
@@ -0,0 +1 @@
 
 
1
+ #app{max-width:1280px;margin:0 auto;padding:1rem .4rem;width:100%}@media (min-width: 640px){#app{padding:1rem 2rem;display:flex;place-items:center}}*,:before,:after{box-sizing:border-box;border-width:0;border-style:solid;border-color:#e5e7eb}:before,:after{--tw-content: ""}html{line-height:1.5;-webkit-text-size-adjust:100%;-moz-tab-size:4;-o-tab-size:4;tab-size:4;font-family:ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji";font-feature-settings:normal;font-variation-settings:normal}body{margin:0;line-height:inherit}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,samp,pre{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier 
New,monospace;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}button,input,optgroup,select,textarea{font-family:inherit;font-feature-settings:inherit;font-variation-settings:inherit;font-size:100%;font-weight:inherit;line-height:inherit;color:inherit;margin:0;padding:0}button,select{text-transform:none}button,[type=button],[type=reset],[type=submit]{-webkit-appearance:button;background-color:transparent;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:baseline}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dl,dd,h1,h2,h3,h4,h5,h6,hr,figure,p,pre{margin:0}fieldset{margin:0;padding:0}legend{padding:0}ol,ul,menu{list-style:none;margin:0;padding:0}dialog{padding:0}textarea{resize:vertical}input::-moz-placeholder,textarea::-moz-placeholder{opacity:1;color:#9ca3af}input::placeholder,textarea::placeholder{opacity:1;color:#9ca3af}button,[role=button]{cursor:pointer}:disabled{cursor:default}img,svg,video,canvas,audio,iframe,embed,object{display:block;vertical-align:middle}img,video{max-width:100%;height:auto}[hidden]{display:none}*,:before,:after{--tw-border-spacing-x: 0;--tw-border-spacing-y: 0;--tw-translate-x: 0;--tw-translate-y: 0;--tw-rotate: 0;--tw-skew-x: 0;--tw-skew-y: 0;--tw-scale-x: 1;--tw-scale-y: 1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness: proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width: 
0px;--tw-ring-offset-color: #fff;--tw-ring-color: rgb(59 130 246 / .5);--tw-ring-offset-shadow: 0 0 #0000;--tw-ring-shadow: 0 0 #0000;--tw-shadow: 0 0 #0000;--tw-shadow-colored: 0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: }::backdrop{--tw-border-spacing-x: 0;--tw-border-spacing-y: 0;--tw-translate-x: 0;--tw-translate-y: 0;--tw-rotate: 0;--tw-skew-x: 0;--tw-skew-y: 0;--tw-scale-x: 1;--tw-scale-y: 1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness: proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width: 0px;--tw-ring-offset-color: #fff;--tw-ring-color: rgb(59 130 246 / .5);--tw-ring-offset-shadow: 0 0 #0000;--tw-ring-shadow: 0 0 #0000;--tw-shadow: 0 0 #0000;--tw-shadow-colored: 0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: 
}.fixed{position:fixed}.absolute{position:absolute}.relative{position:relative}.bottom-3{bottom:.75rem}.left-0{left:0}.right-0{right:0}.right-2{right:.5rem}.top-0{top:0}.my-1{margin-top:.25rem;margin-bottom:.25rem}.mb-12{margin-bottom:3rem}.mb-3{margin-bottom:.75rem}.mb-4{margin-bottom:1rem}.mr-1{margin-right:.25rem}.mt-2{margin-top:.5rem}.mt-3{margin-top:.75rem}.mt-4{margin-top:1rem}.mt-\[0\.4em\]{margin-top:.4em}.block{display:block}.inline-block{display:inline-block}.flex{display:flex}.h-6{height:1.5rem}.h-8{height:2rem}.h-full{height:100%}.min-h-\[44px\]{min-height:44px}.w-6{width:1.5rem}.w-8{width:2rem}.w-\[33\%\]{width:33%}.w-\[66\%\]{width:66%}.w-fit{width:-moz-fit-content;width:fit-content}.w-full{width:100%}.max-w-\[80\%\]{max-width:80%}.flex-1{flex:1 1 0%}.grow-0{flex-grow:0}@keyframes pulse{50%{opacity:.5}}.animate-pulse{animation:pulse 2s cubic-bezier(.4,0,.6,1) infinite}@keyframes spin{to{transform:rotate(360deg)}}.animate-spin{animation:spin 1s linear infinite}.cursor-pointer{cursor:pointer}.resize{resize:both}.flex-row{flex-direction:row}.flex-col{flex-direction:column}.items-start{align-items:flex-start}.items-end{align-items:flex-end}.items-center{align-items:center}.justify-center{justify-content:center}.justify-between{justify-content:space-between}.space-x-2>:not([hidden])~:not([hidden]){--tw-space-x-reverse: 0;margin-right:calc(.5rem * var(--tw-space-x-reverse));margin-left:calc(.5rem * calc(1 - var(--tw-space-x-reverse)))}.space-y-2>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(.5rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.5rem * var(--tw-space-y-reverse))}.space-y-4>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(1rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(1rem * 
var(--tw-space-y-reverse))}.rounded-2xl{border-radius:1rem}.rounded-full{border-radius:9999px}.rounded-lg{border-radius:.5rem}.rounded-md{border-radius:.375rem}.border{border-width:1px}.border-2{border-width:2px}.border-4{border-width:4px}.border-t{border-top-width:1px}.border-dashed{border-style:dashed}.border-blue-400{--tw-border-opacity: 1;border-color:rgb(96 165 250 / var(--tw-border-opacity))}.border-blue-500{--tw-border-opacity: 1;border-color:rgb(59 130 246 / var(--tw-border-opacity))}.border-gray-300{--tw-border-opacity: 1;border-color:rgb(209 213 219 / var(--tw-border-opacity))}.border-neutral-200{--tw-border-opacity: 1;border-color:rgb(229 229 229 / var(--tw-border-opacity))}.border-neutral-300{--tw-border-opacity: 1;border-color:rgb(212 212 212 / var(--tw-border-opacity))}.border-red-400{--tw-border-opacity: 1;border-color:rgb(248 113 113 / var(--tw-border-opacity))}.border-t-transparent{border-top-color:transparent}.bg-blue-500{--tw-bg-opacity: 1;background-color:rgb(59 130 246 / var(--tw-bg-opacity))}.bg-neutral-200{--tw-bg-opacity: 1;background-color:rgb(229 229 229 / var(--tw-bg-opacity))}.bg-red-100{--tw-bg-opacity: 1;background-color:rgb(254 226 226 / var(--tw-bg-opacity))}.bg-white{--tw-bg-opacity: 1;background-color:rgb(255 255 255 / var(--tw-bg-opacity))}.bg-yellow-100{--tw-bg-opacity: 1;background-color:rgb(254 249 195 / var(--tw-bg-opacity))}.p-1{padding:.25rem}.p-4{padding:1rem}.px-2{padding-left:.5rem;padding-right:.5rem}.px-3{padding-left:.75rem;padding-right:.75rem}.px-4{padding-left:1rem;padding-right:1rem}.py-2{padding-top:.5rem;padding-bottom:.5rem}.py-3{padding-top:.75rem;padding-bottom:.75rem}.py-6{padding-top:1.5rem;padding-bottom:1.5rem}.pl-4{padding-left:1rem}.pr-12{padding-right:3rem}.pt-12{padding-top:3rem}.text-center{text-align:center}.text-3xl{font-size:1.875rem;line-height:2.25rem}.text-sm{font-size:.875rem;line-height:1.25rem}.font-bold{font-weight:700}.font-semibold{font-weight:600}.text-gray-500{--tw-text-opacity: 
1;color:rgb(107 114 128 / var(--tw-text-opacity))}.text-gray-600{--tw-text-opacity: 1;color:rgb(75 85 99 / var(--tw-text-opacity))}.text-neutral-900{--tw-text-opacity: 1;color:rgb(23 23 23 / var(--tw-text-opacity))}.text-red-700{--tw-text-opacity: 1;color:rgb(185 28 28 / var(--tw-text-opacity))}.text-white{--tw-text-opacity: 1;color:rgb(255 255 255 / var(--tw-text-opacity))}.text-yellow-700{--tw-text-opacity: 1;color:rgb(161 98 7 / var(--tw-text-opacity))}.placeholder-gray-400::-moz-placeholder{--tw-placeholder-opacity: 1;color:rgb(156 163 175 / var(--tw-placeholder-opacity))}.placeholder-gray-400::placeholder{--tw-placeholder-opacity: 1;color:rgb(156 163 175 / var(--tw-placeholder-opacity))}.filter{filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow)}.transition{transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,-webkit-backdrop-filter;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter,-webkit-backdrop-filter;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.duration-300{transition-duration:.3s}:root{font-family:Inter,system-ui,Avenir,Helvetica,Arial,sans-serif;line-height:1.5;font-weight:400;color-scheme:light dark;color:#ffffffde;background-color:#242424;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;-webkit-text-size-adjust:100%}a{font-weight:500;color:#646cff;text-decoration:inherit}a:hover{color:#535bf2}body{margin:0;display:flex;min-width:320px;min-height:100vh}h1{font-size:3.2em;line-height:1.1}button{border-radius:8px;border:1px 
solid transparent;padding:.6em 1.2em;font-size:1em;font-weight:500;font-family:inherit;background-color:#1a1a1a;cursor:pointer;transition:border-color .25s}button:hover{border-color:#646cff}button:focus,button:focus-visible{outline:4px auto -webkit-focus-ring-color}@media (prefers-color-scheme: light){:root{color:#213547;background-color:#fff}a:hover{color:#747bff}button{background-color:#f9f9f9}}code{color:#abb2bf;padding:.2em .4em;margin:0;font-size:85%;white-space:break-spaces;background-color:#282c34;border-radius:6px}table,thead,tbody,th,tr,td{border:solid white 1px}td,th{padding:1em}h1{font-size:1.5em}.system-message{padding-bottom:2rem;color:#fff;background-color:#334155}.system-message:after{content:"";font-family:icomoon;display:block;position:absolute;font-size:1.25rem;bottom:.5rem;right:1rem}.hover\:cursor-pointer:hover{cursor:pointer}.hover\:border-gray-400:hover{--tw-border-opacity: 1;border-color:rgb(156 163 175 / var(--tw-border-opacity))}.hover\:bg-blue-600:hover{--tw-bg-opacity: 1;background-color:rgb(37 99 235 / var(--tw-bg-opacity))}.hover\:bg-blue-700:hover{--tw-bg-opacity: 1;background-color:rgb(29 78 216 / var(--tw-bg-opacity))}.hover\:bg-gray-100:hover{--tw-bg-opacity: 1;background-color:rgb(243 244 246 / var(--tw-bg-opacity))}.hover\:bg-neutral-300:hover{--tw-bg-opacity: 1;background-color:rgb(212 212 212 / var(--tw-bg-opacity))}.hover\:opacity-80:hover{opacity:.8}.focus\:border-blue-500:focus{--tw-border-opacity: 1;border-color:rgb(59 130 246 / var(--tw-border-opacity))}.focus\:outline-none:focus{outline:2px solid transparent;outline-offset:2px}.focus\:ring:focus{--tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);--tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px + var(--tw-ring-offset-width)) var(--tw-ring-color);box-shadow:var(--tw-ring-offset-shadow),var(--tw-ring-shadow),var(--tw-shadow, 0 0 #0000)}.focus\:ring-1:focus{--tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 
var(--tw-ring-offset-width) var(--tw-ring-offset-color);--tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(1px + var(--tw-ring-offset-width)) var(--tw-ring-color);box-shadow:var(--tw-ring-offset-shadow),var(--tw-ring-shadow),var(--tw-shadow, 0 0 #0000)}.focus\:ring-neutral-300:focus{--tw-ring-opacity: 1;--tw-ring-color: rgb(212 212 212 / var(--tw-ring-opacity))}@media (min-width: 640px){.sm\:my-1{margin-top:.25rem;margin-bottom:.25rem}.sm\:my-1\.5{margin-top:.375rem;margin-bottom:.375rem}.sm\:mb-8{margin-bottom:2rem}.sm\:inline{display:inline}.sm\:border{border-width:1px}.sm\:p-4{padding:1rem}.sm\:text-base{font-size:1rem;line-height:1.5rem}}@media (min-width: 768px){.md\:max-w-\[67\%\]{max-width:67%}.md\:justify-center{justify-content:center}.md\:pt-0{padding-top:0}}@font-face{font-family:icomoon;src:url(/fonts/icomoon.eot?qu2wpf);src:url(/fonts/icomoon.eot?qu2wpf#iefix) format("embedded-opentype"),url(/fonts/icomoon.ttf?qu2wpf) format("truetype"),url(/fonts/icomoon.woff?qu2wpf) format("woff"),url(/fonts/icomoon.svg?qu2wpf#icomoon) format("svg");font-weight:400;font-style:normal;font-display:block}[class^=icon-],[class*=" icon-"]{font-family:icomoon!important;speak:never;font-style:normal;font-weight:400;font-variant:normal;text-transform:none;line-height:1;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.icon-system:before{content:""}
static/dist/fonts/icomoon.eot ADDED
Binary file (1.54 kB). View file
 
static/dist/fonts/icomoon.svg ADDED
static/dist/fonts/icomoon.ttf ADDED
Binary file (1.38 kB). View file
 
static/dist/fonts/icomoon.woff ADDED
Binary file (1.45 kB). View file
 
static/dist/index.html ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <link rel="icon" type="image/svg+xml" href="/vite.svg" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
+ <title>Vite + Preact</title>
8
+ <script type="module" crossorigin src="/assets/index-34528448.js"></script>
9
+ <link rel="stylesheet" href="/assets/index-6ff09fc9.css">
10
+ </head>
11
+ <body ondrop="event.preventDefault()" >
12
+ <div id="app"></div>
13
+
14
+ </body>
15
+ </html>
static/index.html DELETED
@@ -1,36 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8" />
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
- <title>Fast API 🤗 Space served with Uvicorn</title>
7
- <link rel="stylesheet" href="style.css" />
8
- <script type="module" src="script.js"></script>
9
- </head>
10
- <body>
11
- <main>
12
- <section id="text-gen">
13
- <h1>Text generation using Flan T5</h1>
14
- <p>
15
- Model:
16
- <a
17
- href="https://huggingface.co/google/flan-t5-small"
18
- rel="noreferrer"
19
- target="_blank"
20
- >google/flan-t5-small</a
21
- >
22
- </p>
23
- <form class="text-gen-form">
24
- <label for="text-gen-input">Text prompt</label>
25
- <input
26
- id="text-gen-input"
27
- type="text"
28
- value="English: Translate There are many ducks. German:"
29
- />
30
- <button id="text-gen-submit">Submit</button>
31
- <p class="text-gen-output"></p>
32
- </form>
33
- </section>
34
- </main>
35
- </body>
36
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static/script.js DELETED
@@ -1,21 +0,0 @@
1
- const textGenForm = document.querySelector('.text-gen-form');
2
-
3
- const translateText = async (text) => {
4
- const inferResponse = await fetch(`infer_t5?input=${text}`);
5
- const inferJson = await inferResponse.json();
6
-
7
- return inferJson.output;
8
- };
9
-
10
- textGenForm.addEventListener('submit', async (event) => {
11
- event.preventDefault();
12
-
13
- const textGenInput = document.getElementById('text-gen-input');
14
- const textGenParagraph = document.querySelector('.text-gen-output');
15
-
16
- try {
17
- textGenParagraph.textContent = await translateText(textGenInput.value);
18
- } catch (err) {
19
- console.error(err);
20
- }
21
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static/style.css DELETED
@@ -1,45 +0,0 @@
1
- body {
2
- --text: hsl(0 0% 15%);
3
- padding: 2.5rem;
4
- font-family: sans-serif;
5
- color: var(--text);
6
- }
7
-
8
- body.dark-theme {
9
- --text: hsl(0 0% 90%);
10
- background-color: hsl(223 39% 7%);
11
- }
12
-
13
- main {
14
- max-width: 80rem;
15
- text-align: center;
16
- }
17
-
18
- section {
19
- display: flex;
20
- flex-direction: column;
21
- align-items: center;
22
- }
23
-
24
- a {
25
- color: var(--text);
26
- }
27
-
28
- form {
29
- width: 30rem;
30
- margin: 0 auto;
31
- }
32
-
33
- input {
34
- width: 100%;
35
- }
36
-
37
- button {
38
- cursor: pointer;
39
- }
40
-
41
- .text-gen-output {
42
- min-height: 1.2rem;
43
- margin: 1rem;
44
- border: 0.5px solid grey;
45
- }