Commit
·
7cb2c40
1
Parent(s):
029808c
final deployment v-1.1.5
Browse files
README.md
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
---
|
| 2 |
-
title: Sarvam
|
| 3 |
emoji: 🚀
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: streamlit
|
| 7 |
sdk_version: 1.42.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
-
short_description:
|
| 12 |
---
|
| 13 |
|
| 14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Sarvam AI Entity Normalisation
|
| 3 |
emoji: 🚀
|
| 4 |
+
colorFrom: orange
|
| 5 |
+
colorTo: pink
|
| 6 |
sdk: streamlit
|
| 7 |
sdk_version: 1.42.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
+
short_description: Finetuned & quantised llama-3.1-8b model for entity normalisation in indic languages
|
| 12 |
---
|
| 13 |
|
| 14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
|
@@ -11,6 +11,26 @@ env_var = "ran_script_once"
|
|
| 11 |
host = "127.0.0.1"
|
| 12 |
port = 8081
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
def is_port_in_use(host: str, port: int) -> bool:
|
| 15 |
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
|
| 16 |
sock.settimeout(1) # Set a timeout in case nothing is listening
|
|
@@ -112,26 +132,6 @@ def setup():
|
|
| 112 |
# st.write("An error occurred:")
|
| 113 |
# st.write(e.stderr)
|
| 114 |
|
| 115 |
-
prompt_format = \
|
| 116 |
-
'''Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
| 117 |
-
|
| 118 |
-
## Instruction:
|
| 119 |
-
Normalize entities in a given sentence, including dates (various formats), currencies (multiple symbols and notations), and scientific units (single and compound). Convert them into their full, standardized textual representations in the same language.
|
| 120 |
-
|
| 121 |
-
### Example Input:
|
| 122 |
-
15/03/1990 को, वैज्ञानिक ने $120 में 500mg यौगिक का एक नमूना खरीदा।
|
| 123 |
-
|
| 124 |
-
### Example Response:
|
| 125 |
-
पंद्रह मार्च उन्नीस सौ नब्बे को, वैज्ञानिक ने एक सौ बीस अमेरिकी डॉलर में पाँच सौ मिलीग्राम यौगिक का एक नमूना खरीदा।
|
| 126 |
-
|
| 127 |
-
Just as entities like dates, currencies, and scientific units have been normalized into simple terms, you must do the same. Do not leave any entity un-normalised.
|
| 128 |
-
|
| 129 |
-
## Input:
|
| 130 |
-
{}
|
| 131 |
-
|
| 132 |
-
## Response:
|
| 133 |
-
{}'''
|
| 134 |
-
|
| 135 |
_ = infer("हा अहवाल 30 pages लांब आणि 10 MB आकाराचा आहे.")
|
| 136 |
# output = "hello me tasmay!"
|
| 137 |
# time.sleep(5)
|
|
@@ -142,7 +142,7 @@ def main():
|
|
| 142 |
start_time = time.time()
|
| 143 |
# Show a spinner while the app is setting up.
|
| 144 |
if "setup_done" not in st.session_state:
|
| 145 |
-
with st.spinner("Setting up the app, please wait
|
| 146 |
setup()
|
| 147 |
st.session_state["setup_done"] = True
|
| 148 |
else:
|
|
|
|
| 11 |
host = "127.0.0.1"
|
| 12 |
port = 8081
|
| 13 |
|
| 14 |
+
prompt_format = \
|
| 15 |
+
'''Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
| 16 |
+
|
| 17 |
+
## Instruction:
|
| 18 |
+
Normalize entities in a given sentence, including dates (various formats), currencies (multiple symbols and notations), and scientific units (single and compound). Convert them into their full, standardized textual representations in the same language.
|
| 19 |
+
|
| 20 |
+
### Example Input:
|
| 21 |
+
15/03/1990 को, वैज्ञानिक ने $120 में 500mg यौगिक का एक नमूना खरीदा।
|
| 22 |
+
|
| 23 |
+
### Example Response:
|
| 24 |
+
पंद्रह मार्च उन्नीस सौ नब्बे को, वैज्ञानिक ने एक सौ बीस अमेरिकी डॉलर में पाँच सौ मिलीग्राम यौगिक का एक नमूना खरीदा।
|
| 25 |
+
|
| 26 |
+
Just as entities like dates, currencies, and scientific units have been normalized into simple terms, you must do the same. Do not leave any entity un-normalised.
|
| 27 |
+
|
| 28 |
+
## Input:
|
| 29 |
+
{}
|
| 30 |
+
|
| 31 |
+
## Response:
|
| 32 |
+
{}'''
|
| 33 |
+
|
| 34 |
def is_port_in_use(host: str, port: int) -> bool:
|
| 35 |
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
|
| 36 |
sock.settimeout(1) # Set a timeout in case nothing is listening
|
|
|
|
| 132 |
# st.write("An error occurred:")
|
| 133 |
# st.write(e.stderr)
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
_ = infer("हा अहवाल 30 pages लांब आणि 10 MB आकाराचा आहे.")
|
| 136 |
# output = "hello me tasmay!"
|
| 137 |
# time.sleep(5)
|
|
|
|
| 142 |
start_time = time.time()
|
| 143 |
# Show a spinner while the app is setting up.
|
| 144 |
if "setup_done" not in st.session_state:
|
| 145 |
+
with st.spinner("Setting up the app, please wait. It may take around 6-7 minutes to setup."):
|
| 146 |
setup()
|
| 147 |
st.session_state["setup_done"] = True
|
| 148 |
else:
|