# app.py for a Hugging Face Space (running on ZeroGPU, "Running on Zero")
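# Gradio demo for Darija (Moroccan Arabic) fill-mask inference with
# atlasia/XLM-RoBERTa-Morocco; user inputs and model outputs are logged
# to the atlasia/atlaset_inference_ds dataset via CommitScheduler.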
import gradio as gr
import torch
from transformers import pipeline
import os
import spaces
from huggingface_hub import CommitScheduler
from pathlib import Path
import uuid
import json
import logging
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("app.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("darija-masked-lm")
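# The HF token is read from a Space secret named HF_KEY (KeyError if unset)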
key = os.environ["HF_KEY"]
# Define the file where the data will be saved
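# A fresh UUID per process keeps restarts and parallel replicas from
# appending to the same file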
submit_file = Path("user_submit/") / f"data_{uuid.uuid4()}.json"
feedback_file = submit_file
# Create directory if it doesn't exist
submit_file.parent.mkdir(exist_ok=True, parents=True)
logger.info(f"Created feedback file: {feedback_file}")
scheduler = CommitScheduler(
repo_id="atlasia/atlaset_inference_ds",
repo_type="dataset",
folder_path=submit_file.parent,
path_in_repo="masked_lm",
every=5,
token=key
)
logger.info(f"Initialized CommitScheduler for repo: atlasia/atlaset_inference_ds")
def save_feedback(input_text, output) -> None:
    """
    Append an input/output pair to a JSON Lines file, holding the scheduler's
    lock to avoid concurrent writes from different users and to avoid writing
    while a commit is in progress.
    """
    logger.info(f"Saving feedback to {feedback_file}")
    with scheduler.lock:
        try:
            with feedback_file.open("a") as f:
                f.write(json.dumps({
                    "input": input_text,
                    "output": output,
                }))
                f.write("\n")
            logger.info("Feedback saved successfully")
        except Exception as e:
            logger.error(f"Error saving feedback: {str(e)}")
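# Each saved record is one JSON line of the form:
# {"input": "<user text>", "output": {"<token_str>": <probability>, ...}}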
def load_model():
    logger.info("Loading model... This may take a minute on Spaces")
    pipe = pipeline(
        task="fill-mask",
        model="atlasia/XLM-RoBERTa-Morocco",
        token=key,
        device=0,
        torch_dtype=torch.float16  # use half precision to cut GPU memory
    )
    logger.info("Model loaded successfully!")
    return pipe
print("[INFO] load model ...")
pipe=load_model()
print("[INFO] model loaded")
@spaces.GPU
def predict(text):
    # The fill-mask pipeline returns the top candidates for the <mask> token,
    # each a dict with "score" and "token_str" (among other keys)
    outputs = pipe(text)
    scores = [x["score"] for x in outputs]
    tokens = [x["token_str"] for x in outputs]
    result = {label: float(prob) for label, prob in zip(tokens, scores)}
    # Persist the input/output pair (no extra metadata is recorded)
    save_feedback(text, result)
    return result
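# gr.Label renders a {label: confidence} dict as a ranked list, which matches
# the dict returned by predict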
# Create Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Input text box
            input_text = gr.Textbox(
                label="Input",
                placeholder="Enter text here...",
                rtl=True
            )
            # Button row
            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Submit", variant="primary")
            # Output probabilities
            output_labels = gr.Label(
                label="Prediction Results",
                show_label=False
            )
            # Examples section with basic configuration. The examples are in
            # Darija; English glosses: "The capital of <mask> is Rabat",
            # "Morocco <mask> beautiful", and "My name is Maryam, and I live
            # in <mask>, the capital of Palestine"
            gr.Examples(
                examples=[
                    "العاصمة د <mask> هي الرباط",
                    "المغرب <mask> زوين",
                    "انا سميتي مريم، و كنسكن ف<mask> العاصمة دفلسطين",
                ],
                inputs=input_text,
                fn=predict,
                outputs=output_labels,
                cache_examples=True
            )
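            # Note: cache_examples=True runs predict on every example when the
            # app starts, so the cached runs are also logged via save_feedback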
    # Button actions
    submit_btn.click(
        predict,
        inputs=input_text,
        outputs=output_labels
    )
    clear_btn.click(
        lambda: "",
        outputs=input_text
    )
# Launch the app with simple queue
demo.queue() # No parameters for older Gradio versions
demo.launch()