Create app.py
app.py
ADDED
import os
import re

import gradio as gr
import requests

# -----------------------------
# 1. Configure the open-source LLM API endpoint
# For demonstration, we can use a hosted inference API on Hugging Face
# that is free to use (up to a certain rate limit).
# -----------------------------
# Example: We'll use an OpenAssistant model endpoint on HF.
# You can find many such endpoints in the Hugging Face "Spaces" or "Models"
# sections that provide a free Inference API.

API_URL = "https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"

# Read the token from the environment (e.g., a Space secret). If the model
# doesn't require a token, the Authorization header is simply omitted.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
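
# Illustrative sanity check (hypothetical token value): the same endpoint can
# be exercised directly with curl, which helps when debugging auth or
# rate-limit errors before involving the UI:
#   curl -X POST "$API_URL" \
#        -H "Authorization: Bearer $HF_API_TOKEN" \
#        -d '{"inputs": "Hello"}'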

# -----------------------------
# 2. Define a function to query the model
# -----------------------------
def query_model(prompt: str) -> str:
    """
    Sends the prompt to the Hugging Face Inference API and returns the model's response.
    """
    # The payload format for text generation can vary by model. We'll try a general approach:
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 200,  # limit response length
            "temperature": 0.7,     # moderate creativity
        },
    }
    # A timeout guards against the request hanging indefinitely.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    if response.status_code == 200:
        model_output = response.json()
        # The output key ("generated_text" or "text") can vary depending on the model.
        if isinstance(model_output, dict) and "generated_text" in model_output:
            return model_output["generated_text"]
        elif isinstance(model_output, list) and len(model_output) > 0:
            # Some endpoints return a list of dicts.
            return model_output[0].get("generated_text", "")
        else:
            return "Error: Unexpected model output format."
    else:
        return f"Error {response.status_code}: {response.text}"
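
# Note: a successful text-generation call to this endpoint typically returns a
# JSON list such as [{"generated_text": "..."}], which is why the branches
# above check both dict and list shapes.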

# -----------------------------
# 3. Define a simple evaluation function
# This is a naive "keyword and structure" based scoring for demonstration.
# -----------------------------
def evaluate_response(response: str) -> dict:
    """
    Rates the response on a scale of 1–5 for:
    1) Relevance (R)
    2) Depth (D)
    3) Clarity (C)
    4) References (E)
    5) Overall Quality (Q)
    Returns a dict with the individual scores and the total.
    """
    # We'll take a very simplistic approach:

    # Relevance: presence of "remote work" plus a mention of "software engineers".
    relevance = 5 if ("remote work" in response.lower() and "software engineer" in response.lower()) else 3

    # Depth: reward longer answers (> 150 words scores 5, > 80 words scores 4).
    word_count = len(response.split())
    depth = 5 if word_count > 150 else (4 if word_count > 80 else 3)

    # Clarity: check whether the text is split into multiple paragraphs.
    paragraphs = response.strip().split("\n\n")
    clarity = 5 if len(paragraphs) >= 2 else 3

    # References: look for something like 'reference', 'source', 'citation', or a URL.
    if re.search(r"reference|source|citation|http", response, re.IGNORECASE):
        references = 5
    else:
        references = 2

    # Overall Quality: a naive combination.
    # We'll penalize if the text is obviously incomplete.
    if "..." in response[-10:]:
        # If it ends with "...", it may have been cut off mid-sentence.
        overall = 3
    else:
        overall = 5 if (relevance >= 4 and depth >= 4 and references >= 4) else 4

    # Sum of all five scores.
    total_score = relevance + depth + clarity + references + overall

    return {
        "Relevance": relevance,
        "Depth": depth,
        "Clarity": clarity,
        "References": references,
        "Overall": overall,
        "Total": total_score,
    }
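
# Illustrative example (hypothetical input): for a short, single-paragraph
# answer that mentions "remote work" and "software engineers" but cites no
# sources, this returns:
#   {"Relevance": 5, "Depth": 3, "Clarity": 3, "References": 2,
#    "Overall": 4, "Total": 17}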

# -----------------------------
# 4. Define the Gradio interface function
# This is the function that runs when the user clicks "Generate & Evaluate".
# -----------------------------
def generate_and_evaluate(prompt: str):
    if not prompt.strip():
        return "Please enter a prompt.", {}

    # 1) Get the LLM response.
    llm_response = query_model(prompt)

    # 2) Evaluate it.
    scores = evaluate_response(llm_response)

    return llm_response, scores

# -----------------------------
# 5. Build the Gradio UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Remote Work Benefits Generator & Evaluator")
    gr.Markdown(
        "Enter a prompt about the key benefits of remote work for software engineers. "
        "The model will generate a response and our auto-evaluator will score it."
    )

    prompt_input = gr.Textbox(
        label="Enter your prompt here",
        placeholder="E.g., 'Write a short report on the benefits of remote work for software engineers...'",
        lines=3
    )

    generate_button = gr.Button("Generate & Evaluate")

    response_output = gr.Textbox(
        label="LLM Response",
        lines=10
    )

    score_output = gr.JSON(
        label="Evaluation Scores",
        visible=True
    )

    generate_button.click(
        fn=generate_and_evaluate,
        inputs=[prompt_input],
        outputs=[response_output, score_output]
    )
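
# Note: generate_and_evaluate returns a (str, dict) tuple, which lines up with
# the [response_output, score_output] components wired to the click event.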

# -----------------------------
# 6. Launch
# -----------------------------
if __name__ == "__main__":
    demo.launch()
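
A minimal requirements.txt sketch for running this Space (assuming the Gradio SDK; os and re are from the standard library):

gradio
requests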