Spaces:
Sleeping
Sleeping
Initial streamlit app - fikran
Browse files
app.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
# Set the page title and switch the app to the wide layout.
st.set_page_config(page_title="LLM Evaluation Lab — by Fikran", layout="wide")

# Main on-page heading.
st.title("🔬 LLM Evaluation Lab — Real-world Testing by Fikran")

# Introductory blurb, rendered as Markdown and closed by a horizontal rule.
# NOTE(review): the glyph in front of "What this is" was not recoverable from
# the mis-decoded text; 🔍 is a stand-in — confirm against the original app.py.
st.markdown(
    """
Welcome to the interactive showcase for evaluating **Large Language Models** (LLMs) using real-world user interactions!

🔍 **What this is**: A companion demo to [Fikran](https://fikran.com), a multilingual platform where people interact with AI agents in a natural, dynamic environment.
✅ We offer model testing, benchmarking, and prompt refinement in live contexts—not just synthetic benchmarks.

---
"""
)
|
16 |
+
|
17 |
+
# Section listing what the demo offers and who it is aimed at.
st.header("🎯 What You Can Do Here")

# NOTE(review): the emoji on the "Track", "Apply" and "Export" bullets were
# only partly recoverable from the mis-decoded text; 📊, 🔧 and 📄 are best
# guesses — confirm against the original app.py.
st.markdown(
    """
- ✅ Understand the **real-world performance** of your LLMs.
- 📊 Track how they behave across different user queries and scenarios.
- 🔧 Apply **prompt engineering**, **LoRA-based customization**, and **dialogue tuning**.
- 📄 Export insights as PDF reports or structured logs.

This system is perfect for:
- Researchers evaluating fine-tuned models
- Product teams testing chatbot behavior before deployment
- Prompt engineers experimenting with multi-agent setups
"""
)
|
31 |
+
|
32 |
+
# Call-to-action section: two side-by-side buttons, each revealing a link.
# NOTE(review): the emoji on the first button was not recoverable from the
# mis-decoded text (🚀 is a stand-in) and 📦 on the second is a best guess —
# confirm both against the original app.py.
st.header("🛠️ Try It or Order a Full Evaluation")

col1, col2 = st.columns(2)

# st.button() is truthy only on the rerun triggered by the click, so each link
# is shown right after its button is pressed.
# The link bodies are plain Markdown with no HTML, so unsafe_allow_html is not
# needed and the default (escaped) rendering is used.
with col1:
    if st.button("🚀 Try Fikran Now"):
        st.markdown("[Click to explore Fikran](https://www.fikran.com/terms/about-us?lang=english)")

with col2:
    if st.button("📦 Order a Full Evaluation"):
        st.markdown("[See the Service on Upwork](https://www.upwork.com/services/product/development-it-a-real-world-evaluation-of-your-llm-in-a-dynamic-interactive-environment-1909379987479305454)")
|
43 |
+
|
44 |
+
# Horizontal rule separating the call-to-action buttons from the footer.
st.markdown("---")

# Maintainer credit shown in an info box; the profile link is Markdown.
maintainer_credit = (
    "This app is maintained by "
    "[Abdennacer Elbasri](https://huggingface.co/elbasri), "
    "founder of Fikran."
)
st.info(maintainer_credit)
|
47 |
+
|