MohamedMotaz commited on
Commit
db3ee5d
1 Parent(s): c952d3f
Files changed (3) hide show
  1. app.py +81 -0
  2. assistant.py +63 -0
  3. requirements.txt +116 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from phi.assistant import Assistant
3
+ from phi.document.reader.pdf import PDFReader
4
+ from phi.utils.log import logger
5
+ from assistant import get_groq_assistant
6
+ import io
7
+
8
# Page chrome: title, header, and a one-line usage hint for the grader UI.
st.set_page_config(page_title="Test Corrector Model")
st.title("Test Corrector Model")
st.markdown("##### Upload Model Answer and Student Answer PDFs to get the grades")
13
+
14
def restart_assistant():
    """Drop the cached assistant and its run id, then rerun the script."""
    for key in ("assistant", "assistant_run_id"):
        st.session_state[key] = None
    st.rerun()
18
+
19
def main():
    """Streamlit entry point: select models, upload two PDFs, grade answers.

    Flow:
      1. Sidebar selects the LLM and embeddings model; changing either one
         resets the cached assistant via ``restart_assistant``.
      2. An assistant (and a run) is created once per session and cached in
         ``st.session_state``.
      3. Both PDFs are read into per-page text lists, and a single grading
         prompt is streamed through the assistant.
    """
    # Sidebar model pickers.
    llm_model = st.sidebar.selectbox(
        "Select LLM",
        options=["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768"],
    )
    embeddings_model = st.sidebar.selectbox(
        "Select Embeddings",
        options=["nomic-embed-text", "text-embedding-3-small"],
    )

    # Reset the assistant whenever the LLM selection changes.
    if "llm_model" not in st.session_state:
        st.session_state["llm_model"] = llm_model
    elif st.session_state["llm_model"] != llm_model:
        st.session_state["llm_model"] = llm_model
        restart_assistant()

    # Reset the assistant whenever the embeddings selection changes.
    if "embeddings_model" not in st.session_state:
        st.session_state["embeddings_model"] = embeddings_model
    elif st.session_state["embeddings_model"] != embeddings_model:
        st.session_state["embeddings_model"] = embeddings_model
        restart_assistant()

    # Create the assistant once per session; reuse it on later reruns.
    assistant: Assistant
    if st.session_state.get("assistant") is None:
        logger.info(f"---*--- Creating {llm_model} Assistant ---*---")
        assistant = get_groq_assistant(llm_model=llm_model, embeddings_model=embeddings_model)
        st.session_state["assistant"] = assistant
    else:
        assistant = st.session_state["assistant"]

    try:
        st.session_state["assistant_run_id"] = assistant.create_run()
    except Exception:
        st.warning("Could not create assistant, is the database running?")
        return

    # Upload the model-answer PDF and extract one text chunk per document.
    model_answer_pdf = st.file_uploader("Upload Model Answer PDF", type="pdf")
    model_answers = []
    if model_answer_pdf:
        reader = PDFReader()
        model_documents = reader.read(io.BytesIO(model_answer_pdf.read()))
        model_answers = [doc.content for doc in model_documents]

    # Upload the student-answer PDF and extract its text the same way.
    student_answer_pdf = st.file_uploader("Upload Student Answer PDF", type="pdf")
    student_answers = []
    if student_answer_pdf:
        reader = PDFReader()
        student_documents = reader.read(io.BytesIO(student_answer_pdf.read()))
        student_answers = [doc.content for doc in student_documents]

    # Grade the student answers against the model answers.
    if st.button("Grade Answers"):
        if model_answers and student_answers:
            # BUG FIX: the prompt previously interpolated
            # `[doc.content for doc in model_documents]` (and the student
            # equivalent), bypassing the already-extracted lists that the
            # guard above actually checks. Use the extracted text directly —
            # same content, no dependency on conditionally-defined names.
            prompt = (
                "Grade the following student answer based on the model answer:\n\n"
                f"Model Answer: {model_answers}\n\nStudent Answer: {student_answers}"
            )
            # assistant.run returns a generator of response chunks; join the
            # stream into one string before displaying it.
            response = "".join(assistant.run(prompt))
            st.write(f"{response}")
        else:
            st.warning("Please upload both Model Answer PDF and Student Answer PDF")


main()
assistant.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from typing import Optional

from phi.assistant import Assistant
from phi.embedder.ollama import OllamaEmbedder
from phi.embedder.openai import OpenAIEmbedder
from phi.knowledge import AssistantKnowledge
from phi.llm.groq import Groq
from phi.storage.assistant.postgres import PgAssistantStorage
from phi.vectordb.pgvector import PgVector2
9
+
10
+ # db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai"
11
+ db_url = "postgresql://ai_owner:B9iIwFyus4VO@ep-restless-block-a1e1oiah.ap-southeast-1.aws.neon.tech/ai?sslmode=require"
12
+
13
def get_groq_assistant(
    llm_model: str = "llama3-70b-8192",
    embeddings_model: str = "text-embedding-3-small",
    user_id: Optional[str] = None,
    run_id: Optional[str] = None,
    debug_mode: bool = True,
) -> Assistant:
    """Build a Groq-backed RAG assistant configured for test grading.

    Args:
        llm_model: Groq chat model name.
        embeddings_model: Embeddings model; ``"nomic-embed-text"`` routes to
            Ollama (768 dims), anything else to OpenAI (1536 dims).
        user_id: Optional user to associate with the assistant.
        run_id: Optional existing run id to resume.
        debug_mode: Forwarded to the Assistant for verbose logging.

    Returns:
        A configured ``Assistant`` with Postgres-backed storage and a
        pgvector knowledge base.
    """
    # Pick the embedder by model name; dimensions must match the table below.
    embedder = (
        OllamaEmbedder(model=embeddings_model, dimensions=768)
        if embeddings_model == "nomic-embed-text"
        else OpenAIEmbedder(model=embeddings_model, dimensions=1536)
    )
    # Ollama- and OpenAI-embedded documents live in separate tables because
    # their vector dimensions differ.
    embeddings_table = (
        "groq_rag_documents_ollama" if embeddings_model == "nomic-embed-text" else "groq_rag_documents_openai"
    )

    return Assistant(
        name="groq_rag_assistant",
        run_id=run_id,
        user_id=user_id,
        llm=Groq(model=llm_model),
        storage=PgAssistantStorage(table_name="groq_rag_assistant", db_url=db_url),
        knowledge_base=AssistantKnowledge(
            vector_db=PgVector2(
                db_url=db_url,
                collection=embeddings_table,
                embedder=embedder,
            ),
            num_documents=2,
        ),
        # NOTE: slightly different instructions produce different grading styles.
        description="You are an AI called 'GroqRAG' and your task is to grade student answers based on model answers.",
        instructions=[
            "You will always take two PDF files as input: Model Answer (best answers) and Student Answer.",
            "Don't give marks to the model answers file only use it as a reference",
            "You should give a grade to each question on the student answer based on the model answer.",
            "Use the model answer as the reference for grading.",
            "A student who provides the meaning of an answer but uses different words and mentions the entire information given in the model answer will receive full marks.",
            "A student who provides incomplete or irrelevant information will lose marks based on the quality and completeness of their answer.",
            # BUG FIX: the original list was missing commas after the next two
            # strings, so Python's implicit string concatenation silently
            # merged them into a single instruction.
            "Use a consistent marking technique so that The same answers should always receive the same marks.",
            "A question with no answer should receive zero marks.",
        ],
        add_references_to_prompt=False,
        markdown=True,
        add_chat_history_to_messages=True,
        num_history_messages=4,
        add_datetime_to_instructions=True,
        debug_mode=debug_mode,
    )
requirements.txt ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.9.5
2
+ aiosignal==1.3.1
3
+ altair==5.3.0
4
+ annotated-types==0.6.0
5
+ anyio==4.3.0
6
+ asttokens==2.4.1
7
+ attrs==23.2.0
8
+ beautifulsoup4==4.12.3
9
+ blinker==1.7.0
10
+ bs4==0.0.2
11
+ cachetools==5.3.3
12
+ certifi==2024.2.2
13
+ cffi==1.16.0
14
+ charset-normalizer==3.3.2
15
+ click==8.1.7
16
+ colorama==0.4.6
17
+ comm==0.2.2
18
+ cryptography==42.0.8
19
+ curl_cffi==0.6.3
20
+ datasets==2.20.0
21
+ debugpy==1.8.2
22
+ decorator==5.1.1
23
+ dill==0.3.8
24
+ distro==1.9.0
25
+ duckduckgo_search==5.3.0
26
+ exceptiongroup==1.2.1
27
+ executing==2.0.1
28
+ filelock==3.15.4
29
+ frozenlist==1.4.1
30
+ fsspec==2024.5.0
31
+ gitdb==4.0.11
32
+ GitPython==3.1.43
33
+ greenlet==3.0.3
34
+ groq==0.5.0
35
+ h11==0.14.0
36
+ httpcore==1.0.5
37
+ httpx==0.27.0
38
+ huggingface-hub==0.23.4
39
+ idna==3.7
40
+ ipykernel==6.29.5
41
+ ipython==8.26.0
42
+ jedi==0.19.1
43
+ Jinja2==3.1.3
44
+ jsonschema==4.21.1
45
+ jsonschema-specifications==2023.12.1
46
+ jupyter_client==8.6.2
47
+ jupyter_core==5.7.2
48
+ markdown-it-py==3.0.0
49
+ MarkupSafe==2.1.5
50
+ matplotlib-inline==0.1.7
51
+ mdurl==0.1.2
52
+ multidict==6.0.5
53
+ multiprocess==0.70.16
54
+ nest-asyncio==1.6.0
55
+ numpy==1.26.4
56
+ ollama==0.1.8
57
+ openai==1.23.2
58
+ orjson==3.10.1
59
+ packaging==24.0
60
+ pandas==2.2.2
61
+ parso==0.8.4
62
+ pdfminer==20191125
63
+ pdfminer.six==20240706
64
+ pgvector==0.2.5
65
+ phidata==2.4.20
66
+ pillow==10.3.0
67
+ platformdirs==4.2.2
68
+ prompt_toolkit==3.0.47
69
+ protobuf==4.25.3
70
+ psutil==6.0.0
71
+ psycopg==3.1.18
72
+ psycopg-binary==3.1.18
73
+ psycopg2==2.9.9
74
+ pure-eval==0.2.2
75
+ pyarrow==16.0.0
76
+ pyarrow-hotfix==0.6
77
+ pycparser==2.22
78
+ pycryptodome==3.20.0
79
+ pydantic==2.7.0
80
+ pydantic-settings==2.2.1
81
+ pydantic_core==2.18.1
82
+ pydeck==0.8.1b0
83
+ Pygments==2.17.2
84
+ pypdf==4.2.0
85
+ python-dateutil==2.9.0.post0
86
+ python-dotenv==1.0.1
87
+ pytz==2024.1
88
+ PyYAML==6.0.1
89
+ pyzmq==26.0.3
90
+ referencing==0.34.0
91
+ requests==2.32.3
92
+ rich==13.7.1
93
+ rpds-py==0.18.0
94
+ shellingham==1.5.4
95
+ six==1.16.0
96
+ smmap==5.0.1
97
+ sniffio==1.3.1
98
+ soupsieve==2.5
99
+ SQLAlchemy==2.0.29
100
+ stack-data==0.6.3
101
+ streamlit==1.33.0
102
+ tenacity==8.2.3
103
+ toml==0.10.2
104
+ tomli==2.0.1
105
+ toolz==0.12.1
106
+ tornado==6.4
107
+ tqdm==4.66.4
108
+ traitlets==5.14.3
109
+ typer==0.12.3
110
+ typing_extensions==4.11.0
111
+ tzdata==2024.1
112
+ urllib3==1.26.18
113
+ watchdog==4.0.1
114
+ wcwidth==0.2.13
115
+ xxhash==3.4.1
116
+ yarl==1.9.4