from fastapi import FastAPI
from pydantic import BaseModel
from langchain.embeddings import HuggingFaceEmbeddings  # for using HuggingFace models
from langchain.chains.question_answering import load_qa_chain
from langchain import HuggingFaceHub
from langchain import PromptTemplate
import os

# Supply the Hugging Face Hub token via an environment variable or a Spaces
# secret; do not hard-code a real token in source control.
os.environ.setdefault("HUGGINGFACEHUB_API_TOKEN", "<your-hf-token>")

# NOTE - we configure docs_url to serve the interactive Docs at the root path
# of the app. This way, we can use the docs as a landing page for the app on Spaces.
app = FastAPI(docs_url="/")


class ModelOutputEvaluate(BaseModel):
    question: str
    answer: str
    domain: str
    context: str


class BasePromptContext:
    def __init__(self):
        self.variables_list = ["question", "answer", "context"]
        self.base_template = """Please act as an impartial judge and evaluate the quality of the provided answer, which attempts to answer the provided question based on a provided context.

Submit your grading for the correctness, comprehensiveness and readability of the answer, using the following format:
Reasoning for correctness: [your one-line step-by-step reasoning about the correctness of the answer]
Score for correctness: [your score number for the correctness of the answer]

Below is your grading rubric:
- Correctness: Does the answer correctly answer the question? Details for the different scores:
    - Score 0: the answer is completely incorrect, doesn't mention anything about the question, or is completely contrary to the correct answer.
        - For example, when asked "How to terminate a Databricks cluster", the answer is an empty string, content that's completely irrelevant, or "sorry, I don't know the answer".
    - Score 4: the answer has some relevance to the question and answers one aspect of the question correctly.
        - Example:
            - Question: How to terminate a Databricks cluster
            - Answer: Databricks cluster is a cloud-based computing environment that allows users to process big data and run distributed data processing tasks efficiently.
            - Or answer: In the Databricks workspace, navigate to the "Clusters" tab. And then this is a hard question that I need to think more about.
    - Score 7: the answer mostly answers the question but is missing or hallucinating one critical aspect.
        - Example:
            - Question: How to terminate a Databricks cluster
            - Answer: In the Databricks workspace, navigate to the "Clusters" tab. Find the cluster you want to terminate from the list of active clusters. And then you'll find a button to terminate all clusters at once.
    - Score 10: the answer correctly answers the question and is not missing any major aspect.
        - Example:
            - Question: How to terminate a Databricks cluster
            - Answer: In the Databricks workspace, navigate to the "Clusters" tab. Find the cluster you want to terminate from the list of active clusters. Click on the down-arrow next to the cluster name to open the cluster details. Click on the "Terminate" button. A confirmation dialog will appear. Click "Terminate" again to confirm the action.

Provided question: {question}
Provided answer: {answer}
Provided context: {context}

Please provide your grading for the correctness."""


class Evaluator:
    def __init__(self, item: ModelOutputEvaluate):
        self.question = item.question
        self.answer = item.answer
        self.domain = item.domain
        self.context = item.context

    def get_prompt_template(self):
        # Build a PromptTemplate from the base grading template and its variables.
        prompt = BasePromptContext()
        return PromptTemplate(
            input_variables=prompt.variables_list,
            template=prompt.base_template,
        )

    def evaluate(self):
        # max_length must stay within the model's generation limits; the
        # original value of 1000000 far exceeds what flan-t5-xxl supports.
        llm = HuggingFaceHub(
            repo_id="google/flan-t5-xxl",
            model_kwargs={"temperature": 1, "max_length": 1024},
        )
        prompt = self.get_prompt_template().format(
            question=self.question,
            answer=self.answer,
            context=self.context,
        )
        return llm(prompt)


# Evaluation endpoint: grades the submitted answer against the question and context.
@app.post("/evaluate/")
async def create_evaluation_scenario(item: ModelOutputEvaluate):
    return {
        "input": item,
        "score": Evaluator(item).evaluate(),
    }


# def evaluate(question: str):
#     # question = "what is the document about?"
#     answer = search(question)
#     # print(question, answer)
#     return {answer}