|
import gradio as gr |
|
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer |
|
import torch |
|
import theme |
|
|
|
# Rebinds the name `theme` from the imported module to an instance of its
# Theme class; after this line `theme` no longer refers to the module.
theme = theme.Theme() |
|
|
|
import os |
|
import sys |
|
# Appends the relative path '../..' (two directory levels up) to the module
# search path.
sys.path.append('../..') |
|
|
|
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter |
|
from langchain.embeddings import HuggingFaceEmbeddings |
|
from langchain.prompts import PromptTemplate |
|
from langchain.chains import RetrievalQA |
|
from langchain.prompts import ChatPromptTemplate |
|
from langchain.schema import StrOutputParser |
|
from langchain.schema.runnable import Runnable |
|
from langchain.schema.runnable.config import RunnableConfig |
|
from langchain.chains import ( |
|
LLMChain, ConversationalRetrievalChain) |
|
from langchain.vectorstores import Chroma |
|
from langchain.memory import ConversationBufferMemory |
|
from langchain.chains import LLMChain |
|
from langchain.prompts.prompt import PromptTemplate |
|
from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate |
|
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, MessagesPlaceholder |
|
from langchain.document_loaders import PyPDFDirectoryLoader |
|
from pydantic import BaseModel, Field |
|
from langchain.output_parsers import PydanticOutputParser |
|
from langchain_community.llms import HuggingFaceHub |
|
from langchain_community.document_loaders import WebBaseLoader |
|
|
|
from pydantic import BaseModel |
|
import shutil |
|
|
|
|
|
|
|
# HTML snippet used as the title of both Gradio interfaces defined in this file.
custom_title = "<span style='color: rgb(243, 239, 224);'>Green Greta</span>"

# Image-classification pipeline, created once when the module is loaded and
# reused for every prediction request.
image_pipeline = pipeline(
    model="guillen/vit-basura-test1",
    task="image-classification",
)
|
|
|
def predict_image(input_img):
    """Classify an image and map each predicted label to its score.

    Args:
        input_img: The image handed over by the ``gr.Image`` input (configured
            with ``type="pil"``), or ``None`` when the form is submitted
            without an image.

    Returns:
        A ``{label: score}`` dict for the ``gr.Label`` output; an empty dict
        when no image was provided.
    """
    # Gradio passes None for an empty image input; feeding that to the
    # pipeline would raise, so report "no result" instead.
    if input_img is None:
        return {}

    predictions = image_pipeline(input_img)
    return {p["label"]: p["score"] for p in predictions}
|
|
|
# Image tab: takes an uploaded or webcam picture as a PIL image, runs
# predict_image on it and shows the returned label scores.
image_gradio_app = gr.Interface(
    title=custom_title,
    theme=theme,
    fn=predict_image,
    inputs=gr.Image(type="pil", sources=['upload', 'webcam'], label="Image"),
    outputs=[gr.Label(label="Result")],
)
|
|
|
# Web pages that are downloaded at start-up and used as the chatbot's
# retrieval corpus.
loader = WebBaseLoader(
    [
        "https://www.epa.gov/recycle/frequent-questions-recycling",
        "https://www.whitehorsedc.gov.uk/vale-of-white-horse-district-council/recycling-rubbish-and-waste/lets-get-real-about-recycling/",
        "https://www.teimas.com/blog/13-preguntas-y-respuestas-sobre-la-ley-de-residuos-07-2022",
        "https://www.molok.com/es/blog/gestion-de-residuos-solidos-urbanos-rsu-10-dudas-comunes",
    ]
)
data = loader.load()

# Split the pages into chunks of at most 1024 characters (length measured
# with len) that overlap by 150 characters.
text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_overlap=150,
    chunk_size=1024,
)
docs = text_splitter.split_documents(data)
|
|
|
# Embedding model used to index the document chunks.
embeddings = HuggingFaceEmbeddings(model_name='thenlper/gte-small')

# Directory in which the Chroma store is persisted.
persist_directory = 'docs/chroma/'

# Delete whatever a previous run left in the directory (a missing directory
# is not an error) and build the store again from the freshly split chunks.
shutil.rmtree(persist_directory, ignore_errors=True)
vectordb = Chroma.from_documents(
    persist_directory=persist_directory,
    embedding=embeddings,
    documents=docs,
)

# Retriever over the store: "mmr" search type, two chunks per query.
retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 2})
|
|
|
# Schema of the structured reply requested from the LLM; it is handed to
# PydanticOutputParser to generate the prompt's format instructions.
# Documented with comments on purpose: pydantic copies a class docstring into
# the model schema, which would change those format instructions.
class FinalAnswer(BaseModel):
    # `...` marks a field as required, exactly like giving no default at all.
    question: str = Field(..., description="the original question")
    answer: str = Field(..., description="the extracted answer")
|
|
|
|
|
# Output parser built from the FinalAnswer model; its format instructions are
# what fills the {format_instructions} placeholder of the template below.
parser = PydanticOutputParser(pydantic_object=FinalAnswer) |
|
|
|
# System-prompt text with three placeholders: {context}, {question} and
# {format_instructions}.
# NOTE(review): "anwer" looks like a typo for "answer", and the trailing " /"
# on the instruction lines looks like an intended line-continuation marker.
# Both are left untouched here because this string is runtime prompt text.
template = """ |
|
Your name is Greta and you are a recycling chatbot with the objective to anwer questions from user in English or Spanish / |
|
Use the following pieces of context to answer the question / |
|
If the question is English answer in English / |
|
If the question is Spanish answer in Spanish / |
|
Do not mention the word context when you answer a question / |
|
Answer the question fully and provide as much relevant detail as possible. Do not cut your response short / |
|
Context: {context} |
|
User: {question} |
|
{format_instructions} |
|
""" |
|
|
|
|
|
# System message built from the instruction template.
sys_prompt = SystemMessagePromptTemplate.from_template(template)

# Chat prompt for the answering step: the system message followed by the
# user's question. {format_instructions} is pre-filled from the output parser,
# so only the remaining placeholders are supplied at call time.
qa_prompt = ChatPromptTemplate(
    messages=[
        sys_prompt,
        HumanMessagePromptTemplate.from_template("{question}"),
    ],
    partial_variables={
        "format_instructions": parser.get_format_instructions(),
    },
)
|
# Text-generation LLM accessed through the HuggingFaceHub wrapper, with the
# generation settings passed along as model_kwargs.
llm = HuggingFaceHub(
    task="text-generation",
    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
    model_kwargs={
        "max_new_tokens": 2000,
        "top_k": 30,
        "temperature": 0.1,
        "repetition_penalty": 1.03,
    },
)
|
|
|
# Conversational retrieval chain that ties together the LLM, the retriever
# and the custom answering prompt.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    # Conversation memory: reads the 'question' input, stores the 'output'
    # value, and exposes the transcript under 'chat_history'.
    memory=ConversationBufferMemory(
        llm=llm,
        memory_key="chat_history",
        input_key='question',
        output_key='output',
    ),
    combine_docs_chain_kwargs={'prompt': qa_prompt},
    # Hand the stored history to the chain unchanged.
    get_chat_history=lambda h: h,
    rephrase_question=False,
    # The chain's result is returned under the 'output' key.
    output_key='output',
    verbose=True,
)
|
|
|
def _extract_answer(output_string):
    """Return the value of the last ``"answer"`` field found in raw LLM text.

    The field is located textually (last occurrence of the ``"answer":``
    marker) and its quoted value is decoded as a JSON string, so escaped
    quotes, ``\\n`` and ``\\uXXXX`` sequences come back as real characters
    instead of cutting the answer short.

    Fallbacks, from most to least structured:
      * marker missing          -> the whole text, stripped;
      * value not quoted        -> the text after the marker, stripped;
      * value not valid JSON    -> text between the opening quote and the
        next quote, or up to the end of the text if there is no closing quote.
    """
    # Function-scope import so this block needs no new file-level import.
    import json

    marker = '"answer":'
    marker_at = output_string.rfind(marker)
    if marker_at == -1:
        # Without the marker there is nothing to slice reliably; show the
        # model's text as-is rather than an arbitrary fragment of it.
        return output_string.strip()

    tail = output_string[marker_at + len(marker):].strip()
    opening = tail.find('"')
    if opening == -1:
        return tail

    try:
        # strict=False accepts literal newlines/tabs inside the string, which
        # generated JSON frequently contains.
        value, _ = json.JSONDecoder(strict=False).raw_decode(tail, opening)
    except json.JSONDecodeError:
        # Unterminated string or invalid escape: fall back to a plain scan.
        closing = tail.find('"', opening + 1)
        if closing == -1:
            return tail[opening + 1:].strip()
        return tail[opening + 1:closing]
    return value


def chat_interface(question, history):
    """Answer one chat message with the retrieval chain.

    Args:
        question: The user's message.
        history: Previous turns as supplied by the chat UI. Unused: only the
            question is passed to the chain.

    Returns:
        The answer text extracted from the chain's ``'output'`` value (see
        ``_extract_answer`` for the handling of malformed output).
    """
    result = qa_chain.invoke({'question': question})
    return _extract_answer(result['output'])
|
|
|
|
|
# Chat tab: every user message is answered by chat_interface.
chatbot_gradio_app = gr.ChatInterface(title=custom_title, fn=chat_interface)

# Combine the image classifier and the chatbot into a single two-tab app.
app = gr.TabbedInterface(
    [image_gradio_app, chatbot_gradio_app],
    theme=theme,
    tab_names=["Green Greta Image Classification", "Green Greta Chat"],
)

# Enable request queuing, then start the server.
app.queue()
app.launch()