import gradio as gr
from llama_index import GPTSimpleVectorIndex
from langchain.agents import Tool, initialize_agent
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chat_models import ChatOpenAI
from langchain.callbacks.base import BaseCallbackHandler
from threading import Thread
from queue import Queue, Empty
from collections.abc import Generator
from typing import Any

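# Callback handler that captures tokens from the streaming LLM and pushes
# them onto a queue, where the consumer generator in stream() below can
# read them incrementally.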
class QueueCallback(BaseCallbackHandler):
    def __init__(self, q: Queue):
        self.q = q

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        self.q.put(token)

    def on_llm_end(self, *args, **kwargs: Any) -> None:
        # Completion is signaled by the worker thread in stream() below,
        # which enqueues the job_done sentinel after the agent run finishes.
        pass

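# PREFIX and SUFFIX are spliced into the conversational agent's prompt via
# agent_kwargs in initialize_agent() below.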
PREFIX = '''
        You are an automobile expert AI with knowledge of all existing cars and bikes, their respective models, and all related information.
        If the question is not related to cars, bikes, automobiles, or their models, let the user know that you don't have the relevant information.

        Return the entire output in HTML format.

        Make sure to follow every instruction before giving the response.
'''
SUFFIX = '''
        Begin!
        Previous conversation history:
        {chat_history}
        Instructions: {input}
        {agent_scratchpad}
'''

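# Load the prebuilt vector index from disk (presumably saved earlier with
# GPTSimpleVectorIndex.save_to_disk over the cars/bikes corpus).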
index = GPTSimpleVectorIndex.load_from_disk('./cars_bikes(2).json')
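# The agent's single tool wraps index.query; return_direct=True makes the
# agent hand the tool's output straight back to the user instead of routing
# it through the LLM for a final rewrite.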
tools = [
        Tool(
            name="LlamaIndex",
            func=lambda query: str(index.query(query)),
            description="""You are an automobile expert equipped with all the information about existing cars and bikes, their respective brands, models, features, parameters, and specifications,
                          capable of answering everything related to every automobile brand.
                          Answer using formatted tables or lists when required.
                          If the question is not related to cars, bikes, automobiles, or their models, let the user know that you don't have the relevant information.
                          Please answer keeping the Indian context in mind.

                          Return the entire output in HTML format.

                          Make sure to follow every instruction before giving the response.
                          """,
            return_direct=True),
    ]

num_outputs = 2000
conversational_memory = ConversationBufferWindowMemory(memory_key='chat_history', k=5, return_messages=True)

# Shared token queue, plus a sentinel object that marks the end of a response.
q = Queue()
job_done = object()

# gpt-4 is a chat-completion model, so ChatOpenAI is used here; streaming=True
# routes each generated token through QueueCallback into the queue.
llm = ChatOpenAI(temperature=0.5, model_name="gpt-4", max_tokens=num_outputs,
                 streaming=True, callbacks=[QueueCallback(q)])

def stream(input_text) -> Generator:
    conversation = initialize_agent(
        tools, llm,
        agent="conversational-react-description",
        memory=conversational_memory,
        agent_kwargs={'prefix': PREFIX, 'suffix': SUFFIX},
    )

    # Create a function to call - this will run in a thread. The sentinel is
    # enqueued in a finally block so the consumer loop below always
    # terminates, even if the agent run raises.
    def task():
        try:
            conversation.run(input_text)
        finally:
            q.put(job_done)

    # Create a thread and start the function
    t = Thread(target=task)
    t.start()

    content = ""

    # Get each new token from the queue and yield it from our generator
    while True:
        try:
            next_token = q.get(True, timeout=1)
            if next_token is job_done:
                break
            content += next_token
            yield next_token, content
        except Empty:
            continue

add = "Return the output in a table format or an ordered list legible to the user.\n"
def greet(Question):
    # Prepend the formatting hint to the question, then yield the accumulated
    # answer so Gradio re-renders the growing response as tokens arrive.
    for next_token, content in stream(add + Question):
        yield content

demo = gr.Interface(
    fn=greet,
    inputs=gr.Textbox(lines=2, label="Question", placeholder="What do you want to know...?"),
    outputs=gr.HTML(""),
    title="Here Auto",
    description="Know everything about Cars and Bikes",
)
demo.launch()
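
# Note: running this assumes OPENAI_API_KEY is set in the environment and that
# ./cars_bikes(2).json is an index saved by an older llama_index release that
# still exposes GPTSimpleVectorIndex; the langchain calls above likewise
# target the early-2023 API surface.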