UMAIR59 committed
Commit f157cdc
1 Parent(s): f401697

initial commit

tinyllama_1_1b_llm_rag_research_chatbot (1).py ADDED
@@ -0,0 +1,88 @@
+ # -*- coding: utf-8 -*-
+ """TinyLlama 1.1B LLM RAG Research Chatbot.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+     https://colab.research.google.com/drive/1gKNj3wQw1pUbUXLJ4TcQCW16ezvL8pPo
+ """
+
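+ # Install the dependencies for the RAG pipeline: pypdf for reading PDFs, llama-index,
+ # transformers/accelerate for the model, sentence-transformers for embeddings, and gradio for the chat UI.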
+ !pip install pypdf
+ !pip install python-dotenv
+ !pip install -q transformers
+ !CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir
+ !pip install -q llama-index
+ !pip install -q transformers einops accelerate langchain bitsandbytes
+ !pip install sentence_transformers
+ !pip install llama-index-llms-huggingface
+ !pip install -q gradio
+ !pip install einops
+ !pip install accelerate
+
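+ # Route llama-index logging to stdout at INFO level so retrieval and generation steps are visible in the notebook output.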
+ import logging
+ import sys
+
+ logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+ logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
+
+ from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
+ from llama_index.llms.huggingface import HuggingFaceLLM
+ from llama_index.core import Settings
+
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
+
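+ # Load every file under /content/Data/ (the research papers) into llama-index Document objects and inspect one of them.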
+ documents = SimpleDirectoryReader("/content/Data/").load_data()
+
+ len(documents)
+
+ documents[10]
+
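+ # Define the system prompt for the Q&A assistant and a query-wrapper template in TinyLlama's chat format.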
+ from llama_index.core import PromptTemplate
+
+ system_prompt = "You are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided."
+
+ # This will wrap the default prompts that are internal to llama-index
+ query_wrapper_prompt = PromptTemplate("<|USER|>{query_str}<|ASSISTANT|>")
+
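+ # Load TinyLlama-1.1B-Chat through llama-index's HuggingFaceLLM wrapper, using greedy decoding (do_sample=False).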
+ from llama_index.llms.huggingface import HuggingFaceLLM
+
+ import torch
+ llm = HuggingFaceLLM(
+     context_window=2048,
+     max_new_tokens=256,
+     generate_kwargs={"temperature": 0.0, "do_sample": False},
+     system_prompt=system_prompt,
+     query_wrapper_prompt=query_wrapper_prompt,
+     tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+     model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+     device_map="cuda",
+     # load the weights in bfloat16 to reduce GPU memory usage
+     model_kwargs={"torch_dtype": torch.bfloat16},
+ )
+
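+ # Use BAAI/bge-small-en-v1.5 as the embedding model, wrapped for llama-index via LangChain.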
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from llama_index.embeddings.langchain import LangchainEmbedding
+
+ lc_embed_model = HuggingFaceEmbeddings(
+     model_name="BAAI/bge-small-en-v1.5"
+ )
+ embed_model = LangchainEmbedding(lc_embed_model)
+
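+ # Bundle the LLM and embedding model into a ServiceContext, build the vector index over the documents,
+ # and expose it as a query engine.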
+ service_context = ServiceContext.from_defaults(
+     chunk_size=1024,
+     llm=llm,
+     embed_model=embed_model
+ )
+
+ index = VectorStoreIndex.from_documents(documents, service_context=service_context)
+
+ query_engine = index.as_query_engine()
+
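+ # Wrap the query engine in a Gradio ChatInterface; each chat turn is answered by querying the index.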
+ def predict(input, history):
+     response = query_engine.query(input)
+     return str(response)
+
+ import gradio as gr
+ gr.ChatInterface(predict).launch(share=True)