Zwea Htet committed on
Commit ef2a3f4
1 Parent(s): 1230ae3

fixed customllm

Files changed (2)
  1. models/bloom.py +15 -2
  2. utils/customLLM.py +4 -14
models/bloom.py CHANGED
@@ -7,7 +7,7 @@ from dotenv import load_dotenv
 from llama_index import (Document, GPTVectorStoreIndex, LLMPredictor,
                          PromptHelper, ServiceContext, StorageContext,
                          load_index_from_storage)
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
 from utils.customLLM import CustomLLM
 
@@ -27,8 +27,21 @@ num_output = 525
 chunk_overlap_ratio = 0.2
 prompt_helper = PromptHelper(context_window, num_output, chunk_overlap_ratio)
 
+# create a pipeline
+pl = pipeline(
+    model=model,
+    tokenizer=tokenizer,
+    task="text-generation",
+    # device=0, # GPU device number
+    # max_length=512,
+    do_sample=True,
+    top_p=0.95,
+    top_k=50,
+    temperature=0.7
+)
+
 # define llm
-llm_predictor = LLMPredictor(llm=CustomLLM(model, tokenizer))
+llm_predictor = LLMPredictor(llm=CustomLLM(model_pipeline=pl))
 service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
 
 def prepare_data(file_path:str):
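
Note: the added pipeline block refers to `model` and `tokenizer` objects defined earlier in models/bloom.py, outside this hunk. A minimal standalone sketch of how those pieces are assumed to fit together is below; the checkpoint name "bigscience/bloom-560m" is a placeholder, not a value taken from this repo.

# Hypothetical setup mirroring the committed pipeline block; the actual
# checkpoint loaded in models/bloom.py is not visible in this diff.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name = "bigscience/bloom-560m"  # placeholder checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

pl = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    top_p=0.95,
    top_k=50,
    temperature=0.7,
)

# the pipeline object is what gets handed to CustomLLM(model_pipeline=pl)
print(pl("Hello, world", max_new_tokens=20)[0]["generated_text"])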
utils/customLLM.py CHANGED
@@ -1,24 +1,14 @@
 from typing import Any, List, Mapping, Optional
 
 from langchain.llms.base import LLM
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from transformers import Pipeline
 
 
 class CustomLLM(LLM):
-
+    pipeline = None
     # Create the pipeline for question answering
-    def __init__(self, model: AutoModelForCausalLM, tokenizer: AutoTokenizer):
-        self.pipeline = pipeline(
-            model=model,
-            tokenizer=tokenizer,
-            task="text-generation",
-            # device=0, # GPU device number
-            # max_length=512,
-            do_sample=True,
-            top_p=0.95,
-            top_k=50,
-            temperature=0.7
-        )
+    def __init__(self, model_pipeline: Pipeline):
+        self.pipeline = model_pipeline
 
     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
         prompt_length = len(prompt)
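
Note: the diff cuts off at the top of `_call`, so the rest of the committed class is not shown. For orientation only, a self-contained sketch of a LangChain LLM wrapper that delegates to an injected transformers Pipeline could look like the following. The class name, the `_call` body, the generation length, and the pydantic-style field declaration (`pipeline: Any = None`) are assumptions for illustration, not the code in this commit.

# Illustrative sketch, not the committed implementation.
from typing import Any, List, Mapping, Optional

from langchain.llms.base import LLM
from transformers import Pipeline


class PipelineLLM(LLM):
    pipeline: Any = None  # declared as a field so the pydantic base class accepts it

    def __init__(self, model_pipeline: Pipeline, **kwargs: Any):
        super().__init__(**kwargs)
        self.pipeline = model_pipeline

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        prompt_length = len(prompt)
        output = self.pipeline(prompt, max_new_tokens=256)[0]["generated_text"]
        # text-generation pipelines echo the prompt, so return only the new text
        return output[prompt_length:]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"pipeline_task": self.pipeline.task}

    @property
    def _llm_type(self) -> str:
        return "custom_pipeline"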