SoumyaJ committed on
Commit 97a778d
1 Parent(s): 40e1d32

Initial commit

.gitignore ADDED
@@ -0,0 +1,2 @@
+ notebooks/
+ venv/
logs/generation_error.log ADDED
@@ -0,0 +1,3 @@
+
+ 2024-07-22 14:34:20,942 ERROR testing path
+ 2024-07-22 14:34:57,915 ERROR testing path
main.py ADDED
@@ -0,0 +1,37 @@
+ from fastapi import FastAPI
+ from fastapi.middleware.cors import CORSMiddleware
+ from src.TinyLLama import text_generation
+ from src.classmodels.inputforgeneration import InputForGeneration
+ from src.classmodels.generatedoutput import GeneratedOutput
+ import uvicorn
+
+ app = FastAPI()
+
+ origins = ["*"]
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=origins,
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"]
+ )
+
+ @app.get("/cmsai/warmuptextgenerationmodel")
+ def warmupGenerationModel():
+     warmupModelMessage = text_generation.warmupTextGenerationModel()
+     return warmupModelMessage
+
+ @app.post("/cmsai/generatetext")
+ async def generateTextUsingLLama(inputSettings: InputForGeneration) -> GeneratedOutput:
+     try:
+         output = text_generation.generateText(inputSettings)
+         if output is not None:
+             return GeneratedOutput(status_code=200, generated_text=output)
+         else:
+             return GeneratedOutput(status_code=400, message="error when generating text")
+     except Exception as e:
+         return GeneratedOutput(status_code=500, message=str(e))
+
+ if __name__ == "__main__":
+     uvicorn.run(app=app)
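For reference, a minimal client sketch against the two endpoints defined above. It assumes the default uvicorn address (http://127.0.0.1:8000) and the requests package, which is not listed in requirements.txt; the example prompt is illustrative only.

# Sketch only: exercises the warmup and generation endpoints from main.py.
# Assumes the API is running on the default uvicorn address and that the
# optional `requests` package is installed (not part of requirements.txt).
import requests

BASE_URL = "http://127.0.0.1:8000"

# Load the tokenizer and model into memory before the first generation call
print(requests.get(f"{BASE_URL}/cmsai/warmuptextgenerationmodel").text)

# Fields mirror InputForGeneration (temperature and max_length are optional)
payload = {
    "input_for_generation": "Write a short note about content management systems.",
    "temperature": 0.8,
    "max_length": 250
}
response = requests.post(f"{BASE_URL}/cmsai/generatetext", json=payload)
print(response.json())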
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ fastapi
+ uvicorn
+ transformers
+ torch
+ bitsandbytes
+ peft
src/TinyLLama/__init__.py ADDED
File without changes
src/TinyLLama/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (158 Bytes).
 
src/TinyLLama/__pycache__/text_generation.cpython-310.pyc ADDED
Binary file (684 Bytes).
 
src/TinyLLama/model/config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "_name_or_path": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 2048,
+   "initializer_range": 0.02,
+   "intermediate_size": 5632,
+   "max_position_embeddings": 2048,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 22,
+   "num_key_value_heads": 4,
+   "pretraining_tp": 1,
+   "quantization_config": {
+     "_load_in_4bit": true,
+     "_load_in_8bit": false,
+     "bnb_4bit_compute_dtype": "bfloat16",
+     "bnb_4bit_quant_storage": "uint8",
+     "bnb_4bit_quant_type": "nf4",
+     "bnb_4bit_use_double_quant": false,
+     "llm_int8_enable_fp32_cpu_offload": false,
+     "llm_int8_has_fp16_weight": false,
+     "llm_int8_skip_modules": null,
+     "llm_int8_threshold": 6.0,
+     "load_in_4bit": true,
+     "load_in_8bit": false,
+     "quant_method": "bitsandbytes"
+   },
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.42.3",
+   "use_cache": false,
+   "vocab_size": 32000
+ }
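The quantization_config block above records a bitsandbytes 4-bit (nf4, bfloat16 compute) load. As a hedged sketch only, not part of this commit, a local quantised copy matching that config could be produced roughly as follows; the model name is taken from _name_or_path, the target folders mirror the repo layout, and a CUDA-capable GPU plus the bitsandbytes package are assumed.

# Sketch only: illustrates how a 4-bit quantised TinyLlama copy matching the
# config above could be created; the exact script behind this commit is not included.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# Mirror the committed quantization_config (nf4, bfloat16 compute, no double quant)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

# Loading in 4-bit requires a CUDA-capable GPU and the bitsandbytes package
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Persist the quantised weights and tokenizer next to text_generation.py
model.save_pretrained("src/TinyLLama/model")
tokenizer.save_pretrained("src/TinyLLama/tokenizer")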
src/TinyLLama/model/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "max_length": 2048,
+   "pad_token_id": 0,
+   "transformers_version": "4.42.3"
+ }
src/TinyLLama/model/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:29e7e4f127f5b404eb938d84dd96724e5f519b49e23fd7ef490d98cdfd030307
+ size 807426286
src/TinyLLama/text_generation.py ADDED
@@ -0,0 +1,65 @@
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ from src.classmodels.inputforgeneration import InputForGeneration
+ from src.errorlog.errorlog import log_error
+ from pathlib import Path
+
+ # MODEL NAME AS PER HUGGING FACE: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
+ current_folderpath = Path(__file__).resolve().parent
+
+ tokenizer = None
+ quantised_model = None
+ tokenizer_path = current_folderpath / "tokenizer"
+ model_path = current_folderpath / "model"
+
+ additional_kwargs = {
+     "do_sample": True,
+     "early_stopping": True,
+     "num_beams": 5,
+     "no_repeat_ngram_size": 5,
+     "truncation": True
+ }
+
+ TASK_NAME = "text-generation"
+
+ def isModelAvailable():
+     # The quantised model folder must exist next to this file
+     return model_path.is_dir()
+
+ def isTokenizerAvailable():
+     # The tokenizer folder must exist next to this file
+     return tokenizer_path.is_dir()
+
+ def warmupTextGenerationModel():
+     # Load the tokenizer and quantised model into the module-level globals so generateText() can reuse them
+     global tokenizer, quantised_model
+     try:
+         if isModelAvailable() and isTokenizerAvailable():
+             tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
+             quantised_model = AutoModelForCausalLM.from_pretrained(model_path, use_safetensors=True)
+             return "text generation model is warmed up"
+         else:
+             return "No model/tokenizer folder found..."
+     except Exception as ex:
+         log_error(str(ex))
+         return "Issue occurred when warming up the text generation model. Please try again.."
+
+ def generateText(inputSettings: InputForGeneration):
+     try:
+         if tokenizer is not None and quantised_model is not None:
+             pipe = pipeline(task=TASK_NAME, model=quantised_model, tokenizer=tokenizer, device_map="auto")
+
+             # Formatted prompt for the Llama model
+             prompt = f"<s>[INST] {inputSettings.input_for_generation} [/INST]"
+             generated_text = pipe(prompt, temperature=inputSettings.temperature, max_length=inputSettings.max_length,
+                                   **additional_kwargs)
+
+             if generated_text is not None and generated_text[0]['generated_text'] is not None:
+                 return generated_text[0]['generated_text'].replace("<s>", "").replace("[INST]", "").replace("[/INST]", "")
+             else:
+                 return None
+         else:
+             # If the tokenizer or model has not been warmed up, report it as a generation issue
+             return None
+     except Exception as ex:
+         log_error(str(ex))
+         return ""
src/TinyLLama/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
src/TinyLLama/tokenizer/tokenizer.json ADDED
The diff for this file is too large to render.
 
src/TinyLLama/tokenizer/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
src/TinyLLama/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": false,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": null,
+   "padding": true,
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "truncation": true,
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
src/classmodels/__init__.py ADDED
File without changes
src/classmodels/generatedoutput.py ADDED
File without changes
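generatedoutput.py is committed empty here, yet main.py imports GeneratedOutput from it and constructs it with status_code, generated_text, and message. A minimal Pydantic sketch consistent with that usage (the optional fields and defaults are an assumption, not part of this commit) could look like:

# Sketch only: field names are inferred from how main.py constructs this model;
# making generated_text and message optional is an assumption.
from typing import Optional
from pydantic import BaseModel, Field

class GeneratedOutput(BaseModel):
    status_code: int = Field(..., description="HTTP-style status of the generation call")
    generated_text: Optional[str] = Field(None, description="Generated text when the call succeeds")
    message: Optional[str] = Field(None, description="Error detail when generation fails")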
src/classmodels/inputforgeneration.py ADDED
@@ -0,0 +1,6 @@
+ from pydantic import BaseModel, Field
+
+ class InputForGeneration(BaseModel):
+     input_for_generation: str = Field(..., description="Input text for which generation should happen")
+     temperature: float = Field(0.8, description="Define the degree of randomness")
+     max_length: int = Field(250, description="Set the maximum length for the output of generated text")
src/errorlog/__init__.py ADDED
File without changes
src/errorlog/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (173 Bytes).
 
src/errorlog/__pycache__/errorlog.cpython-310.pyc ADDED
Binary file (913 Bytes).
 
src/errorlog/errorlog.py ADDED
@@ -0,0 +1,27 @@
+ import logging
+ from logging.handlers import TimedRotatingFileHandler
+ from pathlib import Path
+
+ # Resolve <repo root>/logs/generation_error.log relative to this file
+ parent_path = Path(__file__).resolve().parent.parent.parent
+ final_path = parent_path/'logs'/'generation_error.log'
+
+ # Configure root logging
+ logging.basicConfig(level=logging.ERROR,
+                     format='%(asctime)s %(levelname)s %(message)s')
+
+ # Create a TimedRotatingFileHandler that rotates the log weekly
+ handler = TimedRotatingFileHandler(filename=final_path, when='W0', interval=1, backupCount=0, encoding='utf-8')
+
+ # Set the rotated log file name format (optional)
+ handler.suffix = "%Y-%m-%d_%H-%M-%S.log"
+
+ # Set the logging format
+ handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
+
+ # addHandler() returns None, so keep the logger reference separate from the call
+ logger = logging.getLogger()
+ logger.addHandler(handler)
+
+ def log_error(error_message):
+     logging.error(error_message)