Update app.py
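Swaps the Mistral codestral-latest API backend for a locally loaded codellama/CodeLlama-7b-Instruct-hf, served through llama_index's HuggingFaceLLM wrapper with 4-bit NF4 quantization via BitsAndBytesConfig.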
app.py CHANGED
@@ -7,6 +7,10 @@ from llama_index.llms.openai import OpenAI
 from llama_index.core.llms import ChatMessage
 from llama_index.llms.anthropic import Anthropic
 from llama_index.llms.mistralai import MistralAI
+from transformers import AutoTokenizer
+import torch
+from llama_index.llms.huggingface import HuggingFaceLLM
+from transformers import BitsAndBytesConfig
 import nest_asyncio
 
 nest_asyncio.apply()
@@ -26,8 +30,11 @@ nest_asyncio.apply()
 # os.environ["ANTHROPIC_API_KEY"] = key
 
 # Mistral
+# key = os.getenv('MISTRAL_API_KEY')
+# os.environ["MISTRAL_API_KEY"] = key
+
+# Hugging Face token
 key = os.getenv('MISTRAL_API_KEY')
-os.environ["MISTRAL_API_KEY"] = key
 
 
 # Streamlit UI
@@ -80,7 +87,8 @@ if uploaded_files:
             ChatMessage(role="user", content=prompt),
         ]
         # resp = Anthropic(model=model).chat(response)
-        resp = MistralAI(model).chat(response)
+        # resp = MistralAI(model).chat(response)
+        resp = model.chat(response)
         return resp
 
     # Initialize session state for chat messages
@@ -110,9 +118,43 @@ if uploaded_files:
     # model = "claude-3-haiku-20240307"
    # model = "claude-3-sonnet-20240229"
    # model = "claude-3-opus-20240229"
-    model = "codestral-latest"
-
+    # model = "codestral-latest"
+
+    # Llama
+    model = "codellama/CodeLlama-7b-Instruct-hf"
+    tokenizer = AutoTokenizer.from_pretrained(
+        model,
+        token=key,
+    )
 
+    stopping_ids = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
+    ]
+
+    quantization_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_compute_dtype=torch.float16,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_use_double_quant=True,
+    )
+
+    llm = HuggingFaceLLM(
+        model_name=model,
+        model_kwargs={
+            "token": key,
+            # "torch_dtype": torch.bfloat16,  # swap in for quantization_config below to skip 4-bit loading
+            "quantization_config": quantization_config,
+        },
+        generate_kwargs={
+            "do_sample": True,
+            "temperature": 0.6,
+            "top_p": 0.9,
+        },
+        tokenizer_name=model,
+        tokenizer_kwargs={"token": key},
+        stopping_ids=stopping_ids,
+    )
 
     # Generation
     # resp = ollama.generate(model='codellama',
@@ -135,7 +177,7 @@ if uploaded_files:
 
     # print(prompt)
 
-    resp = res(prompt, model = model)
+    resp = res(prompt = prompt, model = llm)
     st.session_state.messages.append({"role": "assistant", "content": f"{resp}"})
     st.markdown(resp)
     # st.session_state.messages.append({"role": "assistant", "content": f"{resp['response']}"})
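Note for reviewers: the new backend is easiest to sanity-check outside Streamlit. The sketch below is a minimal, self-contained version of what this commit wires up, under two assumptions the diff does not make: the Hugging Face token is read from an HF_TOKEN environment variable (the diff reuses MISTRAL_API_KEY, which only authenticates if that variable happens to hold a Hugging Face token), and the stop list is trimmed to the EOS token, since "<|eot_id|>" is a Llama-3-style token that CodeLlama's tokenizer does not contain (convert_tokens_to_ids falls back to the unk id there). It needs a CUDA GPU with bitsandbytes installed.

import os

import torch
from transformers import AutoTokenizer, BitsAndBytesConfig
from llama_index.core.llms import ChatMessage
from llama_index.llms.huggingface import HuggingFaceLLM

# Assumed env var for the Hugging Face token (the diff itself reuses MISTRAL_API_KEY).
key = os.getenv("HF_TOKEN")

model = "codellama/CodeLlama-7b-Instruct-hf"
tokenizer = AutoTokenizer.from_pretrained(model, token=key)

# EOS alone is enough to stop CodeLlama; "<|eot_id|>" belongs to Llama-3 tokenizers.
stopping_ids = [tokenizer.eos_token_id]

# Load the 7B weights in 4-bit NF4 so they fit on a single consumer GPU.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

llm = HuggingFaceLLM(
    model_name=model,
    tokenizer_name=model,
    model_kwargs={"token": key, "quantization_config": quantization_config},
    tokenizer_kwargs={"token": key},
    generate_kwargs={"do_sample": True, "temperature": 0.6, "top_p": 0.9},
    stopping_ids=stopping_ids,
)

# Same call shape as the app's res() helper: a list of ChatMessages in, one reply out.
messages = [
    ChatMessage(role="system", content="You are a coding assistant."),
    ChatMessage(role="user", content="Write a Python function that reverses a string."),
]
resp = llm.chat(messages)
print(resp.message.content)

Design-wise, 4-bit NF4 with double quantization shrinks the 7B weights to roughly 4 GB, which is what makes serving this model locally practical; generate_kwargs keeps the sampling setup from the diff (temperature 0.6, top_p 0.9).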