Miaoran000 committed
Commit d4bf693
1 Parent(s): 1557ad2
Files changed (2):
  1. requirements.txt (+4 -2)
  2. src/backend/model_operations.py (+24 -16)
requirements.txt CHANGED
@@ -12,11 +12,13 @@ pandas==2.0.0
python-dateutil==2.8.2
requests==2.28.2
tqdm==4.65.0
- transformers==4.35.2
+ transformers
tokenizers>=0.15.0
sentence-transformers==2.2.2
google-generativeai
replicate
anthropic
openai
- cohere
+ cohere
+ mistralai
+ peft
src/backend/model_operations.py CHANGED
@@ -9,19 +9,17 @@ import json
import numpy as np
import pandas as pd
import spacy
- from sentence_transformers import CrossEncoder
import litellm
from tqdm import tqdm
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForTokenClassification
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForTokenClassification, AutoConfig
+ from peft import PeftModel
import torch
import cohere
from openai import OpenAI
import anthropic
import replicate
- # pip install -U google-generativeai
import google.generativeai as genai
- from mistralai.client import MistralClient
- from mistralai.models.chat_completion import ChatMessage
+ from mistralai import Mistral

import src.backend.util as util
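Note: peft is now a dependency and PeftModel is imported above, though its call site is not part of this diff. A minimal sketch of the usual adapter-loading pattern it enables; both checkpoint ids below are placeholders, not ones used by this repo:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "mistralai/Mistral-7B-v0.1"        # placeholder base model
adapter_id = "your-org/your-lora-adapter"    # placeholder PEFT/LoRA adapter

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16, device_map="auto")
model = PeftModel.from_pretrained(base, adapter_id)  # attach adapter weights to the base model
model = model.merge_and_unload()                     # optional: fold LoRA weights in for plain inference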
@@ -330,15 +328,21 @@ class SummaryGenerator:

elif 'mistral-large' in self.model_id.lower():
api_key = os.environ["MISTRAL_API_KEY"]
- client = MistralClient(api_key=api_key)
+ client = Mistral(api_key=api_key)

messages = [
- ChatMessage(role="system", content=system_prompt),
- ChatMessage(role="user", content=user_prompt)
+ {
+ "role":"system",
+ "content":system_prompt
+ },
+ {
+ "role":"user",
+ "content":user_prompt
+ }
]

# No streaming
- chat_response = client.chat(
+ chat_response = client.chat.complete(
model=self.model_id,
messages=messages,
)
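Note: this hunk migrates from the pre-1.0 MistralClient/ChatMessage interface to the mistralai 1.x client, which takes plain role/content dicts and exposes chat.complete. A self-contained sketch of the same call; the model id is illustrative and the response handling is the 1.x SDK's usual shape, not code from this hunk:

import os
from mistralai import Mistral

client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
chat_response = client.chat.complete(
    model="mistral-large-latest",  # illustrative model id
    messages=[
        {"role": "system", "content": "You are a helpful summarizer."},
        {"role": "user", "content": "Summarize: ..."},
    ],
)
print(chat_response.choices[0].message.content)  # assumed response shape for the 1.x SDK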
@@ -373,6 +377,7 @@ class SummaryGenerator:
self.local_pipeline = pipeline(
"text-generation",
model=self.model_id,
+ tokenizer=AutoTokenizer.from_pretrained(self.model_id),
model_kwargs={"torch_dtype": torch.bfloat16},
device_map="auto",
trust_remote_code=True
@@ -384,7 +389,8 @@ class SummaryGenerator:
self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id,
torch_dtype=torch.bfloat16,
attn_implementation="flash_attention_2",
- device_map="auto")
+ device_map="auto",
+ use_mamba_kernels=False)
else:
self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id, trust_remote_code=True, device_map="auto", torch_dtype="auto")
# print(self.local_model.device)
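Note: use_mamba_kernels=False is a Jamba-specific config override that disables the custom mamba-ssm/causal-conv1d CUDA kernels, so the hybrid model still loads where those packages are unavailable. A standalone sketch of that loading call; the checkpoint id is illustrative:

import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "ai21labs/AI21-Jamba-1.5-Mini",        # illustrative Jamba checkpoint
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
    use_mamba_kernels=False,               # fall back to the pure-PyTorch mamba path
)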
@@ -401,7 +407,7 @@ class SummaryGenerator:
outputs = self.local_pipeline(
messages,
max_new_tokens=250,
- temperature=0.0,
+ # return_full_text=False,
do_sample=False
)
result = outputs[0]["generated_text"][-1]['content']
@@ -434,15 +440,16 @@ class SummaryGenerator:
input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
with torch.no_grad():
outputs = self.local_model.generate(**input_ids, max_new_tokens=250, do_sample=True, temperature=0.01, pad_token_id=self.tokenizer.eos_token_id)
- if 'glm' in self.model_id.lower():
- outputs = outputs[:, input_ids['input_ids'].shape[1]:]
+ if 'glm' in self.model_id.lower():
+ outputs = outputs[:, input_ids['input_ids'].shape[1]:]
+
result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
if 'gemma-2' in self.model_id.lower():
result = result.split(user_prompt + '\nmodel')[-1].strip()
-
elif 'intel' in self.model_id.lower():
result = result.split("### Assistant:\n")[-1]
-
+ elif 'jamba' in self.model_id.lower():
+ result = result.split(messages[-1]['content'])[1].strip()
else:
# print(prompt)
# print('-'*50)
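Note: for decoder-only models, generate() returns the prompt tokens followed by the continuation, so the glm branch slices the prompt off before decoding. A standalone sketch of that pattern; the checkpoint id is illustrative:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "THUDM/glm-4-9b-chat"  # illustrative GLM checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True,
                                             device_map="auto", torch_dtype="auto")

input_ids = tokenizer("Summarize the passage: ...", return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(**input_ids, max_new_tokens=250, do_sample=True,
                             temperature=0.01, pad_token_id=tokenizer.eos_token_id)

new_tokens = outputs[:, input_ids["input_ids"].shape[1]:]  # drop the echoed prompt tokens
result = tokenizer.decode(new_tokens[0], skip_special_tokens=True)
print(result)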
@@ -496,7 +503,8 @@ class EvaluationModel:
Args:
model_path (str): Path to the CrossEncoder model.
"""
- self.model = AutoModelForTokenClassification.from_pretrained(model_path)
+ config = AutoConfig.from_pretrained('google/flan-t5-large')
+ self.model = AutoModelForTokenClassification.from_pretrained(model_path, config=config)
self.device = device
self.model.to(self.device)
self.scores = []
 