Spaces:

akfung
/

phoenix-byte

Sleeping

App Files Community

akfung committed on Jan 25, 2024

Commit

afc30f3

1 Parent(s): b0be57c

fixed chat template issues

Browse files

Files changed (3) hide show

setup.py +1 -1
src/config.py +3 -2
src/model.py +18 -7

setup.py CHANGED Viewed

@@ -1,4 +1,4 @@
 from sentence_transformers import SentenceTransformer
 embedding_model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')
-embedding_model.save('/embedding_model/')

 from sentence_transformers import SentenceTransformer
 embedding_model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')
+embedding_model.save('embedding_model/')

src/config.py CHANGED Viewed

@@ -48,11 +48,12 @@ headers = {
 }
 embedding_path = os.environ.get('EMBEDDING_PATH')
-streaming_url = "https://api.runpod.ai/v2/o4tke61qpopsz0/stream/"
-job_url = "https://api.runpod.ai/v2/o4tke61qpopsz0/run"
 default_payload = { "input": {
         "prompt": "Who is the president of the United States?",
         "sampling_params": {
             "max_tokens": os.environ.get('max_new_tokens', 400),
             "n": 1,

 }
 embedding_path = os.environ.get('EMBEDDING_PATH')
+streaming_url = os.environ.get('STREAMING_URL')
+job_url = os.environ.get('JOB_URL')
 default_payload = { "input": {
         "prompt": "Who is the president of the United States?",
+        "apply_chat_template": True,
         "sampling_params": {
             "max_tokens": os.environ.get('max_new_tokens', 400),
             "n": 1,

src/model.py CHANGED Viewed

@@ -15,7 +15,7 @@ class Model:
                  max_new_tokens:int=max_new_tokens):
         self.max_new_tokens = max_new_tokens
         # self.embedding_model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')
-        self.embedding_model = SentenceTransformer("/embedding_model/")
     def inference(self, query:str, table:str):
@@ -35,19 +35,30 @@ class Model:
         if len(matches) > 0:
             match = '"""' + matches[0][0] + '"""'
-            context = "Use the following historical opinion delimited by tripple quotes to give your ruling on a court case description. " + match + " Description: "
         else:
-            context = 'Give your ruling on a court case description. Description:'
-        return context + query + " Answer in less than 400 words and without a self introduction."
     def query_model(self, query:str, table:str, default_payload:dict=default_payload, timeout:int=60, **kwargs) -> str:
         """Query the model api on runpod. Runs for 60s by default. Generator response until job is complete"""
-        augmented_prompt = self.get_context(query=query, table=table)
         for k,v in kwargs:
             default_payload['input']['sampling_params'][k] = v
-        default_payload["input"]["prompt"] = augmented_prompt
         job_id = requests.post(job_url, json=default_payload, headers=headers).json()['id']
         for i in range(timeout):
             time.sleep(1)
@@ -77,4 +88,4 @@ class Model:
     #     for object_name in model_file_paths:
     #         blob = bucket.blob(object_name)
     #         blob.download_to_filename(object_name)

                  max_new_tokens:int=max_new_tokens):
         self.max_new_tokens = max_new_tokens
         # self.embedding_model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')
+        self.embedding_model = SentenceTransformer("embedding_model/")
     def inference(self, query:str, table:str):
         if len(matches) > 0:
             match = '"""' + matches[0][0] + '"""'
+            context = "You are the United States Supreme Court. Use the following historical opinion delimited by triple quotes to give your ruling on a court case description. Historical opinion: " + match
         else:
+            context = 'You are the United States Supreme Court. Give your ruling on a court case description.'
+        return context + " Answer in less than 400 words. Do not introduce yourself"
     def query_model(self, query:str, table:str, default_payload:dict=default_payload, timeout:int=60, **kwargs) -> str:
         """Query the model api on runpod. Runs for 60s by default. Generator response until job is complete"""
+        context = self.get_context(query=query, table=table)
         for k,v in kwargs:
             default_payload['input']['sampling_params'][k] = v
+        augmented_prompt_template = [
+            {
+                "role": "system",
+                "content": context,
+            },
+            {
+                "role": "user",
+                "content": query,
+            }
+        ]
+        print(augmented_prompt_template)
+        default_payload["input"]["prompt"] = augmented_prompt_template
         job_id = requests.post(job_url, json=default_payload, headers=headers).json()['id']
         for i in range(timeout):
             time.sleep(1)
     #     for object_name in model_file_paths:
     #         blob = bucket.blob(object_name)
     #         blob.download_to_filename(object_name)