Final_Assignment_Template

Running

App Files Community

Sandiago21 committed 1 day ago

Commit

b8579ec

verified ·

1 Parent(s): d66c78b

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -72

app.py CHANGED Viewed

@@ -26,6 +26,7 @@ from langchain_core.documents import Document
 from langgraph.prebuilt import ToolNode, tools_condition
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 # from langchain.agents import create_tool_calling_agent
 # (Keep Constants as is)
@@ -42,14 +43,13 @@ class Config(object):
         self.random_state = 42
         self.max_len = 256
         self.reasoning_max_len = 256
-        self.temperature = 0.1
         self.repetition_penalty = 1.2
         self.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
         self.model_name = "Qwen/Qwen2.5-7B-Instruct"
-        # self.model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
         # self.reasoning_model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
         # self.reasoning_model_name = "Qwen/Qwen2.5-7B-Instruct"
-        # self.reasoning_model_name = "mistralai/Mistral-7B-Instruct-v0.2"
 config = Config()
@@ -59,14 +59,14 @@ tokenizer = AutoTokenizer.from_pretrained(config.model_name)
 model = AutoModelForCausalLM.from_pretrained(
     config.model_name,
     torch_dtype=torch.float16,
-    device_map="auto"
 )
 # reasoning_tokenizer = AutoTokenizer.from_pretrained(config.reasoning_model_name)
 # reasoning_model = AutoModelForCausalLM.from_pretrained(
 #     config.reasoning_model_name,
 #     torch_dtype=torch.float16,
-#     device_map="auto"
 # )
 def generate(prompt):
@@ -128,37 +128,6 @@ def reasoning_generate(prompt):
     generated = outputs[0][inputs["input_ids"].shape[-1]:]
     return tokenizer.decode(generated, skip_special_tokens=True).strip()
-def reasoning_generate(prompt):
-    """
-    Generate a text completion from a causal language model given a prompt.
-    Parameters
-    ----------
-    prompt : str
-        Input text prompt used to condition the language model.
-    Returns
-    -------
-    str
-        The generated continuation text, decoded into a string with special
-        tokens removed and leading/trailing whitespace stripped.
-    """
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    with torch.no_grad():
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=config.reasoning_max_len,
-            temperature=config.temperature,
-            repetition_penalty = config.repetition_penalty,
-        )
-    generated = outputs[0][inputs["input_ids"].shape[-1]:]
-    return tokenizer.decode(generated, skip_special_tokens=True).strip()
 class Action(BaseModel):
@@ -880,45 +849,52 @@ def tool_executor(state: AgentState):
         elif action.tool == "visit_webpage":
             try:
-                webpage_results = visit_webpage_wiki(result)
-                webpage_result = " \n ".join(webpage_results)
-                # for webpage_result in webpage_results:
-                query_embeddings = sentence_transformer_model.encode_query(state["messages"][-1].content).reshape(1, -1)
-                webpage_information_embeddings = sentence_transformer_model.encode_query(webpage_result).reshape(1, -1)
-                query_webpage_information_similarity_score = float(cosine_similarity(query_embeddings, webpage_information_embeddings)[0][0])
-                # logger.info(f"Webpage Information and Similarity Score: {result} - {webpage_result} - {query_webpage_information_similarity_score}")
-                if query_webpage_information_similarity_score > 0.65:
-                    webpage_information_complete += webpage_result
-                    webpage_information_complete += " \n "
-                    webpage_information_complete += " \n "
-                if query_webpage_information_similarity_score > best_query_webpage_information_similarity_score:
-                    best_query_webpage_information_similarity_score = query_webpage_information_similarity_score
-                    best_webpage_information = webpage_result
-                webpage_results = visit_webpage_main(result)
-                webpage_result = " \n ".join(webpage_results)
-                # for webpage_result in webpage_results:
-                query_embeddings = sentence_transformer_model.encode_query(state["messages"][-1].content).reshape(1, -1)
-                webpage_information_embeddings = sentence_transformer_model.encode_query(webpage_result).reshape(1, -1)
-                query_webpage_information_similarity_score = float(cosine_similarity(query_embeddings, webpage_information_embeddings)[0][0])
-                # logger.info(f"Webpage Information and Similarity Score: {result} - {webpage_result} - {query_webpage_information_similarity_score}")
-                if query_webpage_information_similarity_score > 0.65:
-                    webpage_information_complete += webpage_result
-                    webpage_information_complete += " \n "
-                    webpage_information_complete += " \n "
-                if query_webpage_information_similarity_score > best_query_webpage_information_similarity_score:
-                    best_query_webpage_information_similarity_score = query_webpage_information_similarity_score
-                    best_webpage_information = webpage_result
             except:
                 pass
         else:
@@ -985,9 +961,10 @@ class BasicAgent:
         # if question == "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?" or question == "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?":
         # if question == "The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places.":
         # if question == "Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations.":
-        if question != "aalskdalsdh" and filename == "":
-            time.sleep(60)
             state = {

 from langgraph.prebuilt import ToolNode, tools_condition
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
+from youtube_transcript_api import YouTubeTranscriptApi
 # from langchain.agents import create_tool_calling_agent
 # (Keep Constants as is)
         self.random_state = 42
         self.max_len = 256
         self.reasoning_max_len = 256
+        self.temperature = 0.01
         self.repetition_penalty = 1.2
         self.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
         self.model_name = "Qwen/Qwen2.5-7B-Instruct"
+        # self.reasoning_model_name = "mistralai/Mistral-7B-Instruct-v0.2"
         # self.reasoning_model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
         # self.reasoning_model_name = "Qwen/Qwen2.5-7B-Instruct"
 config = Config()
 model = AutoModelForCausalLM.from_pretrained(
     config.model_name,
     torch_dtype=torch.float16,
+    device_map=config.DEVICE
 )
 # reasoning_tokenizer = AutoTokenizer.from_pretrained(config.reasoning_model_name)
 # reasoning_model = AutoModelForCausalLM.from_pretrained(
 #     config.reasoning_model_name,
 #     torch_dtype=torch.float16,
+#     device_map=config.DEVICE
 # )
 def generate(prompt):
     generated = outputs[0][inputs["input_ids"].shape[-1]:]
     return tokenizer.decode(generated, skip_special_tokens=True).strip()
 class Action(BaseModel):
         elif action.tool == "visit_webpage":
             try:
+                if "www.youtube.com" in str(action.args["url"]):
+                    video_id = action.args["url"].split("www.youtube.com/watch?v=")[-1]
+                    api = YouTubeTranscriptApi()
+                    transcript = api.fetch(video_id)
+                    texts = [x.text for x in transcript]
+                    webpage_information_complete = " \n ".join([x.text for x in transcript])
+                else:
+                    webpage_results = visit_webpage_wiki(result)
+                    webpage_result = " \n ".join(webpage_results)
+                    # for webpage_result in webpage_results:
+                    query_embeddings = sentence_transformer_model.encode_query(state["messages"][-1].content).reshape(1, -1)
+                    webpage_information_embeddings = sentence_transformer_model.encode_query(webpage_result).reshape(1, -1)
+                    query_webpage_information_similarity_score = float(cosine_similarity(query_embeddings, webpage_information_embeddings)[0][0])
+                    # logger.info(f"Webpage Information and Similarity Score: {result} - {webpage_result} - {query_webpage_information_similarity_score}")
+                    if query_webpage_information_similarity_score > 0.65:
+                        webpage_information_complete += webpage_result
+                        webpage_information_complete += " \n "
+                        webpage_information_complete += " \n "
+                    if query_webpage_information_similarity_score > best_query_webpage_information_similarity_score:
+                        best_query_webpage_information_similarity_score = query_webpage_information_similarity_score
+                        best_webpage_information = webpage_result
+                    webpage_results = visit_webpage_main(result)
+                    webpage_result = " \n ".join(webpage_results)
+                    # for webpage_result in webpage_results:
+                    query_embeddings = sentence_transformer_model.encode_query(state["messages"][-1].content).reshape(1, -1)
+                    webpage_information_embeddings = sentence_transformer_model.encode_query(webpage_result).reshape(1, -1)
+                    query_webpage_information_similarity_score = float(cosine_similarity(query_embeddings, webpage_information_embeddings)[0][0])
+                    # logger.info(f"Webpage Information and Similarity Score: {result} - {webpage_result} - {query_webpage_information_similarity_score}")
+                    if query_webpage_information_similarity_score > 0.65:
+                        webpage_information_complete += webpage_result
+                        webpage_information_complete += " \n "
+                        webpage_information_complete += " \n "
+                    if query_webpage_information_similarity_score > best_query_webpage_information_similarity_score:
+                        best_query_webpage_information_similarity_score = query_webpage_information_similarity_score
+                        best_webpage_information = webpage_result
             except:
                 pass
         else:
         # if question == "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?" or question == "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?":
         # if question == "The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places.":
         # if question == "Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations.":
+        if question == "Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\n\nWhat does Teal\'c say in response to the question 'Isn\'t that hot?'":
+        # if question != "aalskdalsdh" and filename == "":
+            time.sleep(120)
             state = {