import os
import string
from typing import Any, Dict, List, Tuple, Union

import chromadb
import numpy as np
import openai
import pandas as pd
import requests
import streamlit as st
from datasets import load_dataset
from langchain.document_loaders import TextLoader
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from scipy.spatial.distance import cosine

openai.api_key = os.environ["OPENAI_API_KEY"]

def call_chatgpt(prompt: str) -> str:
    """
    Uses the OpenAI API to generate an AI response to a prompt.

    Args:
        prompt: A string representing the prompt to send to the OpenAI API.

    Returns:
        A string representing the AI's generated response.
    """
    # Use the OpenAI API to generate a response based on the input prompt.
    response = openai.Completion.create(
        model="gpt-3.5-turbo-instruct",
        prompt=prompt,
        temperature=0.5,
        max_tokens=500,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    # Extract the text from the first (and only) choice in the response output.
    ans = response.choices[0]["text"]
    # Return the generated AI response.
    return ans
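
# A minimal usage sketch (hypothetical prompt; assumes a valid OPENAI_API_KEY and
# the legacy openai<1.0 SDK, which is what exposes openai.Completion). Kept in
# comments so no paid API call happens at import time:
#
#   answer = call_chatgpt("Summarize semantic textual similarity in one sentence.")
#   print(answer)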

def openai_text_embedding(prompt: str) -> List[float]:
    """Returns the text-embedding-ada-002 embedding vector for the given prompt."""
    return openai.Embedding.create(input=prompt, model="text-embedding-ada-002")[
        "data"
    ][0]["embedding"]
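
# Usage sketch: text-embedding-ada-002 returns a 1536-dimensional list of floats,
# not a string, hence the List[float] annotation above.
#
#   vec = openai_text_embedding("hello world")
#   len(vec)  # 1536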

def calculate_sts_openai_score(sentence1: str, sentence2: str) -> float:
    # Compute an OpenAI embedding for each sentence.
    embedding1 = openai_text_embedding(sentence1)
    embedding2 = openai_text_embedding(sentence2)
    # Convert the embedding lists to numpy arrays.
    embedding1 = np.asarray(embedding1)
    embedding2 = np.asarray(embedding2)
    # Cosine similarity = 1 - cosine distance between the embeddings.
    similarity_score = 1 - cosine(embedding1, embedding2)
    return similarity_score
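
# Example (hypothetical sentences; each call below makes two embedding API requests):
#
#   score = calculate_sts_openai_score(
#       "The cat sat on the mat.",
#       "A cat is sitting on a mat.",
#   )
#   # score is a float in [-1, 1]; near-paraphrases typically land close to 1.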

def query(payload: Dict[str, Any]) -> Dict[str, Any]:
    """
    Sends a JSON payload to a predefined API URL and returns the JSON response.

    Args:
        payload (Dict[str, Any]): The JSON payload to be sent to the API.

    Returns:
        Dict[str, Any]: The JSON response received from the API.
    """
    # API endpoint URL
    API_URL = "https://sks7h7h5qkhoxwxo.us-east-1.aws.endpoints.huggingface.cloud"
    # Headers to indicate both the request and response formats are JSON
    headers = {"Accept": "application/json", "Content-Type": "application/json"}
    # Sending a POST request with the JSON payload and headers
    response = requests.post(API_URL, headers=headers, json=payload)
    # Returning the JSON response
    return response.json()
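
# The endpoint appears to follow the usual Hugging Face Inference Endpoints
# text-generation schema; a minimal payload sketch (assuming the endpoint is awake):
#
#   result = query({"inputs": "Hello", "parameters": {"max_new_tokens": 50}})
#   # result is expected to be a list like [{"generated_text": "..."}]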

def llama2_7b_ysa(prompt: str) -> str:
    """
    Queries a model and retrieves the generated text based on the given prompt.

    This function sends a prompt to a model (presumably named 'llama2_7b') and extracts
    the generated text from the model's response. It is tailored for handling responses
    from a specific API or model query structure where the response is expected to be
    a list of dictionaries, with at least one dictionary containing a key 'generated_text'.

    Parameters:
    - prompt (str): The text prompt to send to the model.

    Returns:
    - str: The generated text response from the model.

    Note:
    - The function assumes that the 'query' function is previously defined and accessible
      within the same scope or module. It should send a request to the model and return
      the response in a structured format.
    - The 'parameters' dictionary currently caps generation at 200 new tokens; it can be
      extended with any other request parameters supported by the model API.
    """
    # Define the query payload with the prompt and any additional parameters
    query_payload: Dict[str, Any] = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": 200},
    }
    # Send the query to the model and store the output response
    output = query(query_payload)
    # Extract the 'generated_text' from the first item in the response list
    response: str = output[0]["generated_text"]
    return response
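
# A minimal end-to-end smoke test (a sketch; assumes OPENAI_API_KEY is set and the
# Hugging Face endpoint above is running). Deliberately wrapped in a function and
# never called at import time, so it costs nothing unless invoked by hand:
def _smoke_test() -> None:
    demo_prompt = "What is semantic textual similarity?"
    print("ChatGPT:", call_chatgpt(demo_prompt))
    print("Llama 2:", llama2_7b_ysa(demo_prompt))
    print("STS score:", calculate_sts_openai_score("I like cats.", "I love cats."))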