bachephysicdun committed on
Commit
5a2b2d3
·
1 Parent(s): 20f2960

add simple stream and incomplete files

Dockerfile CHANGED
@@ -16,4 +16,6 @@ RUN pip install --no-cache-dir --upgrade -r requirements.txt
  # Again, ensure the copied files are owned by 'user'
  COPY --chown=user . /app
  # Specify the command to run when the container starts
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+ # Pass the secret variable to the application
+ RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true
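Note that the new `RUN --mount=type=secret,...` instruction is committed without a command, so the build step does not yet consume the secret. A minimal sketch of how application code might pick up `HF_TOKEN`, assuming the usual Hugging Face Spaces behaviour (build-time secrets mounted under `/run/secrets/<id>`, runtime secrets injected as environment variables); the helper name is illustrative and not part of this repo:

```python
# Hedged sketch: resolve HF_TOKEN from the environment first, then from the
# build-time secret mount. Assumes Hugging Face Spaces conventions.
import os
from pathlib import Path

def read_hf_token() -> str | None:
    token = os.environ.get("HF_TOKEN")           # runtime secret as env var
    if token:
        return token
    secret_file = Path("/run/secrets/HF_TOKEN")  # build-time secret mount
    if secret_file.is_file():
        return secret_file.read_text().strip()
    return None
```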
__init__.py ADDED
File without changes
__pycache__/app.cpython-311.pyc ADDED
Binary file (493 Bytes).
 
app/.ipynb_checkpoints/Untitled-checkpoint.ipynb ADDED
@@ -0,0 +1,6 @@
+ {
+  "cells": [],
+  "metadata": {},
+  "nbformat": 4,
+  "nbformat_minor": 5
+ }
app/.ipynb_checkpoints/Untitled1-checkpoint.ipynb ADDED
@@ -0,0 +1,6 @@
+ {
+  "cells": [],
+  "metadata": {},
+  "nbformat": 4,
+  "nbformat_minor": 5
+ }
app/.ipynb_checkpoints/Untitled2-checkpoint.ipynb ADDED
@@ -0,0 +1,6 @@
+ {
+  "cells": [],
+  "metadata": {},
+  "nbformat": 4,
+  "nbformat_minor": 5
+ }
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (183 Bytes).
app/__pycache__/callbacks.cpython-311.pyc ADDED
Binary file (1.78 kB).
app/__pycache__/chains.cpython-311.pyc ADDED
Binary file (1.12 kB).
app/__pycache__/crud.cpython-311.pyc ADDED
Binary file (1.44 kB).
app/__pycache__/data_indexing.cpython-311.pyc ADDED
Binary file (7.12 kB).
app/__pycache__/data_indexing.cpython-312.pyc ADDED
Binary file (5.75 kB).
app/__pycache__/database.cpython-311.pyc ADDED
Binary file (705 Bytes).
app/__pycache__/database.cpython-312.pyc ADDED
Binary file (610 Bytes).
app/__pycache__/main.cpython-311.pyc ADDED
Binary file (2.7 kB).
app/__pycache__/models.cpython-311.pyc ADDED
Binary file (1.83 kB).
app/__pycache__/models.cpython-312.pyc ADDED
Binary file (1.51 kB).
app/__pycache__/prompts.cpython-311.pyc ADDED
Binary file (1.76 kB).
app/__pycache__/prompts.cpython-312.pyc ADDED
Binary file (1.59 kB).
app/__pycache__/schemas.cpython-311.pyc ADDED
Binary file (1.12 kB).
app/__pycache__/schemas.cpython-312.pyc ADDED
Binary file (908 Bytes).
app/callbacks.py ADDED
@@ -0,0 +1,24 @@
+ from typing import Dict, Any, List
+ from langchain_core.callbacks import BaseCallbackHandler
+ import schemas
+ import crud
+
+
+ class LogResponseCallback(BaseCallbackHandler):
+
+     def __init__(self, user_request: schemas.UserRequest, db):
+         super().__init__()
+         self.user_request = user_request
+         self.db = db
+
+     def on_llm_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:
+         """Run when llm ends running."""
+         # TODO: The function on_llm_end is going to be called when the LLM stops sending
+         # the response. Use the crud.add_message function to capture that response.
+         raise NotImplementedError
+
+     def on_llm_start(
+         self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
+     ) -> Any:
+         for prompt in prompts:
+             print(prompt)
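The `on_llm_end` TODO above asks for the model's reply to be persisted through `crud.add_message`. A minimal sketch of what that body could look like, assuming `schemas.MessageBase` eventually carries `message` and `type` fields (it is still an empty stub in `app/schemas.py`) and that `outputs` arrives as a LangChain `LLMResult`:

```python
# Hedged sketch, not the repo's implementation: persist the LLM reply.
# Assumes `outputs` is an LLMResult and MessageBase(message=..., type=...).
def on_llm_end(self, outputs, **kwargs):
    generated_text = outputs.generations[0][0].text
    reply = schemas.MessageBase(message=generated_text, type="AI")
    crud.add_message(self.db, reply, self.user_request.username)
```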
app/chains.py ADDED
@@ -0,0 +1,55 @@
+ #%%
+ import os
+ from dotenv import load_dotenv
+ load_dotenv('../../.env')
+
+ from langchain_huggingface import HuggingFaceEndpoint
+ from langchain_core.runnables import RunnablePassthrough
+ import schemas
+ from prompts import (
+     raw_prompt,
+     format_context,
+     tokenizer
+ )
+ from data_indexing import DataIndexer
+
+
+ # data_indexer = DataIndexer()
+
+ llm = HuggingFaceEndpoint(
+     repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
+     huggingfacehub_api_token=os.environ['HF_TOKEN'],
+     max_new_tokens=512,
+     stop_sequences=[tokenizer.eos_token],
+     streaming=True,
+ )
+
+ simple_chain = (raw_prompt | llm).with_types(input_type=schemas.UserQuestion)
+ # %%
+
+ # data_indexer = DataIndexer()
+
+ # # TODO: create formatted_chain by piping raw_prompt_formatted and the LLM endpoint.
+ # formatted_chain = None
+
+ # # TODO: use history_prompt_formatted and HistoryInput to create the history_chain
+ # history_chain = None
+
+ # # TODO: Let's construct the standalone_chain by piping standalone_prompt_formatted with the LLM
+ # standalone_chain = None
+
+ # input_1 = RunnablePassthrough.assign(new_question=standalone_chain)
+ # input_2 = {
+ #     'context': lambda x: format_context(data_indexer.search(x['new_question'])),
+ #     'standalone_question': lambda x: x['new_question']
+ # }
+ # input_to_rag_chain = input_1 | input_2
+
+ # # TODO: use input_to_rag_chain, rag_prompt_formatted,
+ # # HistoryInput and the LLM to build the rag_chain.
+ # rag_chain = None
+
+ # # TODO: Implement the filtered_rag_chain. It should be the
+ # # same as the rag_chain but with hybrid_search = True.
+ # filtered_rag_chain = None
+
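The commented TODOs above all follow the same piping pattern as `simple_chain`. A minimal sketch of how they could be wired once the `*_formatted` prompts in `app/prompts.py` stop being `None` (names taken from this commit's own TODO comments, so this only runs after those pieces exist):

```python
# Hedged sketch of the TODO chains; requires the *_formatted prompts,
# the data_indexer, and the input_to_rag_chain pipeline to be defined.
from prompts import (
    raw_prompt_formatted,
    history_prompt_formatted,
    standalone_prompt_formatted,
    rag_prompt_formatted,
)

formatted_chain = (raw_prompt_formatted | llm).with_types(input_type=schemas.UserQuestion)
history_chain = (history_prompt_formatted | llm).with_types(input_type=schemas.HistoryInput)
standalone_chain = (standalone_prompt_formatted | llm).with_types(input_type=schemas.HistoryInput)

# input_to_rag_chain is the commented RunnablePassthrough pipeline above.
rag_chain = (input_to_rag_chain | rag_prompt_formatted | llm).with_types(
    input_type=schemas.HistoryInput
)
```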
app/crud.py ADDED
@@ -0,0 +1,23 @@
+ from sqlalchemy.orm import Session
+ import models, schemas
+
+
+ def get_or_create_user(db: Session, username: str):
+     user = db.query(models.User).filter(models.User.username == username).first()
+     if not user:
+         user = models.User(username=username)
+         db.add(user)
+         db.commit()
+         db.refresh(user)
+     return user
+
+ def add_message(db: Session, message: schemas.MessageBase, username: str):
+     # TODO: Implement the add_message function. It should:
+     # - get or create the user with the username
+     # - create a models.Message instance
+     # - pass the retrieved user to the message instance
+     # - save the message instance to the database
+     raise NotImplementedError
+
+ def get_user_chat_history(db: Session, username: str):
+     raise NotImplementedError
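`add_message` and `get_user_chat_history` are left as TODOs. A minimal sketch that mirrors `get_or_create_user` above, assuming `schemas.MessageBase` exposes `message` and `type` fields matching `models.Message`:

```python
# Hedged sketch of the two TODO functions (drop-in for app/crud.py).
def add_message(db: Session, message: schemas.MessageBase, username: str):
    user = get_or_create_user(db, username)
    db_message = models.Message(
        message=message.message,  # assumed MessageBase fields
        type=message.type,
        user=user,
    )
    db.add(db_message)
    db.commit()
    db.refresh(db_message)
    return db_message


def get_user_chat_history(db: Session, username: str):
    user = db.query(models.User).filter(models.User.username == username).first()
    if not user:
        return []
    return (
        db.query(models.Message)
        .filter(models.Message.user_id == user.id)
        .order_by(models.Message.timestamp)
        .all()
    )
```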
app/data_indexing.py ADDED
@@ -0,0 +1,150 @@
+ import os
+ import uuid
+ from pathlib import Path
+ from pinecone.grpc import PineconeGRPC as Pinecone
+ from pinecone import ServerlessSpec
+ from langchain_community.vectorstores import Chroma
+ from langchain_openai import OpenAIEmbeddings
+
+ current_dir = Path(__file__).resolve().parent
+
+
+ class DataIndexer:
+
+     source_file = os.path.join(current_dir, 'sources.txt')
+
+     def __init__(self, index_name='langchain-repo') -> None:
+
+         # TODO: choose your embedding model
+         # self.embedding_client = InferenceClient(
+         #     "dunzhang/stella_en_1.5B_v5",
+         #     token=os.environ['HF_TOKEN'],
+         # )
+         self.embedding_client = OpenAIEmbeddings()
+         self.index_name = index_name
+         self.pinecone_client = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))
+
+         if index_name not in self.pinecone_client.list_indexes().names():
+             # TODO: create your index if it doesn't exist. Use the create_index function.
+             # Make sure to choose the dimension that corresponds to your embedding model
+             pass
+
+         self.index = self.pinecone_client.Index(self.index_name)
+         # TODO: make sure to build the index.
+         self.source_index = None
+
+     def get_source_index(self):
+         if not os.path.isfile(self.source_file):
+             print('No source file')
+             return None
+
+         print('create source index')
+
+         with open(self.source_file, 'r') as file:
+             sources = file.readlines()
+
+         sources = [s.rstrip('\n') for s in sources]
+         vectorstore = Chroma.from_texts(
+             sources, embedding=self.embedding_client
+         )
+         return vectorstore
+
+     def index_data(self, docs, batch_size=32):
+
+         with open(self.source_file, 'a') as file:
+             for doc in docs:
+                 file.writelines(doc.metadata['source'] + '\n')
+
+         for i in range(0, len(docs), batch_size):
+             batch = docs[i: i + batch_size]
+
+             # TODO: create a list of the vector representations of each text data in the batch
+             # TODO: choose your embedding model
+             # values = self.embedding_client.embed_documents([
+             #     doc.page_content for doc in batch
+             # ])
+
+             # values = self.embedding_client.feature_extraction([
+             #     doc.page_content for doc in batch
+             # ])
+             values = None
+
+             # TODO: create a list of unique identifiers for each element in the batch with the uuid package.
+             vector_ids = None
+
+             # TODO: create a list of dictionaries representing the metadata. Capture the text data
+             # with the "text" key, and make sure to capture the rest of the doc.metadata.
+             metadatas = None
+
+             # create a list of dictionaries with keys "id" (the unique identifiers), "values"
+             # (the vector representation), and "metadata" (the metadata).
+             vectors = [{
+                 'id': vector_id,
+                 'values': value,
+                 'metadata': metadata
+             } for vector_id, value, metadata in zip(vector_ids, values, metadatas)]
+
+             try:
+                 # TODO: Use the function upsert to upload the data to the database.
+                 upsert_response = None
+                 print(upsert_response)
+             except Exception as e:
+                 print(e)
+
+     def search(self, text_query, top_k=5, hybrid_search=False):
+
+         filter = None
+         if hybrid_search and self.source_index:
+             # I implemented the filtering process to pull the 50 most relevant file names
+             # to the question. Make sure to adjust this number as you see fit.
+             source_docs = self.source_index.similarity_search(text_query, 50)
+             filter = {"source": {"$in": [doc.page_content for doc in source_docs]}}
+
+         # TODO: embed the text_query by using the embedding model
+         # TODO: choose your embedding model
+         # vector = self.embedding_client.feature_extraction(text_query)
+         # vector = self.embedding_client.embed_query(text_query)
+         vector = None
+
+         # TODO: use the vector representation of the text_query to
+         # search the database by using the query function.
+         result = None
+
+         docs = []
+         for res in result["matches"]:
+             # TODO: From the result's metadata, extract the "text" element.
+             pass
+
+         return docs
+
+
+ if __name__ == '__main__':
+
+     from langchain_community.document_loaders import GitLoader
+     from langchain_text_splitters import (
+         Language,
+         RecursiveCharacterTextSplitter,
+     )
+
+     loader = GitLoader(
+         clone_url="https://github.com/langchain-ai/langchain",
+         repo_path="./code_data/langchain_repo/",
+         branch="master",
+     )
+
+     python_splitter = RecursiveCharacterTextSplitter.from_language(
+         language=Language.PYTHON, chunk_size=10000, chunk_overlap=100
+     )
+
+     docs = loader.load()
+     docs = [doc for doc in docs if doc.metadata['file_type'] in ['.py', '.md']]
+     docs = [doc for doc in docs if len(doc.page_content) < 50000]
+     docs = python_splitter.split_documents(docs)
+     for doc in docs:
+         doc.page_content = '# {}\n\n'.format(doc.metadata['source']) + doc.page_content
+
+     indexer = DataIndexer()
+     with open('/app/sources.txt', 'a') as file:
+         for doc in docs:
+             file.writelines(doc.metadata['source'] + '\n')
+     indexer.index_data(docs)
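The TODO placeholders in `index_data` and `search` map onto the embedding client created in `__init__` plus the standard Pinecone index calls (`upsert` and `query`). A minimal sketch of those pieces, assuming `OpenAIEmbeddings` is kept as the embedding model (so the index dimension would be 1536):

```python
# Hedged sketch of the placeholders above (to slot into the existing methods).
# Inside index_data(), per batch:
values = self.embedding_client.embed_documents([doc.page_content for doc in batch])
vector_ids = [str(uuid.uuid4()) for _ in batch]
metadatas = [{"text": doc.page_content, **doc.metadata} for doc in batch]
# ... build `vectors` as above, then:
upsert_response = self.index.upsert(vectors=vectors)

# Inside search():
vector = self.embedding_client.embed_query(text_query)
result = self.index.query(
    vector=vector, top_k=top_k, filter=filter, include_metadata=True
)
docs = [match["metadata"]["text"] for match in result["matches"]]
```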
app/database.py ADDED
@@ -0,0 +1,55 @@
+ #%%
+
+ # create_engine: This function creates a new SQLAlchemy “engine,”
+ # which is an interface to the database. It acts as the core connection to
+ # your database and manages the communication between your Python code and the database.
+ from sqlalchemy import create_engine
+
+ # declarative_base: This function is used to create a base class for our ORM models.
+ # All of your database table classes (models) will inherit from this base class.
+ # This base class also ties each model to a corresponding table in the database.
+ from sqlalchemy.ext.declarative import declarative_base
+
+ # sessionmaker: This is a factory function for creating new Session objects.
+ # Sessions are used to manage the operations (queries, updates, etc.) on
+ # the database in a transaction-safe way. They provide an interface for
+ # interacting with the database.
+ from sqlalchemy.orm import sessionmaker
+
+ # This line defines the URL for your database connection.
+ # SQLAlchemy uses this URL to determine what type of database you’re
+ # connecting to, and where it’s located.
+ # sqlite:// tells SQLAlchemy that you are using SQLite as the database engine.
+ # ./test.db specifies the relative path to the database file (test.db) in the
+ # current directory (./). SQLite stores the entire database as a single file on disk
+ SQLALCHEMY_DATABASE_URL = "sqlite:///./test.db"
+
+ # This line creates the database engine by passing the SQLALCHEMY_DATABASE_URL to the
+ # create_engine function. The engine is responsible for managing the connection to the database.
+ # connect_args={"check_same_thread": False}: This argument is specific to SQLite. By default,
+ # SQLite does not allow multiple threads to interact with the database. The check_same_thread
+ # argument disables this check, allowing the engine to be used in a multi-threaded environment.
+ # This is necessary for many web applications (like FastAPI) that might have multiple requests
+ # hitting the database simultaneously.
+ engine = create_engine(
+     SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
+ )
+
+ # you create a factory for database sessions. SessionLocal will be used to create individual sessions,
+ # which are needed to interact with the database (querying data, inserting/updating records, etc.).
+ # 1. autocommit=False: This means that changes (inserts, updates, deletes) to the database will not be
+ #    committed automatically. You will need to explicitly commit transactions using session.commit().
+ #    This gives you better control over when data is saved.
+ # 2. autoflush=False: This disables automatic flushing. Flushing is the process of sending any pending
+ #    changes to the database before executing queries. With autoflush=False, the session will not
+ #    automatically send updates to the database unless you explicitly tell it to by calling flush() or commit().
+ #    It prevents unexpected database updates.
+ # 3. bind=engine: This ties the session to the database engine. Any session created with SessionLocal()
+ #    will use the engine to communicate with the database.
+ SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+ # This line creates a base class for all of your ORM models (i.e., classes that represent database tables).
+ # Each model (class) will inherit from Base, and SQLAlchemy will use this base class to generate the
+ # necessary SQL statements to create tables and handle CRUD operations (Create, Read, Update, Delete).
+ Base = declarative_base()
+ # %%
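The comments above explain the engine/`SessionLocal`/`Base` trio; in practice each unit of work opens a session from `SessionLocal`, commits explicitly (autocommit is off), and always closes it. A short usage sketch of that pattern (the same idea appears as the commented-out `get_db` dependency in `app/main.py`):

```python
# Hedged usage sketch: one unit of work with an explicit commit and cleanup.
from database import SessionLocal

db = SessionLocal()
try:
    # ... query or add ORM objects here, e.g. db.add(obj)
    db.commit()   # nothing is persisted until this call (autocommit=False)
finally:
    db.close()    # always release the connection
```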
app/main.py ADDED
@@ -0,0 +1,94 @@
+ from langchain_core.runnables import Runnable
+ from langchain_core.callbacks import BaseCallbackHandler
+ from fastapi import FastAPI, Request, Depends
+ from sse_starlette.sse import EventSourceResponse
+ from langserve.serialization import WellKnownLCSerializer
+ from typing import List
+ from sqlalchemy.orm import Session
+
+ import schemas
+ from chains import simple_chain
+ import crud, models, schemas
+ from database import SessionLocal, engine
+ from callbacks import LogResponseCallback
+
+
+ models.Base.metadata.create_all(bind=engine)
+
+ app = FastAPI()
+
+ # def get_db():
+ #     db = SessionLocal()
+ #     try:
+ #         yield db
+ #     finally:
+ #         db.close()
+
+
+ async def generate_stream(input_data: schemas.BaseModel, runnable: Runnable, callbacks: List[BaseCallbackHandler]=[]):
+     for output in runnable.stream(input_data.dict(), config={"callbacks": callbacks}):
+         data = WellKnownLCSerializer().dumps(output).decode("utf-8")
+         yield {'data': data, "event": "data"}
+     yield {"event": "end"}
+
+
+ @app.post("/simple/stream")
+ async def simple_stream(request: Request):
+     data = await request.json()
+     user_question = schemas.UserQuestion(**data['input'])
+     return EventSourceResponse(generate_stream(user_question, simple_chain))
+
+
+ # @app.post("/formatted/stream")
+ # async def formatted_stream(request: Request):
+ #     # TODO: use the formatted_chain to implement the "/formatted/stream" endpoint.
+ #     raise NotImplementedError
+
+
+ # def get_db():
+ #     db = SessionLocal()
+ #     try:
+ #         yield db
+ #     finally:
+ #         db.close()
+
+ # @app.post("/history/stream")
+ # async def history_stream(request: Request, db: Session = Depends(get_db)):
+ #     # TODO: Let's implement the "/history/stream" endpoint. The endpoint should follow those steps:
+ #     # - The endpoint receives the request
+ #     # - The request is parsed into a user request
+ #     # - The user request is used to pull the chat history of the user
+ #     # - We add as part of the user history the current question by using add_message.
+ #     # - We create an instance of HistoryInput by using format_chat_history.
+ #     # - We use the history input within the history chain.
+ #     raise NotImplementedError
+
+
+ # @app.post("/rag/stream")
+ # async def rag_stream(request: Request, db: Session = Depends(get_db)):
+ #     # TODO: Let's implement the "/rag/stream" endpoint. The endpoint should follow those steps:
+ #     # - The endpoint receives the request
+ #     # - The request is parsed into a user request
+ #     # - The user request is used to pull the chat history of the user
+ #     # - We add as part of the user history the current question by using add_message.
+ #     # - We create an instance of HistoryInput by using format_chat_history.
+ #     # - We use the history input within the rag chain.
+ #     raise NotImplementedError
+
+
+ # @app.post("/filtered_rag/stream")
+ # async def filtered_rag_stream(request: Request, db: Session = Depends(get_db)):
+ #     # TODO: Let's implement the "/filtered_rag/stream" endpoint. The endpoint should follow those steps:
+ #     # - The endpoint receives the request
+ #     # - The request is parsed into a user request
+ #     # - The user request is used to pull the chat history of the user
+ #     # - We add as part of the user history the current question by using add_message.
+ #     # - We create an instance of HistoryInput by using format_chat_history.
+ #     # - We use the history input within the filtered rag chain.
+ #     raise NotImplementedError
+
+
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run("main:app", host="localhost", reload=True, port=8000)
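The commented `/history/stream` endpoint above lists its steps in prose. A minimal sketch of that flow, assuming the pieces that are still TODOs elsewhere in this commit exist (`chains.history_chain`, `prompts.format_chat_history`, a `UserRequest` with a `question` field, and filled-in `MessageBase` fields):

```python
# Hedged sketch of the /history/stream flow; every helper it touches is still
# a TODO elsewhere in this commit, so the imports and fields are assumptions.
from chains import history_chain
from prompts import format_chat_history

def get_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()

@app.post("/history/stream")
async def history_stream(request: Request, db: Session = Depends(get_db)):
    data = await request.json()
    user_request = schemas.UserRequest(**data['input'])
    # store the new question, then pull the user's full history
    crud.add_message(
        db, schemas.MessageBase(message=user_request.question, type="Human"),
        user_request.username,
    )
    messages = crud.get_user_chat_history(db, user_request.username)
    history_input = schemas.HistoryInput(
        chat_history=format_chat_history(messages),
        question=user_request.question,
    )
    return EventSourceResponse(generate_stream(
        history_input, history_chain, [LogResponseCallback(user_request, db)],
    ))
```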
app/models.py ADDED
@@ -0,0 +1,55 @@
+ from sqlalchemy import Column, ForeignKey, Integer, String, DateTime
+ from sqlalchemy.orm import relationship
+
+ from datetime import datetime
+
+ from database import Base
+
+ class User(Base):
+
+     # This defines the name of the table in the database.
+     # Here, the class User is mapped to a table called users.
+     __tablename__ = "users"
+     __table_args__ = {'extend_existing': True}
+
+     # This line defines a column called id in the users table.
+     # Integer: The data type of this column is an integer.
+     # primary_key=True: This makes the id column the primary key
+     # for the users table, meaning each row will have a unique id.
+     # index=True: This creates an index on the id column, making
+     # lookups by id faster.
+     id = Column(Integer, primary_key=True, index=True)
+
+     # This line defines a column called username.
+     username = Column(String, unique=True, index=True)
+
+     # This establishes a relationship between the User model
+     # and a related model called Message.
+     # relationship("Message"): This creates a one-to-many relationship between User and Message.
+     # It indicates that each user can have many associated messages
+     # (the relationship is “one user to many messages”).
+     # back_populates="user": This specifies that the relationship is bidirectional,
+     # meaning the Message model will also have a corresponding relationship with User.
+     # The back_populates="user" part tells SQLAlchemy to link the relationship on the
+     # Message side back to the user field, creating a mutual relationship.
+     messages = relationship("Message", back_populates="user")
+
+
+ # TODO: Implement the Message SQLAlchemy model. Message should have a primary key,
+ # a message attribute to store the content of messages, a type, AI or Human,
+ # depending on if it is a user question or an AI response, a timestamp to
+ # order by time and a user attribute to get the user instance associated
+ # with the message. We also need a user_id that will use the User.id
+ # attribute as a foreign key.
+
+ class Message(Base):
+     __tablename__ = "messages"
+     __table_args__ = {'extend_existing': True}
+
+     id = Column(Integer, primary_key=True, index=True)
+     message = Column(String, nullable=False)
+     type = Column(String(50), nullable=False)
+     timestamp = Column(DateTime, default=datetime.utcnow, nullable=False)
+     user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
+
+     user = relationship("User", back_populates="messages")
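A short usage sketch of the bidirectional relationship the comments describe: because of the `back_populates` pair, assigning `user=` on a `Message` also makes it appear in `user.messages` (the session and data here are illustrative only):

```python
# Hedged usage sketch of the User <-> Message relationship.
from database import SessionLocal
from models import User, Message

db = SessionLocal()
user = User(username="alice")                           # illustrative data
msg = Message(message="Hi!", type="Human", user=user)
db.add_all([user, msg])
db.commit()
print([m.message for m in user.messages])              # -> ['Hi!']
db.close()
```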
app/prompts.py ADDED
@@ -0,0 +1,58 @@
+ #%%
+ import os
+ # from dotenv import load_dotenv
+ # load_dotenv('../../.env')
+
+ from langchain_core.prompts import PromptTemplate
+ from typing import List
+
+ from transformers import AutoTokenizer
+ from huggingface_hub import login
+
+ import models
+
+
+ login(os.environ['HF_TOKEN'])
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
+
+ def format_prompt(prompt) -> PromptTemplate:
+     # TODO: format the input prompt by using the model specific instruction template
+     # TODO: return a langchain PromptTemplate
+     raise NotImplementedError
+
+ def format_chat_history(messages: List[models.Message]):
+     # TODO: implement format_chat_history to format
+     # the list of Message into a text of chat history.
+     raise NotImplementedError
+
+ def format_context(docs: List[str]):
+     # TODO: the output of the DataIndexer.search is a list of text,
+     # so we need to concatenate that list into a text that can fit into
+     # the rag_prompt_formatted. Implement format_context that takes a
+     # list of strings and returns the context as one string.
+     raise NotImplementedError
+
+ raw_prompt = "{question}"
+
+ # TODO: Create the history_prompt prompt that will capture the question and the conversation history.
+ # The history_prompt needs a {chat_history} placeholder and a {question} placeholder.
+ history_prompt: str = None
+
+ # TODO: Create the standalone_prompt prompt that will capture the question and the chat history
+ # to generate a standalone question. It needs a {chat_history} placeholder and a {question} placeholder.
+ standalone_prompt: str = None
+
+ # TODO: Create the rag_prompt that will capture the context and the standalone question to generate
+ # a final answer to the question.
+ rag_prompt: str = None
+
+ # TODO: create raw_prompt_formatted by using format_prompt
+ raw_prompt_formatted = None
+ raw_prompt = PromptTemplate.from_template(raw_prompt)
+
+ # TODO: use format_prompt to create history_prompt_formatted
+ history_prompt_formatted: PromptTemplate = None
+ # TODO: use format_prompt to create standalone_prompt_formatted
+ standalone_prompt_formatted: PromptTemplate = None
+ # TODO: use format_prompt to create rag_prompt_formatted
+ rag_prompt_formatted: PromptTemplate = None
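`format_prompt`, `format_chat_history` and `format_context` are left as TODOs. A minimal sketch, assuming the tokenizer's standard `apply_chat_template` is an acceptable way to wrap a prompt in the Llama 3 instruction format (the author may intend a different template):

```python
# Hedged sketch of the three TODO helpers above.
def format_prompt(prompt) -> PromptTemplate:
    # Wrap the raw template in the model's chat template, keeping the
    # {placeholders} intact, then return a LangChain PromptTemplate.
    chat = [{"role": "user", "content": prompt}]
    formatted = tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    return PromptTemplate.from_template(formatted)

def format_chat_history(messages: List[models.Message]):
    return "\n".join(f"{m.type}: {m.message}" for m in messages)

def format_context(docs: List[str]):
    return "\n\n".join(docs)
```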
app/schemas.py ADDED
@@ -0,0 +1,18 @@
+ from pydantic import BaseModel
+
+ class UserQuestion(BaseModel):
+     question: str
+
+ # TODO: create a HistoryInput data model with a chat_history and question attributes.
+ class HistoryInput(BaseModel):
+     pass
+
+ # TODO: let's create a UserRequest data model with a question and username attribute.
+ # This will be used to parse the input request.
+ class UserRequest(BaseModel):
+     username: str
+
+ # TODO: implement MessageBase as a schema mapping from the database model to the
+ # FastAPI data model. Basically MessageBase should have the same attributes as models.Message
+ class MessageBase(BaseModel):
+     pass
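The three TODO schemas describe their fields in the comments. A minimal sketch with field names mirroring `models.Message` (these names are an assumption until the schemas are finalised):

```python
# Hedged sketch of the TODO schemas; field names mirror models.Message.
from typing import Optional
from datetime import datetime

class HistoryInput(BaseModel):
    chat_history: str
    question: str

class UserRequest(BaseModel):
    username: str
    question: str

class MessageBase(BaseModel):
    message: str
    type: str
    timestamp: Optional[datetime] = None
```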
app/test.db ADDED
Binary file (24.6 kB).
 
check.ipynb ADDED
@@ -0,0 +1,229 @@
+ {
+  "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": 1,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "%load_ext autoreload\n",
+     "%autoreload 2"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 2,
+    "metadata": {},
+    "outputs": [
+     {
+      "data": {
+       "text/plain": [
+        "True"
+       ]
+      },
+      "execution_count": 2,
+      "metadata": {},
+      "output_type": "execute_result"
+     }
+    ],
+    "source": [
+     "import requests\n",
+     "import os\n",
+     "\n",
+     "from dotenv import load_dotenv\n",
+     "load_dotenv()"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 3,
+    "metadata": {},
+    "outputs": [
+     {
+      "data": {
+       "text/plain": [
+        "'World!'"
+       ]
+      },
+      "execution_count": 3,
+      "metadata": {},
+      "output_type": "execute_result"
+     }
+    ],
+    "source": [
+     "url = 'https://bachephysicdun-backend.hf.space'\n",
+     "response = requests.get(url)\n",
+     "response.json()['Hello']"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 4,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "from app.data_indexing import DataIndexer"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 5,
+    "metadata": {},
+    "outputs": [
+     {
+      "data": {
+       "text/plain": [
+        "sqlalchemy.orm.decl_api.Base"
+       ]
+      },
+      "execution_count": 5,
+      "metadata": {},
+      "output_type": "execute_result"
+     }
+    ],
+    "source": [
+     "from app.database import Base\n",
+     "\n",
+     "Base"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 6,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "import sys\n",
+     "sys.path.append('./app/')\n",
+     "from app.models import User, Message"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 7,
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
+       "Token is valid (permission: fineGrained).\n",
+       "Your token has been saved to /Users/amin/.cache/huggingface/token\n",
+       "Login successful\n",
+       "{'input_ids': [128000, 9906, 11, 1268, 527, 499, 30], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}\n"
+      ]
+     }
+    ],
+    "source": [
+     "import os\n",
+     "from transformers import AutoTokenizer\n",
+     "from huggingface_hub import login\n",
+     "\n",
+     "login(os.environ['HF_TOKEN'])\n",
+     "\n",
+     "# Load the tokenizer for the gated model\n",
+     "tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Meta-Llama-3-8B-Instruct\")\n",
+     "\n",
+     "# Example usage\n",
+     "text = \"Hello, how are you?\"\n",
+     "tokens = tokenizer(text)\n",
+     "\n",
+     "print(tokens)"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 8,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "from huggingface_hub import InferenceClient\n",
+     "client = InferenceClient(\n",
+     "    \"meta-llama/Meta-Llama-3-8B-Instruct\",\n",
+     "    token=os.environ['HF_TOKEN'],\n",
+     ")"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 11,
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       " Linear regression is a statistical method that is used to create a linear equation that best predicts the relationship between two or more variables. The goal of linear regression is to create a model that can be used to make predictions about the value of the dependent variable (y) based on the value of one or more independent variables (x). Linear regression is a widely used and powerful tool for modeling the relationship between variables, and it has many applications in fields such as finance, economics, and medicine.\n",
+       "\n",
+       "How does Linear Regression work? Linear regression works by using a set of data points, where each data point represents a pair of values for the dependent and independent variables. The algorithm then finds the line that best fits the data points, by minimizing the sum of the squared errors between the predicted values and the actual values. The line that is found is called the regression line, and it is used to make predictions about the value of the dependent variable.\n",
+       "\n",
+       "There are several types of linear regression, including:\n",
+       "\n",
+       "* Simple Linear Regression: This type of linear regression involves a single independent variable and a single dependent variable.\n",
+       "* Multiple Linear Regression: This type of linear regression involves multiple independent variables and a single dependent variable.\n",
+       "* Polynomial Regression: This type of linear regression involves a polynomial equation, rather than a linear equation.\n",
+       "* Non-Linear Regression: This type of linear regression involves a non-linear equation, rather than a linear equation.\n",
+       "\n",
+       "What are the advantages and disadvantages of Linear Regression? The advantages of linear regression include:\n",
+       "\n",
+       "* It is a widely used and well-established statistical method.\n",
+       "* It is easy to interpret and understand.\n",
+       "* It can be used to make predictions about the value of the dependent variable.\n",
+       "* It can be used to identify the relationship between the independent and dependent variables.\n",
+       "\n",
+       "The disadvantages of linear regression include:\n",
+       "\n",
+       "* It assumes a linear relationship between the independent and dependent variables, which may not always be the case.\n",
+       "* It can be sensitive to outliers and noisy data.\n",
+       "* It can be difficult to interpret the results, especially for complex models.\n",
+       "* It can be sensitive to the choice of variables and the data used.\n",
+       "\n",
+       "What are some common applications of Linear Regression? Linear regression has many applications in fields such as:\n",
+       "\n",
+       "* Finance: Linear regression can be used to predict stock prices, interest rates, and other financial variables.\n",
+       "* Economics: Linear regression can be used to model the relationship between economic variables, such as GDP and unemployment rates.\n",
+       "* Medicine: Linear regression can be used to model the relationship between medical variables, such as blood pressure and heart rate.\n",
+       "* Marketing: Linear regression can"
+      ]
+     }
+    ],
+    "source": [
+     "from langserve import RemoteRunnable\n",
+     "chain = RemoteRunnable(\"http://localhost:8000/simple\")\n",
+     "stream = chain.stream(input={'question':'What is Linear Regression?'})\n",
+     "for chunk in stream:\n",
+     "    print(chunk, end=\"\", flush=True)"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": []
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": "myenv",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.11.4"
+   },
+   "orig_nbformat": 4
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+ }