xangma
committed on
Commit
•
a52c7ce
1
Parent(s):
a835cf0
cleanup
Browse files
.gitignore
CHANGED
@@ -3,4 +3,5 @@
|
|
3 |
downloaded/*
|
4 |
__pycache__/*
|
5 |
launch.json
|
6 |
-
.DS_Store
|
|
|
|
3 |
downloaded/*
|
4 |
__pycache__/*
|
5 |
launch.json
|
6 |
+
.DS_Store
|
7 |
+
devcode.py
|
app.py
CHANGED
@@ -1,20 +1,13 @@
|
|
|
|
1 |
import datetime
|
2 |
import os
|
3 |
import gradio as gr
|
4 |
-
from abc import ABC
|
5 |
-
from typing import List, Optional, Any
|
6 |
-
import asyncio
|
7 |
-
import langchain
|
8 |
import chromadb
|
9 |
from chromadb.config import Settings
|
10 |
# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
11 |
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
|
12 |
-
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
|
13 |
from langchain.vectorstores import Chroma
|
14 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter, PythonCodeTextSplitter
|
15 |
-
from langchain.document_loaders import TextLoader
|
16 |
from langchain.docstore.document import Document
|
17 |
-
from langchain.embeddings.base import Embeddings
|
18 |
import shutil
|
19 |
import random, string
|
20 |
from chain import get_new_chain1
|
|
|
1 |
+
# chat-pykg/app.py
|
2 |
import datetime
|
3 |
import os
|
4 |
import gradio as gr
|
|
|
|
|
|
|
|
|
5 |
import chromadb
|
6 |
from chromadb.config import Settings
|
7 |
# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
8 |
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
|
|
|
9 |
from langchain.vectorstores import Chroma
|
|
|
|
|
10 |
from langchain.docstore.document import Document
|
|
|
11 |
import shutil
|
12 |
import random, string
|
13 |
from chain import get_new_chain1
|
chain.py
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
import pathlib
|
4 |
-
from typing import Dict, List, Tuple
|
5 |
from langchain.chains.base import Chain
|
6 |
-
import os
|
7 |
-
import langchain
|
8 |
# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
9 |
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
|
10 |
from langchain import HuggingFaceHub
|
@@ -17,45 +13,12 @@ from langchain.callbacks.base import CallbackManager
|
|
17 |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
18 |
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
|
19 |
|
20 |
-
from abc import ABC
|
21 |
-
from typing import List, Optional, Any
|
22 |
-
|
23 |
-
from langchain.vectorstores import Chroma
|
24 |
-
|
25 |
def get_new_chain1(vectorstore, model_selector, k_textbox) -> Chain:
|
26 |
max_tokens_dict = {'gpt-4': 2000, 'gpt-3.5-turbo': 1000}
|
27 |
|
28 |
-
# These templates aren't used for the moment.
|
29 |
-
_eg_template = """## Example:
|
30 |
-
|
31 |
-
Chat History:
|
32 |
-
{chat_history}
|
33 |
-
Follow Up Input: {question}
|
34 |
-
Standalone question: {answer}"""
|
35 |
-
_prefix = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. You should assume that the question is related to PyCBC."""
|
36 |
-
_suffix = """## Example:
|
37 |
-
|
38 |
-
Chat History:
|
39 |
-
{chat_history}
|
40 |
-
Follow Up Input: {question}
|
41 |
-
Standalone question:"""
|
42 |
-
|
43 |
-
template = """You are an AI assistant for various open source libraries.
|
44 |
-
You are given the following extracted parts of a long document and a question. Provide a conversational answer to the question.
|
45 |
-
You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
|
46 |
-
If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
|
47 |
-
If the question is not about the package documentation, politely inform them that you are tuned to only answer questions about the package documentationz.
|
48 |
-
Question: {question}
|
49 |
-
=========
|
50 |
-
{context}
|
51 |
-
=========
|
52 |
-
Answer in Markdown:"""
|
53 |
-
|
54 |
-
# Construct a ChatVectorDBChain with a streaming llm for combine docs
|
55 |
-
# and a separate, non-streaming llm for question generation
|
56 |
if model_selector in ['gpt-4', 'gpt-3.5-turbo']:
|
57 |
llm = ChatOpenAI(client = None, temperature=0.7, model_name=model_selector)
|
58 |
-
doc_chain_llm = ChatOpenAI(client = None, streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0.7, model_name=model_selector, max_tokens=
|
59 |
if model_selector == 'other':
|
60 |
llm = HuggingFaceHub(repo_id="chavinlo/gpt4-x-alpaca")#, model_kwargs={"temperature":0, "max_length":64})
|
61 |
doc_chain_llm = HuggingFaceHub(repo_id="chavinlo/gpt4-x-alpaca")
|
|
|
1 |
+
# chat-pykg/chain.py
|
2 |
+
|
|
|
|
|
3 |
from langchain.chains.base import Chain
|
|
|
|
|
4 |
# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
5 |
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
|
6 |
from langchain import HuggingFaceHub
|
|
|
13 |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
14 |
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
|
15 |
|
|
|
|
|
|
|
|
|
|
|
16 |
def get_new_chain1(vectorstore, model_selector, k_textbox) -> Chain:
|
17 |
max_tokens_dict = {'gpt-4': 2000, 'gpt-3.5-turbo': 1000}
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
if model_selector in ['gpt-4', 'gpt-3.5-turbo']:
|
20 |
llm = ChatOpenAI(client = None, temperature=0.7, model_name=model_selector)
|
21 |
+
doc_chain_llm = ChatOpenAI(client = None, streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0.7, model_name=model_selector, max_tokens=max_tokens_dict[model_selector])
|
22 |
if model_selector == 'other':
|
23 |
llm = HuggingFaceHub(repo_id="chavinlo/gpt4-x-alpaca")#, model_kwargs={"temperature":0, "max_length":64})
|
24 |
doc_chain_llm = HuggingFaceHub(repo_id="chavinlo/gpt4-x-alpaca")
|
ingest.py
CHANGED
@@ -1,22 +1,14 @@
|
|
1 |
-
|
2 |
import tempfile
|
3 |
from langchain.document_loaders import SitemapLoader, ReadTheDocsLoader, TextLoader
|
4 |
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
|
5 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter,
|
6 |
from langchain.vectorstores.faiss import FAISS
|
7 |
-
import chromadb
|
8 |
import os
|
9 |
from langchain.vectorstores import Chroma
|
10 |
import shutil
|
11 |
from pathlib import Path
|
12 |
import subprocess
|
13 |
-
import tarfile
|
14 |
-
# import chromadb
|
15 |
-
from abc import ABC
|
16 |
-
from typing import List, Optional, Any
|
17 |
-
from langchain.docstore.document import Document
|
18 |
-
from langchain.embeddings.base import Embeddings
|
19 |
-
from chromadb.config import Settings
|
20 |
|
21 |
# class CachedChroma(Chroma, ABC):
|
22 |
# """
|
|
|
1 |
+
# chat-pykg/ingest.py
|
2 |
import tempfile
|
3 |
from langchain.document_loaders import SitemapLoader, ReadTheDocsLoader, TextLoader
|
4 |
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
|
5 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter, PythonCodeTextSplitter, MarkdownTextSplitter
|
6 |
from langchain.vectorstores.faiss import FAISS
|
|
|
7 |
import os
|
8 |
from langchain.vectorstores import Chroma
|
9 |
import shutil
|
10 |
from pathlib import Path
|
11 |
import subprocess
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
# class CachedChroma(Chroma, ABC):
|
14 |
# """
|