xangma committed on
Commit
a52c7ce
1 Parent(s): a835cf0
Files changed (4) hide show
  1. .gitignore +2 -1
  2. app.py +1 -8
  3. chain.py +3 -40
  4. ingest.py +2 -10
.gitignore CHANGED
@@ -3,4 +3,5 @@
3
  downloaded/*
4
  __pycache__/*
5
  launch.json
6
- .DS_Store
 
 
3
  downloaded/*
4
  __pycache__/*
5
  launch.json
6
+ .DS_Store
7
+ devcode.py
app.py CHANGED
@@ -1,20 +1,13 @@
 
1
  import datetime
2
  import os
3
  import gradio as gr
4
- from abc import ABC
5
- from typing import List, Optional, Any
6
- import asyncio
7
- import langchain
8
  import chromadb
9
  from chromadb.config import Settings
10
  # logging.basicConfig(stream=sys.stdout, level=logging.INFO)
11
  # logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
12
- from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
13
  from langchain.vectorstores import Chroma
14
- from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter, PythonCodeTextSplitter
15
- from langchain.document_loaders import TextLoader
16
  from langchain.docstore.document import Document
17
- from langchain.embeddings.base import Embeddings
18
  import shutil
19
  import random, string
20
  from chain import get_new_chain1
 
1
+ # chat-pykg/app.py
2
  import datetime
3
  import os
4
  import gradio as gr
 
 
 
 
5
  import chromadb
6
  from chromadb.config import Settings
7
  # logging.basicConfig(stream=sys.stdout, level=logging.INFO)
8
  # logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
 
9
  from langchain.vectorstores import Chroma
 
 
10
  from langchain.docstore.document import Document
 
11
  import shutil
12
  import random, string
13
  from chain import get_new_chain1
chain.py CHANGED
@@ -1,10 +1,6 @@
1
- import json
2
- import os
3
- import pathlib
4
- from typing import Dict, List, Tuple
5
  from langchain.chains.base import Chain
6
- import os
7
- import langchain
8
  # logging.basicConfig(stream=sys.stdout, level=logging.INFO)
9
  # logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
10
  from langchain import HuggingFaceHub
@@ -17,45 +13,12 @@ from langchain.callbacks.base import CallbackManager
17
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
18
  from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
19
 
20
- from abc import ABC
21
- from typing import List, Optional, Any
22
-
23
- from langchain.vectorstores import Chroma
24
-
25
  def get_new_chain1(vectorstore, model_selector, k_textbox) -> Chain:
26
  max_tokens_dict = {'gpt-4': 2000, 'gpt-3.5-turbo': 1000}
27
 
28
- # These templates aren't used for the moment.
29
- _eg_template = """## Example:
30
-
31
- Chat History:
32
- {chat_history}
33
- Follow Up Input: {question}
34
- Standalone question: {answer}"""
35
- _prefix = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. You should assume that the question is related to PyCBC."""
36
- _suffix = """## Example:
37
-
38
- Chat History:
39
- {chat_history}
40
- Follow Up Input: {question}
41
- Standalone question:"""
42
-
43
- template = """You are an AI assistant for various open source libraries.
44
- You are given the following extracted parts of a long document and a question. Provide a conversational answer to the question.
45
- You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
46
- If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
47
- If the question is not about the package documentation, politely inform them that you are tuned to only answer questions about the package documentationz.
48
- Question: {question}
49
- =========
50
- {context}
51
- =========
52
- Answer in Markdown:"""
53
-
54
- # Construct a ChatVectorDBChain with a streaming llm for combine docs
55
- # and a separate, non-streaming llm for question generation
56
  if model_selector in ['gpt-4', 'gpt-3.5-turbo']:
57
  llm = ChatOpenAI(client = None, temperature=0.7, model_name=model_selector)
58
- doc_chain_llm = ChatOpenAI(client = None, streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0.7, model_name=model_selector, max_tokens=1000)
59
  if model_selector == 'other':
60
  llm = HuggingFaceHub(repo_id="chavinlo/gpt4-x-alpaca")#, model_kwargs={"temperature":0, "max_length":64})
61
  doc_chain_llm = HuggingFaceHub(repo_id="chavinlo/gpt4-x-alpaca")
 
1
+ # chat-pykg/chain.py
2
+
 
 
3
  from langchain.chains.base import Chain
 
 
4
  # logging.basicConfig(stream=sys.stdout, level=logging.INFO)
5
  # logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
6
  from langchain import HuggingFaceHub
 
13
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
14
  from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
15
 
 
 
 
 
 
16
  def get_new_chain1(vectorstore, model_selector, k_textbox) -> Chain:
17
  max_tokens_dict = {'gpt-4': 2000, 'gpt-3.5-turbo': 1000}
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  if model_selector in ['gpt-4', 'gpt-3.5-turbo']:
20
  llm = ChatOpenAI(client = None, temperature=0.7, model_name=model_selector)
21
+ doc_chain_llm = ChatOpenAI(client = None, streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0.7, model_name=model_selector, max_tokens=max_tokens_dict[model_selector])
22
  if model_selector == 'other':
23
  llm = HuggingFaceHub(repo_id="chavinlo/gpt4-x-alpaca")#, model_kwargs={"temperature":0, "max_length":64})
24
  doc_chain_llm = HuggingFaceHub(repo_id="chavinlo/gpt4-x-alpaca")
ingest.py CHANGED
@@ -1,22 +1,14 @@
1
- import pickle
2
  import tempfile
3
  from langchain.document_loaders import SitemapLoader, ReadTheDocsLoader, TextLoader
4
  from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
5
- from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter, PythonCodeTextSplitter, MarkdownTextSplitter
6
  from langchain.vectorstores.faiss import FAISS
7
- import chromadb
8
  import os
9
  from langchain.vectorstores import Chroma
10
  import shutil
11
  from pathlib import Path
12
  import subprocess
13
- import tarfile
14
- # import chromadb
15
- from abc import ABC
16
- from typing import List, Optional, Any
17
- from langchain.docstore.document import Document
18
- from langchain.embeddings.base import Embeddings
19
- from chromadb.config import Settings
20
 
21
  # class CachedChroma(Chroma, ABC):
22
  # """
 
1
+ # chat-pykg/ingest.py
2
  import tempfile
3
  from langchain.document_loaders import SitemapLoader, ReadTheDocsLoader, TextLoader
4
  from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter, PythonCodeTextSplitter, MarkdownTextSplitter
6
  from langchain.vectorstores.faiss import FAISS
 
7
  import os
8
  from langchain.vectorstores import Chroma
9
  import shutil
10
  from pathlib import Path
11
  import subprocess
 
 
 
 
 
 
 
12
 
13
  # class CachedChroma(Chroma, ABC):
14
  # """