aklai committed
Commit 29616b4 · 1 Parent(s): 4629373

Update space

Files changed (2)
  1. app.py +19 -13
  2. requirements.txt +2 -3
app.py CHANGED
@@ -5,23 +5,29 @@ from datasets import load_dataset
  from langchain_huggingface import HuggingFaceEmbeddings
  from langchain_ollama.llms import OllamaLLM

- from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
-
- #from langchain import hub
  from langchain_core.runnables import RunnableParallel
  from langchain_core.runnables import RunnablePassthrough
  from langchain_core.output_parsers import StrOutputParser
  from langchain_core.prompts import ChatPromptTemplate
  from langchain_chroma import Chroma

+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
+
+ # Load the model and tokenizer
+ MODEL = "llmware/bling-phi-3-gguf"
+ tokenizer = AutoTokenizer.from_pretrained(MODEL)
+ model = AutoModelForCausalLM.from_pretrained(MODEL)

- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- llm = HuggingFacePipeline.from_model_id(
-     model_id="meta-llama/Llama-3.2-3B",
-     task="text-generation",
-     pipeline_kwargs={"max_new_tokens": 10},
+ # Create a pipeline
+ from transformers import pipeline
+
+ pipe = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     torch_dtype=torch.float16,
+     device_map="auto",
  )


@@ -55,10 +61,10 @@ qa_chain_with_sources = (
  # Function to call a RAG LLM query
  def rag_query(query, history):
      # Invoke the chain
-     r = qa_chain_with_sources.invoke(query)
+     response = qa_chain_with_sources.invoke(query)

-     answer = r["answer"]
-     unique_sources = list(set(r["sources"]))
+     answer = response["answer"]
+     unique_sources = list(set(response["sources"]))

      # Print answers + sources
      output = f"Answer: {answer}\n\nSources:\n" + "\n".join(unique_sources)
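
Note on the first hunk: the added code references torch.float16, but no `import torch` appears in this hunk, and the new `pipe` object is never connected to the chain here; since HuggingFacePipeline is imported, that wrapping presumably happens elsewhere in app.py. A minimal sketch of the assumed wiring (the `llm` assignment below is an assumption, not part of this diff):

import torch  # needed for the torch.float16 reference above; not added in this hunk
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

# Assumed wiring (not shown in the diff): wrap the transformers pipeline so the
# LangChain RAG chain can invoke it like any other LLM.
llm = HuggingFacePipeline(pipeline=pipe)

One caveat worth flagging: llmware/bling-phi-3-gguf is a GGUF repository, and AutoModelForCausalLM.from_pretrained generally needs an explicit gguf_file=... argument (and the gguf package installed) to load such checkpoints.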
requirements.txt CHANGED
@@ -7,8 +7,7 @@ chromadb
  ollama
  sentence-transformers
  langchain-huggingface
- langchain-ollama
  chromadb
- pypdf
- bs4
  langchain-chroma
+ torch
+ transformers
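
With the space now running the model through a local transformers pipeline, torch and transformers become hard dependencies. Note that app.py still imports OllamaLLM from langchain_ollama, so dropping langchain-ollama here will break that import unless it is removed from app.py as well. A hypothetical post-install sanity check (not part of the commit):

# Hypothetical sanity check for the updated dependencies:
import torch
import transformers

print(torch.__version__, transformers.__version__)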