halyn committed on
Commit
a80fb91
1 Parent(s): 13553fe

code update

Browse files
Files changed (2) hide show
  1. app.py +10 -6
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import os
2
  import io
3
  import streamlit as st
4
  from PyPDF2 import PdfReader
@@ -8,6 +7,7 @@ from langchain.vectorstores import FAISS
8
  from langchain.chains.question_answering import load_qa_chain
9
  from langchain.llms import HuggingFacePipeline
10
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
11
 
12
  # Global variables
13
  knowledge_base = None
@@ -22,7 +22,10 @@ def load_pdf(pdf_file):
22
  # 텍스트를 청크로 분할
23
  def split_text(text):
24
  text_splitter = CharacterTextSplitter(
25
- separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
 
 
 
26
  )
27
  return text_splitter.split_text(text)
28
 
@@ -35,9 +38,11 @@ def create_knowledge_base(chunks):
35
  def load_model():
36
  model_name = "halyn/gemma2-2b-it-finetuned-paperqa"
37
  tokenizer = AutoTokenizer.from_pretrained(model_name)
38
- model = AutoModelForCausalLM.from_pretrained(model_name)
39
- return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.1)
 
40
 
 
41
  # QA 체인 설정
42
  def setup_qa_chain():
43
  global qa_chain
@@ -46,7 +51,6 @@ def setup_qa_chain():
46
  qa_chain = load_qa_chain(llm, chain_type="stuff")
47
 
48
 
49
-
50
  # 메인 페이지 UI
51
  def main_page():
52
  st.title("Welcome to GemmaPaperQA")
@@ -90,7 +94,7 @@ def main_page():
90
  st.session_state.page = "chat"
91
  setup_qa_chain()
92
  st.success("PDF successfully processed! You can now ask questions.")
93
-
94
  except Exception as e:
95
  st.error(f"Failed to process the PDF: {str(e)}")
96
 
 
 
1
  import io
2
  import streamlit as st
3
  from PyPDF2 import PdfReader
 
7
  from langchain.chains.question_answering import load_qa_chain
8
  from langchain.llms import HuggingFacePipeline
9
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
10
+ from peft import PeftModel, PeftConfig
11
 
12
  # Global variables
13
  knowledge_base = None
 
22
  # 텍스트를 청크로 분할
23
  def split_text(text):
24
  text_splitter = CharacterTextSplitter(
25
+ separator="\n",
26
+ chunk_size=1000,
27
+ chunk_overlap=200,
28
+ length_function=len
29
  )
30
  return text_splitter.split_text(text)
31
 
 
38
  def load_model():
39
  model_name = "halyn/gemma2-2b-it-finetuned-paperqa"
40
  tokenizer = AutoTokenizer.from_pretrained(model_name)
41
+ config = PeftConfig.from_pretrained(model_name)
42
+ model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
43
+ model = PeftModel.from_pretrained(model, model_name)
44
 
45
+ return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.1)
46
  # QA 체인 설정
47
  def setup_qa_chain():
48
  global qa_chain
 
51
  qa_chain = load_qa_chain(llm, chain_type="stuff")
52
 
53
 
 
54
  # 메인 페이지 UI
55
  def main_page():
56
  st.title("Welcome to GemmaPaperQA")
 
94
  st.session_state.page = "chat"
95
  setup_qa_chain()
96
  st.success("PDF successfully processed! You can now ask questions.")
97
+
98
  except Exception as e:
99
  st.error(f"Failed to process the PDF: {str(e)}")
100
 
requirements.txt CHANGED
@@ -9,4 +9,5 @@ torch==2.4.1
9
  faiss-cpu==1.8.0.post1
10
  requests==2.32.3
11
  huggingface-hub==0.25.1
12
- sentence-transformers==3.1.1
 
 
9
  faiss-cpu==1.8.0.post1
10
  requests==2.32.3
11
  huggingface-hub==0.25.1
12
+ sentence-transformers==3.1.1
13
+ peft==0.2.0