leandrocarneiro committed on
Commit
f966467
1 Parent(s): f0500d9

Upload 3 files

Browse files
Files changed (3) hide show
  1. constants.py +1 -1
  2. main.py +6 -5
  3. rag.py +9 -8
constants.py CHANGED
@@ -5,5 +5,5 @@
5
  #subject = 'Guerra entre Irã e Paquistão'
6
 
7
  #sites = ['https://www.cnnbrasil.com.br/']#, 'https://g1.globo.com/', 'https://www.metropoles.com/']
8
- num_sites = 5
9
  local_base = 'local_base'
 
5
  #subject = 'Guerra entre Irã e Paquistão'
6
 
7
  #sites = ['https://www.cnnbrasil.com.br/']#, 'https://g1.globo.com/', 'https://www.metropoles.com/']
8
+ num_sites = 2 #5
9
  local_base = 'local_base'
main.py CHANGED
@@ -45,11 +45,12 @@ def generate_news(subject, min_words, max_words, sites):
45
  print('\n\n' + '*' * 50 + '\n\n')
46
  print(result_news[0])
47
 
48
- print('\n\nFontes: ')
49
- print(result_news[1])
50
-
51
- return result_news[0] + '\n\n' + 'Fontes: ' + '\n' + result_news[1]
52
-
 
53
 
54
  def call_llm(context, prompt):
55
  print('\nChamando o modelo de linguagem...')
 
45
  print('\n\n' + '*' * 50 + '\n\n')
46
  print(result_news[0])
47
 
48
+ if result_news[1]:
49
+ print('\n\nFontes: ')
50
+ print(result_news[1])
51
+ return result_news[0] + '\n\n' + 'Fontes: ' + '\n' + result_news[1]
52
+ else:
53
+ return result_news[0]
54
 
55
  def call_llm(context, prompt):
56
  print('\nChamando o modelo de linguagem...')
rag.py CHANGED
@@ -28,7 +28,7 @@ def generate_embeddings_and_vectorstore(path):
28
  corpus = loader.load()
29
  print(f' Total de documentos antes do text_split = {len(corpus)}')
30
 
31
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=400)
32
  docs = text_splitter.split_documents(corpus)
33
  num_total_characters = sum([len(x.page_content) for x in docs])
34
  print(f" Total de chunks depois do text_split = {len(docs)}")
@@ -57,10 +57,11 @@ class Rag:
57
  self.vectorstore = vectorstore
58
  self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, output_key="answer")
59
 
60
- #Do not make up any information, create the news just based on the given information on the pieces of texts delimited by <>.
 
61
  prompt_template = """Your task is to create news for a newspaper based on pieces of text delimited by <> and a question delimited by <>.
62
- Do not use only your knowledge to make the news. Make the news based on the pieces of text delimited by <>.
63
- If the pieces of text don't have relevant information about the question, just say that you need more information to make the news.
64
  The news should have a title.
65
  The news should be written in a formal language.
66
  The news should have between {min_words} and {max_words} words and it should be in Portuguese language.
@@ -72,13 +73,13 @@ class Rag:
72
  partial_variables={"min_words": min_words, "max_words": max_words})
73
 
74
  self.qa = ConversationalRetrievalChain.from_llm(
75
- llm=ChatOpenAI(model_name="gpt-3.5-turbo-0125",
76
- temperature=0.1,
77
  openai_api_key=os.environ['OPENAI_KEY'],
78
  max_tokens=int(int(max_words) + (int(max_words) / 2))), #número máximo de tokens para a resposta
79
  memory=self.memory,
80
- #retriever=vectorstore.as_retriever(search_type='similarity_score_threshold',
81
- # search_kwargs={'k':4, 'score_threshold':0.5}), #search_kwargs={'k': 3}
82
  retriever=vectorstore.as_retriever(),
83
  combine_docs_chain_kwargs={"prompt": self.prompt},
84
  chain_type="stuff",#map_reduce, refine, map_rerank
 
28
  corpus = loader.load()
29
  print(f' Total de documentos antes do text_split = {len(corpus)}')
30
 
31
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=400)
32
  docs = text_splitter.split_documents(corpus)
33
  num_total_characters = sum([len(x.page_content) for x in docs])
34
  print(f" Total de chunks depois do text_split = {len(docs)}")
 
57
  self.vectorstore = vectorstore
58
  self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, output_key="answer")
59
 
60
+ #
61
+ #Do not use only your knowledge to make the news.
62
  prompt_template = """Your task is to create news for a newspaper based on pieces of text delimited by <> and a question delimited by <>.
63
+ Do not use only your knowledge to make the news. Make the news based on the question, but using the pieces of text.
64
+ If the pieces of text don't have enough information about the question to create the news, just say that you need more sources of information, nothing more.
65
  The news should have a title.
66
  The news should be written in a formal language.
67
  The news should have between {min_words} and {max_words} words and it should be in Portuguese language.
 
73
  partial_variables={"min_words": min_words, "max_words": max_words})
74
 
75
  self.qa = ConversationalRetrievalChain.from_llm(
76
+ llm=ChatOpenAI(model_name="gpt-3.5-turbo-0125", #0125 #1106
77
+ temperature=0,
78
  openai_api_key=os.environ['OPENAI_KEY'],
79
  max_tokens=int(int(max_words) + (int(max_words) / 2))), #número máximo de tokens para a resposta
80
  memory=self.memory,
81
+ # retriever=vectorstore.as_retriever(search_type='similarity_score_threshold',
82
+ # search_kwargs={'k':4, 'score_threshold':0.8}), #search_kwargs={'k': 3}
83
  retriever=vectorstore.as_retriever(),
84
  combine_docs_chain_kwargs={"prompt": self.prompt},
85
  chain_type="stuff",#map_reduce, refine, map_rerank