leandrocarneiro committed
Commit f966467
Parent(s): f0500d9

Upload 3 files
Files changed:
- constants.py +1 -1
- main.py +6 -5
- rag.py +9 -8
constants.py
CHANGED
@@ -5,5 +5,5 @@
 #subject = 'Guerra entre Irã e Paquistão'
 
 #sites = ['https://www.cnnbrasil.com.br/']#, 'https://g1.globo.com/', 'https://www.metropoles.com/']
-num_sites = 5
+num_sites = 2 #5
 local_base = 'local_base'
main.py
CHANGED
@@ -45,11 +45,12 @@ def generate_news(subject, min_words, max_words, sites):
     print('\n\n' + '*' * 50 + '\n\n')
     print(result_news[0])
 
-
-
-
-
-
+    if result_news[1]:
+        print('\n\nFontes: ')
+        print(result_news[1])
+        return result_news[0] + '\n\n' + 'Fontes: ' + '\n' + result_news[1]
+    else:
+        return result_news[0]
 
 def call_llm(context, prompt):
     print('\nChamando o modelo de linguagem...')
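For context, the branch added to generate_news assumes the RAG layer returns a pair of (news text, sources). A minimal standalone sketch of the behaviour these new lines implement, using hypothetical sample data (format_news and the example strings are illustrative, not part of the repository):

# Illustrative sketch only: same logic as the lines added to generate_news(),
# assuming result_news is a (news_text, sources) tuple coming from the RAG chain.
def format_news(result_news):
    if result_news[1]:
        # Sources are available: append them under a "Fontes:" heading.
        return result_news[0] + '\n\n' + 'Fontes: ' + '\n' + result_news[1]
    else:
        # No sources: return only the generated news text.
        return result_news[0]

print(format_news(('Título\nCorpo da notícia...', 'https://www.cnnbrasil.com.br/')))
print(format_news(('Título\nCorpo da notícia...', '')))  # falls back to the text alone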
rag.py
CHANGED
@@ -28,7 +28,7 @@ def generate_embeddings_and_vectorstore(path):
     corpus = loader.load()
     print(f' Total de documentos antes do text_split = {len(corpus)}')
 
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=400)
     docs = text_splitter.split_documents(corpus)
     num_total_characters = sum([len(x.page_content) for x in docs])
     print(f" Total de chunks depois do text_split = {len(docs)}")
@@ -57,10 +57,11 @@ class Rag:
         self.vectorstore = vectorstore
         self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, output_key="answer")
 
-        #
+        #
+        #Do not use only your knowledge to make the news.
         prompt_template = """Your task is to create news for a newspaper based on pieces of text delimited by <> and a question delimited by <>.
-        Do not use only your knowledge to make the news. Make the news based on the pieces of text
-        If the pieces of text don't
+        Do not use only your knowledge to make the news. Make the news based on the question, but using the pieces of text.
+        If the pieces of text don't enough information about the question to create the news, just say that you need more sources of information, nothing more.
         The news should have a title.
         The news should be written in a formal language.
         The news should have between {min_words} and {max_words} words and it should be in Portuguese language.
@@ -72,13 +73,13 @@ class Rag:
                                      partial_variables={"min_words": min_words, "max_words": max_words})
 
         self.qa = ConversationalRetrievalChain.from_llm(
-            llm=ChatOpenAI(model_name="gpt-3.5-turbo-0125",
-                           temperature=0
+            llm=ChatOpenAI(model_name="gpt-3.5-turbo-0125", #0125 #1106
+                           temperature=0,
                            openai_api_key=os.environ['OPENAI_KEY'],
                            max_tokens=int(int(max_words) + (int(max_words) / 2))), #número máximo de tokens para a resposta
             memory=self.memory,
-            #retriever=vectorstore.as_retriever(search_type='similarity_score_threshold',
-            #
+            # retriever=vectorstore.as_retriever(search_type='similarity_score_threshold',
+            #                                    search_kwargs={'k':4, 'score_threshold':0.8}), #search_kwargs={'k': 3}
             retriever=vectorstore.as_retriever(),
             combine_docs_chain_kwargs={"prompt": self.prompt},
             chain_type="stuff",#map_reduce, refine, map_rerank
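As a side note on the retriever lines: the variant that the diff keeps commented out corresponds to a real LangChain option for filtering retrieved chunks by similarity score. A minimal sketch of the two configurations, assuming a vectorstore already built by generate_embeddings_and_vectorstore; the query string is just the example subject from constants.py, and nothing here is meant as the definitive setup:

# Default retriever, as in the active line of the diff: plain similarity search.
retriever = vectorstore.as_retriever()

# Thresholded variant kept commented out in the diff: return at most 4 chunks,
# and only those whose normalized similarity score is at least 0.8.
retriever = vectorstore.as_retriever(
    search_type='similarity_score_threshold',
    search_kwargs={'k': 4, 'score_threshold': 0.8},
)

docs = retriever.get_relevant_documents('Guerra entre Irã e Paquistão')
print(f'{len(docs)} chunks recuperados')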