In [1]:
import google.generativeai as genai
import arxiv_bot_utils as utils
import os
from getpass import getpass
import json
# Plain imports, nothing special.
# The output below is: (the tqdm notebook warning emitted on import)

 from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Reuse a key that is already exported in the environment and only prompt
# interactively when none is set, so the cell can run unattended
# (e.g. Restart Kernel -> Run All with GEMINI_API_KEY pre-configured).
if not os.getenv("GEMINI_API_KEY"):
    # Store the typed key in the environment right away.
    os.environ['GEMINI_API_KEY'] = getpass("Input your API key: ")

gemini_api_key = os.getenv("GEMINI_API_KEY")  # plain string read back from the environment
if not gemini_api_key:
    # Reached when the prompt was answered with an empty string.
    raise ValueError(
        "Gemini API Key not provided. Please provide GEMINI_API_KEY as an environment variable"
    )
genai.configure(api_key=gemini_api_key)

# The models are hosted on the server; print the ones that support
# the 'generateContent' method.
for m in genai.list_models():
    if 'generateContent' in m.supported_generation_methods:
        print(m.name)


models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-1.5-pro-latest
models/gemini-pro
models/gemini-pro-vision


In [3]:
# Generation settings passed to the model constructor below.
config = genai.GenerationConfig(temperature=0.7, max_output_tokens=2048)

# Every listed harm category gets the same "BLOCK_NONE" threshold.
# NOTE(review): both HARM_CATEGORY_DANGEROUS and HARM_CATEGORY_DANGEROUS_CONTENT
# are listed - confirm against the SDK whether the first one is still needed.
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_NONE"}
    for harm_category in (
        "HARM_CATEGORY_DANGEROUS",
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# Single model instance used by the cells that call `model.generate_content`.
model = genai.GenerativeModel(
    "gemini-pro",
    safety_settings=safety_settings,
    generation_config=config,
)

In [4]:
def extract_keyword_prompt(query):
    """Build the first-stage prompt that asks the model to classify a question.

    The prompt tells the model to choose exactly one of three actions and to
    reply in JSON:

    1. keys "title" and "author" for a request about one specific document,
    2. keys "keywords" and "description" for a topical search,
    3. key "answer" when no information or document is being requested.

    Args:
        query: The guest's question; it is inserted between single quotes
            after ``QUESTION:``.

    Returns:
        The complete prompt string.
    """
    # One tuple entry per prompt line. The previous triple-quoted literal had a
    # stray backslash continuation after option 2 that glued option 3 onto the
    # same line; here every option starts on its own line. The intentional
    # continuation inside option 2 (one long sentence) is kept as a single line.
    prompt_lines = (
        "[INST] SYSTEM: You are an assistant that choose only one action below based on guest question.",
        ' 1. If the guest question is asking for a single specific document or article with explicit title, '
        'you need to respond the information in JSON format with 2 keys "title", "author" if found any above. '
        'The authors are separated with the word \'and\'. ',
        ' 2. If the guest question is asking for relevant informations about a topic, '
        'you need to respond the information in JSON format with 2 keys "keywords", "description", '
        'include a list of keywords represent the main academic topic, '
        ' and a description about the main topic. You may paraphrase the keywords to add more. ',
        ' 3. If the guest is not asking for any informations or documents, '
        'you need to respond with a polite answer in JSON format with 1 key "answer".',
        " QUESTION: '{query}'",
        " [/INST]",
        " ANSWER: ",
        " ",
    )
    return "\n".join(prompt_lines).format(query=query)

def make_answer_prompt(input, contexts):
    """Build the second-stage prompt that asks the model for the final reply.

    Args:
        input: The guest's original question (inserted after ``QUESTION:``).
        contexts: The information retrieved for that question (inserted after
            ``INFORMATION:``).

    Returns:
        The complete prompt string.
    """
    # One tuple entry per line of the prompt; the final " " reproduces the
    # whitespace that follows the last newline of the template.
    template_lines = (
        "[INST] You are a library assistant that help to search articles and documents based on user's question.",
        " From guest's question, you have found some records and documents that may help. "
        "Now you need to answer the guest with the information found.",
        " If no information found in the database, you may generate some other recommendation "
        "related to user's question using your own knowledge. "
        "Each article or paper must have a link to the pdf download page.",
        " You should answer in a conversational form politely.",
        " QUESTION: '{input}'",
        " INFORMATION: '{contexts}'",
        " [/INST]",
        " ANSWER:",
        " ",
    )
    template = "\n".join(template_lines)
    return template.format(input=input, contexts=contexts)

In [5]:
def _format_paper_records(paper_info):
    """Turn SQL rows into (context string, [[title, author, link], ...])."""
    # Columns read from each row: title (2), author (3), link (6).
    records = [[row[2], row[3], row[6]] for row in paper_info]
    # One paper per line so consecutive entries do not run into each other.
    result_string = "\n".join(
        "Title: {}, Author: {}, Link: {}".format(*record) for record in records
    )
    return result_string, records


def response(args):
    """Create the response context for the arguments extracted by the model.

    Args:
        args: Parsed JSON from the first model call. Handled shapes:
            * ``{"answer": ...}`` - a direct reply, no lookup is done;
            * ``{"keywords": ..., "description": ...}`` - topical search;
            * ``{"title": ..., "author": ...}`` - lookup of one specific paper
              ("author" may be absent).

    Returns:
        A 2-tuple ``(context, results)``:
            * ``(answer, None)`` for a direct reply;
            * ``(text, records)`` when papers were found, where ``text`` holds
              one "Title/Author/Link" line per paper and ``records`` is a list
              of ``[title, author, link]``;
            * ``("Information not found", "Information not found")`` when
              nothing matches or ``args`` has none of the expected keys;
            * an "Error occured, information not found" message as the first
              element when the crawl helper returns a string.

    Raises:
        KeyError: if "keywords" is present without "description".
    """
    not_found = ("Information not found", "Information not found")

    # The model output is free-form JSON; anything that is not an object
    # cannot carry the expected keys.
    if not isinstance(args, dict):
        return not_found

    if "answer" in args:
        return args['answer'], None  # direct answer, no database lookup

    if "keywords" in args:
        query_texts = args["description"]
        keywords = args["keywords"]
        results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)
        matches = results['metadatas'][0]
        if len(matches) == 0:
            # Nothing stored yet for this topic: crawl, store, then query again.
            new_records = utils.crawl_arxiv(keyword_list=keywords, max_results=10)
            # The crawl helper returns a string instead of records on failure
            # (presumably the error text - confirm in arxiv_bot_utils).
            if isinstance(new_records, str):
                return "Error occured, information not found", new_records
            print("Got new records: ", len(new_records))
            utils.db.add(new_records)
            utils.sqldb.add(new_records)
            results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)
            matches = results['metadatas'][0]
            print("Re-queried on chromadb, results: ", matches)
        paper_id = [match['paper_id'] for match in matches]
        paper_info = utils.sqldb.query_id(paper_id)
        print(paper_info)
        if not paper_info:
            return not_found
        return _format_paper_records(paper_info)

    if "title" in args:
        title = args['title']
        # The prompt only asks for "author" when one is found, so it may be absent.
        # NOTE(review): an empty author list is passed in that case - confirm
        # that sqldb.query / crawl_exact_paper accept it.
        author_text = args.get('author')
        authors = utils.authors_str_to_list(author_text) if author_text else []
        paper_info = utils.sqldb.query(title=title, author=authors)

        if len(paper_info) == 0:
            # Not stored yet: crawl for the exact paper, store it, query again.
            new_records = utils.crawl_exact_paper(title=title, author=authors)
            if isinstance(new_records, str):
                return "Error occured, information not found", "Information not found"
            print("Got new records: ", len(new_records))
            utils.db.add(new_records)
            utils.sqldb.add(new_records)
            paper_info = utils.sqldb.query(title=title, author=authors)
            print("Re-queried on chromadb, results: ", paper_info)

        result_string, records = _format_paper_records(paper_info)
        if len(result_string) == 0:
            return not_found
        return result_string, records

    # None of the expected keys: return the usual pair instead of None so the
    # caller's tuple unpacking keeps working.
    return not_found

In [6]:
def full_chain_single_question(input_prompt):
    """Run the whole pipeline for one question.

    Steps: build the extraction prompt, ask the model for JSON arguments,
    resolve them with ``response``, and - when records were returned - ask the
    model again to phrase the final answer from that context.

    Args:
        input_prompt: The guest's question.

    Returns:
        A 2-tuple ``(temp_answer, answer)``: the raw text of the first model
        call (``""`` if that call itself failed) and the final answer. On any
        exception the second element is ``"Error occured: <message>"``.
    """
    # Bound before the try block so the except handler can always return it,
    # even when the very first model call raises.
    temp_answer = ""
    try:
        first_prompt = extract_keyword_prompt(input_prompt)
        temp_answer = model.generate_content(first_prompt).text

        # utils.trimming presumably strips wrapping around the JSON payload -
        # confirm in arxiv_bot_utils.
        args = json.loads(utils.trimming(temp_answer))
        contexts, results = response(args)
        if not results:
            # Direct answer: there is nothing to summarise, so the context
            # itself is the final answer (previously `answer` was left unbound
            # here and the call ended in the error branch).
            print(contexts)
            answer = contexts
        else:
            output_prompt = make_answer_prompt(input_prompt, contexts)
            answer = model.generate_content(output_prompt).text
        return temp_answer, answer
    except Exception as e:
        # Best-effort by design: report the failure as the answer so that a
        # batch run over many questions keeps going.
        print(e)
        return temp_answer, "Error occured: " + str(e)

In [27]:
# Manual check of the second step only: feed `response` a canned set of
# extracted arguments instead of calling the model for the first step.
input_prompt = "Can you suggest some key papers on model predictive control for nonlinear systems, and are there any recent reviews on the application of control theory to robotics?"
args = {
    "keywords": ["Model predictive control", "Nonlinear systems", "Robotics", "Control theory"],
    "description": "Model predictive control (MPC) is a control algorithm that uses a model of the system to predict future behavior and optimize the control inputs. MPC is particularly well-suited for nonlinear systems, as it can handle the complex dynamics of these systems. In recent years, MPC has been increasingly applied to robotics, as it can improve the performance and safety of robotic systems. Control theory is a branch of mathematics that deals with the analysis and design of control systems. Control theory has been applied to a wide range of problems in robotics, including motion planning, trajectory tracking, and force control.",
}
contexts, results = response(args)
if results:
    # Records came back: let the model phrase the final answer from them.
    output_prompt = make_answer_prompt(input_prompt, contexts)
    answer = model.generate_content(output_prompt).text
    print(answer)
else:
    # Direct answer: show the context as-is.
    print(contexts)

[('1903.04824v1', 'computer science', 'Proceedings of the Fifth International Conference on Cloud and Robotics (ICCR2018)', ' Huaxi, Zhang, Jacques Malenfan', '2019-03-12', '2019-03-12', 'http://arxiv.org/pdf/1903.04824v1'), ('1709.07597v1', 'economics', 'Inverse Reinforcement Learning with Conditional Choice Probabilities', 'Mohit Sharma, Kris M. Kitani, Joachim Groege', '2017-09-22', '2017-09-22', 'http://arxiv.org/pdf/1709.07597v1')]
Sure, here are some key papers on model predictive control for nonlinear systems:

* **Nonlinear Model Predictive Control: A Survey** by Garcia, P.D., Prett, D.M., and Morari, M. (1989)
* **Model Predictive Control for Nonlinear Systems** by Camacho, E.F. and Bordons, C. (1999)
* **Nonlinear Model Predictive Control** by Rawlings, J.B. and Mayne, D.Q. (2009)

As for recent reviews on the application of control theory to robotics, here are a few:

* **Control of Robot Manipulators** by Spong, M.W., Hutchinson, S., and Vidyasagar, M. (2006)
* **Robotics: 

In [7]:
# Load the evaluation questions. The file holds JSON, so read it as UTF-8
# explicitly instead of relying on the platform's default encoding.
with open("test_questions.txt", "r", encoding="utf-8") as infile:
    data = json.load(infile)
print(data[0])

# Run the full pipeline on every question and keep both model outputs
# next to the topic the question was written for.
test_log = []
for index, test_case in enumerate(data):
    print(index)  # progress indicator
    temp_answer, answer = full_chain_single_question(test_case['question'])
    test_log.append({'desired topic': test_case['desired'],
                     'question': test_case['question'],
                     'first answer': temp_answer,
                     'final answer': answer})

# Written once, after all questions have been processed.
with open("test_results.json", "w", encoding="utf-8") as outfile:
    json.dump(test_log, outfile)

{'desired': 'Natural Language Processing (Computer Science)', 'question': 'What are some recent papers on deep learning architectures for text classification, and can you recommend any surveys or reviews on the topic?'}
0
[('1808.08121v1', 'computer science', 'An Improvement of Data Classification Using Random Multimodel Deep Learning (RMDL)', 'Mojtaba Heidarysafa, Kamran Kowsari, Donald E. Brown, Kiana Jafari Meimandi, Laura E. Barne', '2018-08-23', '2018-08-23', 'http://arxiv.org/pdf/1808.08121v1'), ('1904.08067v5', 'computer science', 'Text Classification Algorithms: A Survey', 'Kamran Kowsari, Kiana Jafari Meimandi, Mojtaba Heidarysafa, Sanjana Mendu, Laura E. Barnes, Donald E. Brow', '2020-05-20', '2019-04-17', 'http://arxiv.org/pdf/1904.08067v5'), ('2202.09144v1', 'computer science', 'Modelling the semantics of text in complex document layouts using graph transformer networks', 'Thomas Roland Barillot, Jacob Saks, Polena Lilyanova, Edward Torgas, Yachen Hu, Yuanqing Liu, Varun Ba