File size: 3,485 Bytes
1197cde 46823f8 0b69770 fa5de89 e794787 dab1590 0dda6bc 90757b8 0dda6bc 46823f8 1197cde 46823f8 1197cde 46823f8 1197cde 46823f8 1197cde 46823f8 1197cde 46823f8 1197cde 4853459 46823f8 dab1590 46823f8 dab1590 1197cde dab1590 1197cde dab1590 1197cde 46823f8 1197cde 4853459 90757b8 dab1590 c655ea8 2553939 dab1590 46823f8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
import os
import fitz
import re
import gradio as gr
import time
def preprocess(text):
    """Return ``text`` with every newline character removed.

    Only ``"\\n"`` is stripped; all other characters, including other kinds
    of whitespace, are left exactly as they are.
    """
    # Cutting the string at each newline and gluing the pieces back together
    # with nothing in between drops the newlines.
    pieces = text.split('\n')
    return ''.join(pieces)
def pdf_to_text(path, start_page=1, end_page=None):
    """Extract the plain text of a range of pages from a PDF.

    Args:
        path: Location of the PDF file, handed to ``fitz.open``.
        start_page: 1-based number of the first page to read. Values below 1
            are treated as 1 so they can never turn into a negative page
            index (which PyMuPDF would resolve from the end of the file).
        end_page: 1-based number of the last page to read, inclusive.
            ``None``, or a number past the end of the document, means
            "through the last page".

    Returns:
        A list with one string per page, in page order. The list is empty
        when the requested range contains no pages.
    """
    doc = fitz.open(path)
    try:
        total_pages = doc.page_count
        if end_page is None or end_page > total_pages:
            end_page = total_pages
        first_index = max(start_page, 1) - 1
        return [
            doc.load_page(page_index).get_text("text")
            for page_index in range(first_index, end_page)
        ]
    finally:
        # Release the file handle even when a page fails to load.
        doc.close()
# An article heading: "第", a Chinese or Arabic numeral, "条", then whitespace,
# at the start of a line (optionally indented). Restricting the middle to
# numerals stops the match from running across body text to a later "条" on
# the same line; anchoring to the line start keeps in-sentence references such
# as "依照本法第十条" at the end of a wrapped line from being taken for a heading.
_ARTICLE_HEADING = re.compile(
    r'^[ \t\u3000]*第[ \t\u3000]*[零〇一二三四五六七八九十百千\d]+[ \t\u3000]*条\s',
    re.MULTILINE,
)


def law_split(path, name):
    """Split one law PDF into individually labelled articles.

    The text of all pages is concatenated and cut at every article heading.
    Whatever precedes the first heading is discarded. Each remaining piece
    has its newlines removed by ``preprocess`` and is prefixed with the law's
    name and its article number.

    Args:
        path: Location of the PDF file.
        name: Name of the law, inserted after "中华人民共和国" in the label.

    Returns:
        A list of strings, one per article, in document order. The number in
        each label is the article's position in that order (starting at 1),
        not a value parsed from the heading itself.
    """
    full_text = ''.join(pdf_to_text(path))
    # [1:] drops the text that comes before the first article heading.
    bodies = _ARTICLE_HEADING.split(full_text)[1:]
    return [
        f'《中华人民共和国{name}》 第{number}条 ' + preprocess(body)
        for number, body in enumerate(bodies, start=1)
    ]
def folder_read(path):
    """Collect the labelled articles of every PDF directly inside a folder.

    Each entry whose extension is ``.pdf`` (compared case-insensitively) is
    passed to ``law_split``; the file name without that extension is used as
    the law's name. Entries with any other extension, or with none, are
    skipped. Sub-directories are not descended into.

    Args:
        path: Directory to scan.

    Returns:
        A single flat list containing the articles of all PDFs. Files are
        processed in sorted name order so the result does not depend on the
        order in which the file system happens to list them.
    """
    articles = []
    for file in sorted(os.listdir(path)):
        # splitext only treats the last dot as the extension separator, so a
        # name such as "a.b.pdf" keeps "a.b", and a file called just "pdf"
        # has no extension at all.
        name, suffix = os.path.splitext(file)
        if suffix.lower() != '.pdf':
            continue
        articles += law_split(os.path.join(path, file), name)
    return articles
# Build the corpus at import time: one labelled string per article, taken
# from every PDF found in the "laws" directory (relative to the working dir).
text_list=folder_read('laws')
# Embed every article and index the vectors in a Chroma store. No persist
# directory is passed here.
# NOTE(review): OpenAIEmbeddings/OpenAI presumably read the API key from the
# environment (OPENAI_API_KEY) — confirm against the deployment settings.
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_texts(texts=text_list, embedding=embeddings)
# Completion model used to write the answers.
llm = OpenAI(temperature=0.5,max_tokens=1024)
# Instruction text that generate_answer appends to every user question.
# In English: "Answer the question based on the given legal provisions and
# give appropriate legal advice. State the article number of each provision
# you cite and which law it belongs to. Cite at most two provisions and keep
# the answer concise. If the question is unrelated to the search results,
# reply only 'This question is unrelated to juvenile law'."
prompt='''
请根据给出的法律条文回答问题,给出适当的法律建议。回答时要说出你引用的法律条文是第几条,并说出引用的每一条是哪部法律中的。
引用的法律条文不要超过两条,回答尽量简明扼要
如果问题与搜索结果无关,就仅回答"该问题与青少年法律无关"即可。
'''
def generate_answer(question):
    """Answer a question from the indexed law articles.

    The five most relevant articles are retrieved from ``vectordb`` with a
    max-marginal-relevance search and handed, together with the question and
    the module-level ``prompt`` instructions, to a question-answering chain
    built on ``llm``.

    Args:
        question: The user's question. ``None`` or a string containing only
            whitespace is rejected without contacting the vector store or
            the language model.

    Returns:
        A 3-tuple ``(answer, sources, seconds)``: the generated answer, the
        retrieved articles joined by newlines, and the elapsed time in
        seconds. For a blank question the tuple is
        ``('[ERROR]: 未输入问题', '', 0.0)``, so the three UI outputs are
        still all filled.
    """
    # This function is the button's click handler, so a blank submit has to
    # be caught here; otherwise the bare instructions would be embedded and
    # sent to the model.
    if question is None or not question.strip():
        return ('[ERROR]: 未输入问题', '', 0.0)

    # perf_counter is monotonic, so the duration cannot be skewed by a
    # system clock adjustment in the middle of the call.
    started = time.perf_counter()
    most_relevant_texts = vectordb.max_marginal_relevance_search(question, k=5)
    print(most_relevant_texts)
    chain = load_qa_chain(llm)
    answer = chain.run(
        input_documents=most_relevant_texts,
        question=question + prompt,
    )
    run_time = time.perf_counter() - started
    sources = '\n'.join(doc.page_content for doc in most_relevant_texts)
    return (answer, sources, run_time)
def ask_api(question):
    """Validate a question and, if it is not blank, answer it.

    Args:
        question: The user's question; may be ``None``.

    Returns:
        The 3-tuple produced by ``generate_answer``. When the question is
        ``None`` or contains only whitespace, the tuple
        ``('[ERROR]: 未输入问题', '', 0.0)`` is returned instead, so both
        paths have the same shape and the function can feed the three output
        components of the UI directly.
    """
    if question is None or not question.strip():
        # Same (answer, sources, seconds) layout as the success path.
        return ('[ERROR]: 未输入问题', '', 0.0)
    return generate_answer(question)
# Static page content, kept apart from the layout code below.
_INTRO_MARKDOWN = """
# 青少年法律科普问答
本bot旨在根据中华人民共和国的法律回答有关青少年的问题,目前囊括的法律有
《未成年人保护法》
《义务教育法》
《预防未成年人犯罪法》
《妇女儿童权益保护法》
"""

# One inner list per example row; each row fills the single question box.
_EXAMPLE_QUESTIONS = [
    ["未成年遭受网络欺凌该怎么办?"],
    ['年满多少岁的儿童应当接受义务教育?'],
    ['遇到离家出走的未成年应该如何处理?'],
]

# Page layout: an introduction, then one column holding the question box,
# the submit button, the example questions and the three result fields.
with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MARKDOWN)
    with gr.Column():
        text_input = gr.Textbox(label='请输入与青少年法律相关的问题')
        text_button = gr.Button("提交")
        gr.Examples(
            examples=_EXAMPLE_QUESTIONS,
            inputs=[text_input],
            label='示例问题',
        )
        # Order matches the (answer, sources, seconds) tuple returned by
        # generate_answer.
        answer_box = gr.Textbox(label='参考回答')
        sources_box = gr.Textbox(label='相关法律原文')
        runtime_box = gr.Number(label="运行时长(s)")
        text_output = [answer_box, sources_box, runtime_box]
    text_button.click(
        fn=generate_answer,
        inputs=text_input,
        outputs=text_output,
    )

demo.launch()
|