Spaces:

longquan
/

cobol_analysis

Paused

cobol_analysis / cobol_analysis_with_azure.py

qiulongquan

Add application file

1e88633 10 months ago

11.6 kB

	import os
	import openai
	import json
	import tiktoken
	import gradio as gr
	import time

	"""
	使用azure openai作为GPT模型
	进行cobol代码分析
	UI采用gradio框架
	UI使用chatbot进行交互
	已经实现chatbot的交互问答以及历史记录显示和历史内容保存
	chatbot上面不显示prompt内容
	实现稳定输出和创造性输出的切换

	TODO:
	1.还需要一个 stop 生成
	2.流式stream输出
	3.few-shot learning sample
	"""

	# max_response_tokens caps the length of the model's reply.
	max_response_tokens = 8000
	# Conversation as displayed in the chatbot; kept at module level and
	# updated in place by greet() and cobol_analysis_process().
	history_show = []
	# Sampling parameters; overwritten by radio_change().
	temperature = 0.5
	top_p = 0.95

	# Load config values. The encoding is given explicitly so the file is read
	# as UTF-8 regardless of the platform's default encoding.
	with open('config.json', encoding='utf-8') as config_file:
	    config_details = json.load(config_file)

	# Setting up the deployment name. Note: this is the Azure OpenAI deployment
	# name, not the model name.
	chatgpt_model_name = config_details['CHATGPT_MODEL']
	openai.api_type = "azure"
	# The API key for your Azure OpenAI resource.
	openai.api_key = config_details['OPENAI_API_KEY']
	# The base URL for your Azure OpenAI resource. e.g. "https://<your resource name>.openai.azure.com"
	openai.api_base = config_details['OPENAI_API_BASE']
	# Currently Chat Completions API have the following versions available: 2023-03-15-preview
	openai.api_version = config_details['OPENAI_API_VERSION']

	def radio_change(choice):
	    """Switch the module-level sampling parameters to the chosen preset.

	    "安定出力" selects temperature 0.5 / top_p 0.95 and "積極出力" selects
	    temperature 0.7 / top_p 0.95. Any other value leaves both globals
	    untouched.
	    """
	    global temperature, top_p
	    presets = (
	        ("安定出力", 0.5, 0.95),
	        ("積極出力", 0.7, 0.95),
	    )
	    for label, preset_temperature, preset_top_p in presets:
	        if choice == label:
	            temperature, top_p = preset_temperature, preset_top_p
	            return

	# Defining a function to send the prompt to the ChatGPT model
	# More info : https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/chatgpt?pivots=programming-language-chat-completions
	def cobol_analysis_process(history, messages, model_name, max_response_tokens=500):
	    """Stream a chat completion and accumulate it into the last history turn.

	    Args:
	        history: Conversation as a list of ``[user, assistant]`` pairs. The
	            assistant slot of the last pair is reset to "" and then filled
	            with the streamed reply.
	        messages: Chat messages passed to the API unchanged.
	        model_name: Passed as ``engine`` to ``openai.ChatCompletion.create``.
	        max_response_tokens: Passed as ``max_tokens``.

	    The same text is also written to the last entry of the module-level
	    ``history_show``. Nothing is returned; both lists are updated in place.
	    The module-level ``temperature`` and ``top_p`` are used for sampling.
	    """
	    print("temperature=",temperature,"top_p=",top_p)
	    response = openai.ChatCompletion.create(
	        engine=model_name,
	        messages=messages,
	        temperature=temperature,
	        top_p=top_p,
	        max_tokens=max_response_tokens,
	        frequency_penalty=0,
	        presence_penalty=0,
	        stream=True,
	    )
	    print("===========history",history)
	    history[-1][1] = ""
	    history_show[-1][1] = ""
	    for chunk in response:
	        for choice in chunk.choices:
	            # A delta without a "content" key, or whose content is None or
	            # empty, contributes no text. Skipping it also avoids the
	            # TypeError that `str += None` would raise.
	            piece = choice.delta.content if "content" in choice.delta else ""
	            if not piece:
	                continue
	            history[-1][1] += piece
	            history_show[-1][1] += piece

	# Defining a function to print out the conversation in a readable format
	# def print_conversation(messages):
	# for message in messages:
	# print(f"[{message['role'].upper()}]")
	# print(message['content'])
	# print()

	def preprocess(history):
	    """Turn the chat history into API messages and request the completion.

	    The message list starts with a fixed Japanese system prompt. Each history
	    turn then adds a user message and, when its assistant slot is not None,
	    an assistant message. The list is printed and handed, together with
	    ``history``, to ``cobol_analysis_process``.
	    """
	    system_prompt = "あなたは優秀なCOBOLコード分析者です。あなたの仕事は要件に基づいてCOBOLコードを分析し、結果を出力することです。結果は日本語で出力する必要があります。"
	    messages = [{"role": "system", "content": system_prompt}]
	    for turn in history:
	        messages.append({"role": "user", "content": turn[0]})
	        assistant_text = turn[1]
	        if assistant_text is None:
	            # Turn not answered yet: no assistant message to replay.
	            continue
	        messages.append({"role": "assistant", "content": assistant_text})
	    print("messages",messages)
	    cobol_analysis_process(history, messages, chatgpt_model_name, max_response_tokens)

	# Prompt text appended to the user's COBOL input for each analysis type.
	# Types that are not listed here (such as "カスタマイズprompt") add no prompt.
	_ANALYSIS_PROMPTS = {
	    "全体概要入出力 COPY句サブルーチン解析": """
	1.概述一下这个程序主要做了什么,全体程序的数据流程以及每个模块的主要内容。全体概要进行说明并使用table表格输出内容。\n
	2.程序中所有的入力参数和出力参数,要求使用table表格分别表示,要求每一个对象要有简要的介绍。要再次确认不能有遗漏项目。\n
	3.程序中所有的COPY句(COPY文),总结成list表格显示。要求每一个对象要有简要的介绍。要再次确认不能有遗漏项目,所有的COPY句都要总结并在list中输出。\n
	4.全体程序中使用的子程序,包括CALL呼叫的子程序,调用外部文件的子程序。这些子程序总结成list表格显示。要求每一个对象要有简要的介绍。要再次确认不能有遗漏项目。
	""",
	    "データ定義分析": """
	1.要求分析每一行COBOL代码,不能遗漏任何数据定义行,分析内容使用table表格输出
	2.数据定义内容输出格式[等级][项目名][数据类型][长度][初期値]
	3.PIC Xデータ型は文字型，PIC 9データ型は数値型
	""",
	    "IF ELSE END解析": """
	要求：根据下面的要求以及分析例子分析上面的COBOL代码并使用table表格输出结果
	1. 分析每一行cobol代码
	2. 分析WHILE语句中条件内容
	3. 全部IF ... OR ... ELSE ... END条件语句中条件，变量名，变量数值或者字段内容变化，MOVEコマンド内容，DISPLAY显示的内容，VCALL调用的子程序内容，PERFORM调用内容，RETURN返回内容。这些内容要使用table表格简要表示(tabel列内容包括 [行番号],[コマンド/条件],[層級],[変数名],[変数の変化],[MOVEコマンド内容],[DISPLAY内容],[CALL内容],[PERFORM内容],[RETURN内容])
	4. [コマンド/条件]列需要把条件语句的全部内容都写入，条件语句结束标志END和ELSE需要单独一行加入[コマンド/条件]列，嵌套多层IF条件语句中的每一个ELSE，END都不能省略。
	5. 程序中注释的语句不需要分析，不需要输出结果
	6. 如果有嵌套IF ... ELSE ... IF ... ELSE ... END ... END 需要table中明确表示层级关系
	7. 如果是同级别IF ... ELSE ... END table中层级关系数字相同
	8. 如果有嵌套 WHILE 需要table中明确表示层级关系
	9. CASE OF END语句不要表示[層級]数值
	10. 如果是同级别WHILE, table中层级关系数字相同
	11. RETURN: S 表示程序终了，在[RETURN内容]列输出[プログラム終了]
	12. DISPLAY语句需要把全部内容显示在[DISPLAY内容]列，不能遗漏内容
	例：DISPLAY "FMクブンエラー4 HINCODE = " L-HINCODE
	输出 '"FMクブンエラー4 HINCODE = " L-HINCODE'
	13. [変数の変化]列需要明确表示变数的变化状况。
	例：IF: NB-CNT > 0
	输出 NB-CNTが0より大きい場合
	例：IF: L-FM = "1"
	输出 L-FMが1となる場合
	""",
	    "TABLE COND ACT END解析": """
	要求分析每一行cobol代码,结果使用table表格显示
	如果有嵌套TABLE COND ACT END需要table中明确表示层级关系
	同一个TABLE COND ACT END中所有的层级都相同
	全部TABLE COND ACT END语句中条件，变量名，判断条件，判断结果。这些内容要使用table表格简要表示(tabel列内容包括 [行番号],[条件],[層級],[変数名],[判断条件],[判断结果])
	例：
	005070 TABLE: MSKSJ010
	005080 COND: MSKSJ010
	005090 NHINW-KBN2 (9) = "1" :Y,Y,N,N,N: MSKSJ010
	005130 ACT: MSKSJ010
	005230 NSKD1-KBN12 := "3" :-,-,-,-,X: MSKSJ010
	005240 END: MSKSJ010

	[行番号] 005090
	[条件] NHINW-KBN2 (9) = "1"
	[層級] 1
	[変数名] NHINW-KBN2 (9)
	[判断条件/変数値変化] "1" かどうかのチェック
	[判断結果] :Y,Y,N,N,N:
	""",
	    "コード解析": """分析上面每一行cobol代码，不能有遗漏的代码行，使用table输出结果。table表格的列名[行番号 COBOLコードコード解析結果]
	sample 1:
	clang0 DS_START_PROC SECTION.
	行番号：clang0
	COBOLコード：DS_START_PROC SECTION
	コード解析結果：DS_START_PROCというセクションの開始を宣言しています。
	sample 2:
	001120 UNTIL: X = MTOSM2W-KOSU
	行番号：001120
	COBOLコード：UNTIL: X = MTOSM2W-KOSU
	コード解析結果：この行は、XがMTOSM2W-KOSUと等しいまでのループを示しています。
	""",
	}


	# Handler run when the user submits the input box.
	def greet(history,user_input,analysis_options):
	    """Record one submission, query the model and return the display history.

	    Args:
	        history: Current chatbot value, a list of ``[user, assistant]`` pairs.
	            A new pair holding the full prompt is appended to it.
	        user_input: Text from the input box.
	        analysis_options: Selected analysis type. May be None when nothing has
	            been selected in the dropdown; that is treated as "no extra prompt".

	    Returns:
	        A tuple of the module-level ``history_show`` (what the chatbot should
	        display; the analysis prompt itself is not included) and a cleared,
	        non-interactive ``gr.Textbox``.
	    """
	    analysis_content = _ANALYSIS_PROMPTS.get(analysis_options, "")
	    # Without a selection the label would be None and the string
	    # concatenation below would raise TypeError.
	    label = analysis_options or ""

	    # Rebuild the shared display history from the chatbot state passed in, so
	    # turns removed with the clear button do not reappear with the next reply.
	    history_show[:] = [list(turn) for turn in history]
	    history_show.append([label + "\n\n" + user_input, None])

	    # The analysis prompt is appended only to non-empty input; empty input is
	    # forwarded as an empty user message.
	    if user_input != "":
	        user_input = user_input + "\n\n" + analysis_content
	    print("user_input==========",user_input)
	    history.append([user_input, None])
	    preprocess(history)
	    return history_show, gr.Textbox(value="", interactive=False)

	def bot(history_show):
	    """Yield the given chat history once, unchanged.

	    Note that the parameter shadows the module-level ``history_show``; only
	    the value passed in is used.
	    """
	    yield history_show

	def print_like_dislike(x: gr.LikeData):
	    """Print the index, value and liked flag of a like/dislike event."""
	    print(x.index, x.value, x.liked)

	# Page layout and event wiring
	with gr.Blocks() as demo:
	    gr.Markdown("""
	<h1 style="text-align: center;">COBOL解析</h1>
	""") # The title can be written with markdown syntax
	    chatbot = gr.Chatbot(
	        [],
	        elem_id="chatbot",
	        bubble_full_width=False,
	        show_copy_button=True,
	        avatar_images=(None, (os.path.join(os.path.dirname(__file__), "1.png"))),
	    )
	    analysis_options = gr.Dropdown(['全体概要入出力 COPY句サブルーチン解析', 'データ定義分析', 'IF ELSE END解析', 'TABLE COND ACT END解析', 'コード解析', 'カスタマイズprompt'], label="解析タイプ選択")
	    radio=gr.Radio(["安定出力", "積極出力"], label="ランダム性制御", info="「安定出力」を採用するとモデルはより多くの決定論的な応答を生成します。「積極出力」を採用するとより多くの創造的な応答が生じます。")
	    user_input = gr.Textbox(scale=4,show_label=False,placeholder="user input", container=False,lines=1) # Input box
	    # Use gr.ClearButton to clear the content recorded in the chatbot
	    clear1 = gr.ClearButton([user_input],value="入力コンテンツクリア")
	    clear2 = gr.ClearButton([user_input, chatbot],value="Chatコンテンツクリア")
	    # Selecting a radio option updates the module-level sampling parameters.
	    radio.change(fn=radio_change, inputs=radio)
	    # On submit: greet() produces the reply and returns a cleared,
	    # non-interactive textbox; bot() then re-emits the chatbot value.
	    txt_msg = user_input.submit(greet, [chatbot,user_input,analysis_options],[chatbot,user_input], queue=False).then(
	        bot, chatbot, chatbot, api_name="bot_response"
	    )
	    # Finally make the textbox interactive again.
	    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [user_input], queue=False)

	    chatbot.like(print_like_dislike, None, None)


	demo.queue()
	if __name__ == "__main__":
	    demo.launch(share=True)