import json
import os
import tempfile

import pandas as pd
import requests
import xlrd
import xlwings as xw
from flask import request
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# Chat model client shared by every statement handler below.
# NOTE(review): the model name is a filesystem path and the API key is a
# placeholder, so this presumably targets a locally served
# OpenAI-compatible endpoint -- confirm before changing either value.
llm = ChatOpenAI(
    openai_api_key="123",
    base_url="http://100.100.100.18:8000/v1",
    temperature=0.1,
    model_name="/home/kemove/skz/models/Qwen1.5-32b-Chat",
    max_tokens=None,
)

# Seconds to wait for the Excel download before giving up.
_DOWNLOAD_TIMEOUT_SECONDS = 60

# Message returned to the caller whenever a statement cannot be processed.
_INVALID_TABLE_MESSAGE = '表格不准确,请重新传'

# Exact reply the prompt asks the model to give for an unusable table.
_LLM_ERROR_MARKER = '错误'


def read_excel_from_url(url, columns_to_keep=None, header_row=None):
    """Download an Excel file over HTTP and load it into a DataFrame.

    Args:
        url: Address of the Excel file.
        columns_to_keep: Optional column selection forwarded to
            ``pd.read_excel`` as ``usecols``; ``None`` keeps every column.
        header_row: Forwarded to ``pd.read_excel`` as ``header``.

    Returns:
        The loaded ``DataFrame``, or ``None`` when the server answers with
        an error status or the payload cannot be parsed as Excel.

    Raises:
        requests.RequestException: If the download itself fails (connection
            error, invalid URL, timeout).
    """
    response = requests.get(url, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
    if not response.ok:
        # An error page is never a valid workbook; fail the same way a
        # parse error does instead of writing it to disk first.
        print(f"Error downloading Excel file: HTTP {response.status_code}")
        return None

    # A unique temp file keeps concurrent requests from overwriting each
    # other's download; the original shared one fixed file name.
    fd, temp_file_path = tempfile.mkstemp(suffix=".xls")
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(response.content)
        try:
            # usecols=None is pandas' default, so one call covers both the
            # "all columns" and the "selected columns" case.
            data = pd.read_excel(
                temp_file_path,
                header=header_row,
                usecols=columns_to_keep,
            )
        except Exception as e:
            print(f"Error reading Excel file: {e}")
            return None
    finally:
        # Always delete the temp file, including on the error path.
        os.remove(temp_file_path)

    # Lift pandas' display limits. Presumably this keeps the table from
    # being truncated when the DataFrame is rendered into the prompt text.
    pd.options.display.max_rows = None
    pd.options.display.max_columns = None
    pd.options.display.width = 2000
    return data


def convert_to_json(data_str):
    """Convert the model's line-oriented reply into a JSON string.

    Each usable line has the form ``<text>,<first>@<second>``: exactly one
    comma, and at least one ``@`` after it. Lines that do not match are
    skipped. The two ``@``-separated fields become one ``[first, second]``
    row with surrounding whitespace removed.

    Post-processing applied to the rows:
        * a row equal to ``['期末数', '年初数']`` becomes ``['0', '0']``;
        * any field equal to ``nan`` (case-insensitive) becomes ``'--'``.

    Args:
        data_str: Raw text produced by the model.

    Returns:
        A JSON document ``{"data": [[first, second], ...]}`` serialized
        with ``ensure_ascii=False``.
    """
    result = []
    for line in data_str.strip().split('\n'):
        parts = line.split(',')
        if len(parts) != 2:
            continue
        fields = parts[1].split('@')
        if len(fields) < 2:
            # No '@' separator: skip like any other malformed line rather
            # than raising IndexError.
            continue
        result.append([fields[0].strip(), fields[1].strip()])

    for pair in result:
        # The column-header row carries no figures; emit zeros for it.
        if pair == ['期末数', '年初数']:
            pair[:] = ['0', '0']
        # Missing cells arrive as the text "NaN"; show them as '--'.
        pair[:] = ['--' if field.lower() == 'nan' else field for field in pair]

    print('result:', result)
    return json.dumps({'data': result}, ensure_ascii=False)


def _extract_statement(sheet, prompt_path, variable, expected_rows, debug_tag):
    """Run one statement sheet through the LLM and validate the reply.

    Args:
        sheet: DataFrame returned by ``read_excel_from_url`` (or ``None``).
        prompt_path: Path of the prompt template file.
        variable: Name of the template variable that receives ``sheet``.
        expected_rows: Number of rows a valid reply must contain.
        debug_tag: Suffix printed after the raw model output.

    Returns:
        The JSON string from ``convert_to_json`` when the reply has exactly
        ``expected_rows`` rows, otherwise the invalid-table message.
    """
    if sheet is None:
        # The file could not be read; there is nothing to send to the model.
        return _INVALID_TABLE_MESSAGE

    with open(prompt_path, "r", encoding="utf-8") as file:
        template_content = file.read()
    template = PromptTemplate(input_variables=[variable], template=template_content)
    chain = template | llm | StrOutputParser()
    output = chain.invoke({variable: sheet})
    print(output, debug_tag)

    if output.strip() == _LLM_ERROR_MARKER:
        return _INVALID_TABLE_MESSAGE

    json_data = convert_to_json(output)
    if len(json.loads(json_data).get('data')) != expected_rows:
        return _INVALID_TABLE_MESSAGE
    return json_data


# Request handlers.
# NOTE(review): only cash() reads its prompt from "pdf_reader/"; the other
# two use the working directory. The paths are kept as found -- confirm
# which location is intended.
def balance():
    """Handle a balance-sheet request.

    Reads ``url`` from the JSON request body, loads the workbook and
    returns the extracted rows as JSON (58 rows expected) or the
    invalid-table message.
    """
    balance_path = request.json.get('url')
    print(balance_path, 'balance')
    exl_balance = read_excel_from_url(balance_path)
    print('exl_balance', exl_balance)
    output = _extract_statement(
        exl_balance,
        "read_balance_statement.txt",
        'balance',
        58,
        'ballllllllllllllllllllllllllllllllllance',
    )
    print(type(output))
    return output


def cash():
    """Handle a cash-flow-statement request.

    Reads ``url`` from the JSON request body, loads the workbook and
    returns the extracted rows as JSON (25 rows expected) or the
    invalid-table message.
    """
    cash_path = request.json.get('url')
    print(cash_path, 'cash')
    exl_cash = read_excel_from_url(cash_path)
    return _extract_statement(
        exl_cash,
        "pdf_reader/read_cash_statement.txt",
        'cash',
        25,
        'casssssssssssh',
    )


def income():
    """Handle an income-statement request.

    Reads ``url`` from the JSON request body, loads the workbook and
    returns the extracted rows as JSON (31 rows expected) or the
    invalid-table message.
    """
    income_path = request.json.get('url')
    print(income_path, 'income')
    exl_income = read_excel_from_url(income_path)
    return _extract_statement(
        exl_income,
        "read_income_statement.txt",
        'income',
        31,
        'incooooooooooooooooooooooooome',
    )