File size: 4,449 Bytes
5cc9ba2
90cd969
 
0b4e109
90cd969
 
 
 
 
 
 
 
 
 
 
 
 
9504053
 
 
 
 
443db8e
 
 
 
90cd969
d74d42e
90cd969
 
 
 
 
 
 
 
 
 
 
 
dd974ce
 
 
 
 
549a722
 
 
 
 
 
 
dd974ce
 
 
 
 
 
549a722
dd974ce
90cd969
 
 
 
 
25352c2
90cd969
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95cbd86
90cd969
 
 
 
 
 
443db8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f70ac9
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from openai import AzureOpenAI
from PyPDF2 import PdfReader
import os
import gradio as gr

class HeadingsExtractor:
    """Extract headings from PDF documents with an Azure OpenAI chat model.

    The class bundles three things: a per-page heading extractor that calls
    the chat-completions API, a PDF walker that runs the extractor over every
    page, and a small Gradio UI that wires both to an upload button.
    """

    def __init__(self):
        """Create an extractor; the API client is built lazily on first use."""
        # Deferred so that building the extractor (e.g. only to lay out the
        # UI) does not require the Azure credentials to be present yet.
        self._client = None

    def _get_client(self):
        """Return the shared AzureOpenAI client, creating it on first call.

        The key and endpoint are read from the ``AZURE_OPENAI_KEY`` and
        ``AZURE_OPENAI_ENDPOINT`` environment variables.
        """
        # getattr keeps this safe for subclasses that skip __init__.
        client = getattr(self, "_client", None)
        if client is None:
            client = AzureOpenAI(
                api_key=os.getenv("AZURE_OPENAI_KEY"),
                api_version="2023-07-01-preview",
                azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            )
            self._client = client
        return client

    def file_output_fnn(self, file_path):
        """Return the filesystem path of an uploaded file.

        Args:
            file_path: Either an upload object exposing a ``name`` attribute
                or a plain path string.

        Returns:
            The ``name`` attribute when present, otherwise ``file_path``
            itself.
        """
        return getattr(file_path, "name", file_path)

    def extract_headings(self, contract_page: str) -> "str | None":
        """Ask the chat model to extract the headings found in a paragraph.

        Args:
            contract_page (str): The text from which headings are extracted.

        Returns:
            The model's reply, or ``None`` when the request fails (the error
            is printed rather than raised, so callers stay best-effort).
        """
        try:
            # NOTE(review): "jsu" in the prompt looks like a typo for "just";
            # left untouched because changing it alters what the model sees.
            conversation = [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"""Extract Headings from given paragraph do not generate jsu extract the headings from paragraph.
                                    ```paragraph :{contract_page}```"""},
            ]

            # "ChatGPT" is presumably the Azure deployment name - confirm
            # against the Azure OpenAI resource configuration.
            chat_completion = self._get_client().chat.completions.create(
                model="ChatGPT",
                messages=conversation,
                max_tokens=1000,
                temperature=0,
            )
            return chat_completion.choices[0].message.content

        except Exception as e:
            # Best-effort: report the failure and let the caller carry on.
            print(f"Error while extracting headings: {str(e)}")
            return None

    def _headings_from_pages(self, page_texts) -> str:
        """Run ``extract_headings`` over page texts and join the results.

        Pages with no text are skipped instead of being sent to the model,
        and pages whose extraction failed (``None``/empty result) are left
        out so that one bad page does not discard the others.

        Args:
            page_texts: Iterable of page texts (items may be ``None``/empty).

        Returns:
            The per-page headings joined with newlines; ``""`` if none.
        """
        collected = []
        for text in page_texts:
            if not text or not text.strip():
                continue
            result = self.extract_headings(text)
            if result:
                collected.append(result)
        # A newline keeps the last heading of one page from fusing with the
        # first heading of the next.
        return "\n".join(collected)

    def extract_text(self, pdf_file_path) -> "str | None":
        """Extract text from each page of a PDF and collect its headings.

        Args:
            pdf_file_path: Path to the PDF, or an upload object whose
                ``name`` attribute holds that path.

        Returns:
            Headings of all pages joined with newlines, or ``None`` when the
            PDF cannot be read (the error is printed rather than raised).
        """
        try:
            print("path", pdf_file_path)
            pdf = PdfReader(getattr(pdf_file_path, "name", pdf_file_path))
            return self._headings_from_pages(
                page.extract_text() for page in pdf.pages
            )

        except Exception as e:
            # Best-effort: report the failure instead of crashing the UI.
            print(f"Error while extracting text from PDF: {str(e)}")
            return None

    def gradio_interface(self):
        """Build the Gradio UI and wire its events.

        Returns:
            The ``gr.Blocks`` app, so the caller can ``launch()`` it.
        """
        # NOTE(review): `scale` is passed as a float below, as in the
        # original layout; confirm the installed Gradio version accepts it.
        with gr.Blocks(css="style.css", theme='xiaobaiyuan/theme_brief') as demo:
            with gr.Row(elem_id="col-container", scale=0.80):
                with gr.Column(elem_id="col-container", scale=0.80):
                    file1 = gr.File(label="File", elem_classes="filenameshow")

                with gr.Column(elem_id="col-container", scale=0.20):
                    upload_button1 = gr.UploadButton(
                        "Browse File",
                        file_types=[".txt", ".pdf", ".doc", ".docx", ".json", ".csv"],
                        elem_classes="uploadbutton")
                    headings_btn = gr.Button("Get Headings", elem_classes="uploadbutton")

            with gr.Row(elem_id="col-container", scale=0.60):
                headings = gr.Textbox(label="Headings")

            # Event listeners must be registered while the Blocks context is
            # still open; Gradio rejects them once the `with` has exited.
            upload_button1.upload(self.file_output_fnn, upload_button1, file1)
            headings_btn.click(self.extract_text, upload_button1, headings)

        return demo