import os


# ✅ Load secrets from the Hugging Face Spaces environment
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")

# ✅ Warn early if either key is missing
if MISTRAL_API_KEY is None or LLAMA_CLOUD_API_KEY is None:
    print("🚨 ERROR: Missing API keys. Please set them in Hugging Face Secrets.")


# nest_asyncio lets the async workflow run inside environments that already
# have a running event loop (Jupyter, Colab, Hugging Face Spaces).
import nest_asyncio
nest_asyncio.apply()

import gradio as gr  # imported up front: RAGWorkflow.set_up() checks gr.utils.NamedString

# LlamaIndex / LlamaParse setup
from llama_index.core import Settings, VectorStoreIndex
from llama_index.embeddings.mistralai import MistralAIEmbedding
from llama_index.llms.mistralai import MistralAI
from llama_parse import LlamaParse

Settings.embed_model = MistralAIEmbedding(model_name="mistral-embed")
llm = MistralAI(model="mistral-large-latest", api_key=MISTRAL_API_KEY)

from llama_index.core.workflow import (
    StartEvent,
    StopEvent,
    Workflow,
    step,
    Event,
    Context
)

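# Custom workflow event: set_up() emits a QueryEvent once the index is ready,
# and ask_question() consumes it to answer the question.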
class QueryEvent(Event):
    query: str

from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core.base.base_query_engine import BaseQueryEngine

import hashlib


class RAGWorkflow(Workflow):
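    """Two-step RAG workflow over a resume: set_up() parses and indexes the
    file (cached by content hash), then ask_question() queries the index."""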
    storage_dir = "./storage"
    hash_file = "./last_resume_hash.txt"
    llm: MistralAI
    query_engine: BaseQueryEngine  # built in set_up() from the vector index

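    # Index caching: the parsed index is persisted to ./storage, and a SHA-256
    # hash of the last processed resume is kept in ./last_resume_hash.txt. If a
    # newly uploaded file hashes the same, the persisted index is reloaded
    # instead of re-parsing the PDF through LlamaParse.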
    def compute_file_hash(self, file_path):
        """Compute SHA256 hash of a file from its path."""
        hasher = hashlib.sha256()
        with open(file_path, "rb") as f:  # Read file in binary mode
            while chunk := f.read(8192):
                hasher.update(chunk)
        return hasher.hexdigest()

    def get_last_stored_hash(self):
        """Retrieve the last stored resume hash, if available."""
        if os.path.exists(self.hash_file):
            with open(self.hash_file, "r") as f:
                return f.read().strip()
        return None

    def update_stored_hash(self, new_hash):
        """Update the stored resume hash after processing a new file."""
        with open(self.hash_file, "w") as f:
            f.write(new_hash)

    @step
    async def set_up(self, ctx: Context, ev: StartEvent) -> QueryEvent:
        if not getattr(ev, "resume_file", None):
            raise ValueError("❌ No resume file provided")

        # ✅ Resolve the upload to a filesystem path
        if isinstance(ev.resume_file, gr.utils.NamedString):
            file_path = ev.resume_file.name
        elif isinstance(ev.resume_file, str) and os.path.exists(ev.resume_file):
            file_path = ev.resume_file
        else:
            raise ValueError("⚠️ Invalid file format received!")

        print(f"✅ Resume File Path: {file_path}")

        self.llm = llm  # reuse the module-level client configured with MISTRAL_API_KEY

        # ✅ Compute hash of the uploaded resume file
        new_resume_hash = self.compute_file_hash(file_path)
        last_stored_hash = self.get_last_stored_hash()

        if os.path.exists(self.storage_dir) and last_stored_hash == new_resume_hash:
            # Resume hasn't changed; load the existing index
            storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
            index = load_index_from_storage(storage_context)
        else:
            # Resume is new; process and update storage
            documents = LlamaParse(
                result_type="markdown",
                content_guideline_instruction="Extract structured bullet points from the resume."
            ).load_data(file_path, extra_info={"file_name": os.path.basename(file_path)})

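            # load_data() returns a list of Document objects, which are
            # embedded with mistral-embed and persisted below.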
            index = VectorStoreIndex.from_documents(
                documents,
                embed_model=Settings.embed_model  # Mistral embeddings configured above
            )
            index.storage_context.persist(persist_dir=self.storage_dir)

            # ✅ Update stored hash
            self.update_stored_hash(new_resume_hash)

        self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)  # retrieve the 5 most similar chunks
        return QueryEvent(query=ev.query)

    @step
    async def ask_question(self, ctx: Context, ev: QueryEvent) -> StopEvent:
        response = self.query_engine.query(f"This is a question about the resume: {ev.query}")
        return StopEvent(result=response.response)
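
# Standalone usage sketch (outside the Gradio UI); "resume.pdf" is a
# placeholder path for any local PDF:
#   result = await RAGWorkflow(timeout=120).run(resume_file="resume.pdf", query="...")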

# ✅ Instantiate the workflow once so the persisted index is reused across queries
w = RAGWorkflow(timeout=120, verbose=False)

async def process_resume(file, query):
    """Handles the Gradio file upload and query processing (async)."""
    if file is None:
        return "❌ Please upload a resume."
    if not query:
        return "❌ Please enter a question."

    try:
        # ✅ Use the actual file path from Gradio (some versions pass a plain string)
        file_path = file.name if hasattr(file, "name") else file

        # ✅ Debugging information
        print(f"✅ File uploaded: {file_path}")
        print(f"✅ File size: {os.path.getsize(file_path)} bytes")

        # ✅ Run the RAG workflow with the file path (not a BytesIO object)
        result = await w.run(resume_file=file_path, query=query)

        print("✅ Result:", result)  # Debug output
        return result if result else "⚠️ No relevant information found."

    except Exception as e:
        print("🚨 Error:", str(e))
        return f"🚨 Error occurred: {str(e)}"

# ✅ Function to clear inputs
def clear_inputs():
    return None, "", ""

# ✅ Create Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 📄 RAGResume")
    gr.Markdown("""
    **Upload a resume and ask questions about it!**
    """)
    gr.Markdown("""
    1. Upload a resume in PDF format.
    2. Enter a question about the resume (example: where does the applicant currently work?).
    3. Click the "Submit" button to get the response.
    4. Click the "Clear" button to reset the inputs.
    """)

    with gr.Row():
        file_input = gr.File(label="📄 Upload Resume (PDF)")
        query_input = gr.Textbox(label="💬 Enter your question")
    output = gr.Textbox(label="📝 Response")

    with gr.Row():
        submit_btn = gr.Button("🚀 Submit")
        clear_btn = gr.Button("🧹 Clear")

    submit_btn.click(process_resume, inputs=[file_input, query_input], outputs=output)
    clear_btn.click(clear_inputs, outputs=[file_input, query_input, output])
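
    # process_resume is async; demo.queue() (called in run_demo below) lets
    # Gradio schedule it on its event loop.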

# ✅ Launch Gradio: queue() enables the async process_resume handler, and
# launch() blocks and manages its own event loop (works in scripts and Spaces).
def run_demo():
    demo.queue()  # enables async event handlers
    demo.launch(share=True)  # ✅ public link enabled

if __name__ == "__main__":
    run_demo()