File size: 4,528 Bytes
d5b5b3a
 
 
 
 
 
 
 
 
 
 
 
dd4319f
d5b5b3a
dd4319f
d5b5b3a
 
dd4319f
d5b5b3a
dd4319f
d5b5b3a
 
 
 
 
 
 
 
 
 
 
 
 
fffc505
d5b5b3a
 
 
041332b
d5b5b3a
348d568
d5b5b3a
 
123e041
041332b
d5b5b3a
 
 
dd4319f
d5b5b3a
348d568
 
d5b5b3a
348d568
 
 
 
fffc505
348d568
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fffc505
348d568
 
 
fffc505
348d568
 
 
 
d5b5b3a
348d568
041332b
348d568
 
 
 
 
 
 
 
 
d5b5b3a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import streamlit as st
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM, AutoConfig

import os
import subprocess
import sys

# Best-effort install of flash-attn at startup.
# - The variable is merged into a copy of the current environment: passing a
#   bare dict as ``env`` would replace the whole environment and drop PATH,
#   HOME, proxy settings, etc. for the child process.
# - pip is invoked through the running interpreter so the package lands in the
#   same environment that executes this app.
# - An argument list is used instead of a shell string, so no shell is involved.
# - The return code is deliberately not checked; a failed install does not stop
#   the script here.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,
)

# Build the model/processor pair used by the demo
@st.cache_resource
def load_model_and_processor():
    """Return the ``(model, processor)`` pair for Lutece-Vision-Base.

    The configuration is fetched from ``microsoft/Florence-2-base-ft`` and its
    ``vision_config.model_type`` is overwritten with ``"davit"`` before being
    handed to both ``from_pretrained`` calls. ``.eval()`` is called on the
    model before it is returned.

    The function is wrapped in ``st.cache_resource``.
    """
    checkpoint = "sujet-ai/Lutece-Vision-Base"

    florence_config = AutoConfig.from_pretrained(
        "microsoft/Florence-2-base-ft",
        trust_remote_code=True,
    )
    florence_config.vision_config.model_type = "davit"

    # Both loaders receive the same patched config and remote-code flag.
    loader_kwargs = {"config": florence_config, "trust_remote_code": True}

    model = AutoModelForCausalLM.from_pretrained(checkpoint, **loader_kwargs)
    model = model.eval()
    processor = AutoProcessor.from_pretrained(checkpoint, **loader_kwargs)

    return model, processor

# Run one question against one document image
def generate_answer(model, processor, image, prompt):
    """Answer ``prompt`` about ``image`` and return the parsed result.

    Args:
        model: Object exposing ``generate(...)``.
        processor: Callable that encodes text + image, and that also exposes
            ``batch_decode`` and ``post_process_generation``.
        image: Image exposing ``width`` and ``height`` attributes.
        prompt: The question text passed to the processor.

    Returns:
        The entry stored under the ``"<FinanceQA>"`` key of the
        post-processed generation.
    """
    task = "<FinanceQA>"

    encoded = processor(text=prompt, images=image, return_tensors="pt")

    # Deterministic beam search: sampling disabled, 3 beams, up to 1024 new tokens.
    generation_options = {
        "input_ids": encoded["input_ids"],
        "pixel_values": encoded["pixel_values"],
        "max_new_tokens": 1024,
        "do_sample": False,
        "num_beams": 3,
    }
    output_ids = model.generate(**generation_options)

    # Special tokens are kept in the decoded text handed to post-processing.
    decoded_batch = processor.batch_decode(output_ids, skip_special_tokens=False)
    first_sequence = decoded_batch[0]

    parsed = processor.post_process_generation(
        first_sequence,
        task=task,
        image_size=(image.width, image.height),
    )
    return parsed[task]

# Streamlit app
def _render_sidebar():
    """Draw the Sujet AI logo, mission statement and website link in the sidebar."""
    st.sidebar.image("sujetAI.svg", use_column_width=True)
    st.sidebar.markdown("---")
    st.sidebar.markdown("Sujet AI is on a noble mission to democratize investment opportunities by leveraging built-in models and cutting-edge technologies. Committed to open-sourcing its technology, Sujet AI aims to contribute to the research and development communities, ultimately serving the greater good of humanity.")
    st.sidebar.markdown("---")
    st.sidebar.markdown("Our website : [sujet.ai](https://sujet.ai)")


def _select_image():
    """Render the document picker and return the chosen image.

    Returns:
        An RGB PIL image (the bundled example or the user's upload), or
        ``None`` when nothing has been uploaded yet or the upload could not
        be decoded as an image.
    """
    st.subheader("📄 Financial Document")

    # Option to use example image or upload new one
    if st.checkbox("Use example image", value=True):
        image = Image.open("test_image.png").convert('RGB')
        st.image(image, caption="Example Document", use_column_width=True)
        return image

    uploaded_file = st.file_uploader("Upload a financial document", type=["png", "jpg", "jpeg"])
    if uploaded_file is None:
        return None

    try:
        image = Image.open(uploaded_file).convert('RGB')
    except OSError:
        # Pillow reports unidentified or truncated image data as OSError
        # (UnidentifiedImageError is a subclass). Tell the user instead of
        # crashing the page.
        st.error("The uploaded file could not be read as an image. Please try another file.")
        return None

    st.image(image, caption="Uploaded Document", use_column_width=True)
    return image


def _select_question():
    """Render the question picker and return the question text.

    A predefined question selected in the drop-down wins; otherwise a free-text
    input is shown and its content is returned with surrounding whitespace
    removed. The result is an empty string when no question was provided.
    """
    st.subheader("❓ Ask a Question")

    # Predefined questions
    example_questions = [
        "What's the current expenses amount?",
        "When was this document produced?",
        "Who is this document addressed to?",
        "What is the amount that's circled?",
        "What's the project's identifier?",
    ]

    # The leading empty entry means "no predefined question selected".
    selected_question = st.selectbox(
        "Select a question or type your own:",
        [""] + example_questions,
        index=0,
    )
    if selected_question:
        return selected_question

    # Strip so that a whitespace-only entry is treated as "no question".
    return st.text_input("Type your question here:").strip()


def main():
    """Render the Lutece-Vision-Base demo page.

    Lays out a sidebar and two columns: the left one picks the document image,
    the right one picks the question and shows the model's answer once the
    button is pressed. A warning is shown instead when the image or the
    question is missing.
    """
    # Page configuration is issued before any element is drawn.
    st.set_page_config(page_title="Lutece-Vision-Base Demo", page_icon="🗼", layout="wide", initial_sidebar_state="expanded")

    # Title and description
    st.title("🗼 Lutece-Vision-Base Demo")
    st.markdown("Please keep in mind that inference might be slower since this Huggingface space is running on CPU only.")

    # Sidebar with SujetAI watermark
    _render_sidebar()

    # Load model and processor
    model, processor = load_model_and_processor()

    # Two-column layout
    col1, col2 = st.columns(2)

    with col1:
        image = _select_image()

    with col2:
        question = _select_question()
        submit_button = st.button("🔍 Generate Answer")

        # Answer section: a missing image is reported before a missing question.
        if submit_button:
            if image is None:
                st.warning("Please upload an image or use the example image before asking a question.")
            elif not question:
                st.warning("Please enter a question or select one from the examples.")
            else:
                with st.spinner("Generating answer..."):
                    answer = generate_answer(model, processor, image, question)
                st.success(f"## 💡 {answer}")

# Entry point: build the page only when this file is executed as the main
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()