"""Streamlit demo app: code generation, PDF text extraction and image OCR.

The page exposes three independent widgets:

1. A text box whose content is fed to a StarCoder language model; the
   generated continuation is rendered as a code block.
2. A PDF uploader whose extracted text is displayed verbatim.
3. An image uploader whose OCR result (via Tesseract) is displayed verbatim.

Streamlit re-executes this script from top to bottom on every widget
interaction, so anything expensive must be cached explicitly.
"""

import PyPDF2
import pytesseract
import streamlit as st
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub id of the StarCoder checkpoint to load.
# NOTE(review): "bigcode/starcoderplus" replaces the original placeholder
# "starcoder-plus"; confirm this is the intended checkpoint and that the
# environment has accepted its license / has access to it.
model_name = "bigcode/starcoderplus"

# Upper bound on generated tokens. Without an explicit limit `generate`
# falls back to a very short default length that truncates most answers.
MAX_NEW_TOKENS = 256


@st.cache_resource(show_spinner="Loading model...")
def _load_model_and_tokenizer(name):
    """Load the model and tokenizer for *name* once per server process.

    Cached with ``st.cache_resource`` so Streamlit reruns reuse the same
    objects instead of reloading the weights on every interaction.

    Args:
        name: Hugging Face Hub id or local path of the checkpoint.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # StarCoder is a decoder-only model, so it must be loaded through the
    # causal-LM auto class rather than the seq2seq one.
    loaded_model = AutoModelForCausalLM.from_pretrained(name)
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    return loaded_model, loaded_tokenizer


def _generate_code(prompt):
    """Return the model's decoded output for *prompt*.

    Args:
        prompt: The user's code-related question.

    Returns:
        The first generated sequence decoded to text, with special tokens
        stripped.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def _extract_pdf_text(uploaded_pdf):
    """Return the concatenated text of every page in *uploaded_pdf*.

    Args:
        uploaded_pdf: A binary file-like object containing a PDF (here, the
            object returned by ``st.file_uploader``).

    Returns:
        The text of all pages joined in page order. Pages that yield no
        text contribute an empty string.
    """
    # PdfReader / .pages / extract_text() are the current PyPDF2 API; the
    # older PdfFileReader / numPages / getPage / extractText names were
    # removed in PyPDF2 3.x.
    reader = PyPDF2.PdfReader(uploaded_pdf)
    return "".join(page.extract_text() or "" for page in reader.pages)


model, tokenizer = _load_model_and_tokenizer(model_name)

# Streamlit app
st.title("Conversational AI Chatbot")

# User input for code-related question
user_input = st.text_input("Enter your code-related question:")
if user_input:
    st.code(_generate_code(user_input))  # Display the generated code

# User input for PDF reading
pdf_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
if pdf_file:
    st.text(_extract_pdf_text(pdf_file))  # Display the extracted text

# User input for OCR (image to text)
image_file = st.file_uploader(
    "Upload an image for OCR:", type=["png", "jpg", "jpeg"]
)
if image_file:
    image = Image.open(image_file)
    ocr_text = pytesseract.image_to_string(image)
    st.text(ocr_text)  # Display the extracted text