# video

# File size: 17,922 Bytes

# Text to Video
# import torch
# from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
# from diffusers.utils import export_to_video
# import streamlit as st
# import numpy as np 

# # Title and User Input
# st.title("Text-to-Video with Streamlit")
# prompt = st.text_input("Enter your text prompt:", "Spiderman is surfing")

# # Button to trigger generation
# if st.button("Generate Video"):  
#     # Ensure you have 'accelerate' version 0.17.0 or higher 
#     import accelerate 
#     if accelerate.__version__ < "0.17.0":
#         st.warning("Please upgrade 'accelerate' to version 0.17.0 or higher for CPU offloading.")
#     else:
#         with st.spinner("Generating video..."):
#             # Define the pipeline for image generation
#             pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b", 
#                                                  torch_dtype=torch.float16,                                                 variant="fp16",                                                 device="cpu") 
#             pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
#             pipe.enable_model_cpu_offload()  

#             # Generate video frames
#             video_frames = pipe(prompt, num_inference_steps=25).frames  

#             # Create dummy frames for testing (replace with actual manipulation later)
#             dummy_frames = [np.ones((256, 256, 3), dtype=np.uint8) for _ in range(20)]  

#             # Export to video
#             video_path = export_to_video(dummy_frames)

#             # Display the video in the Streamlit app  
#             st.video(video_path) 





# Text to 3D

# import streamlit as st
# import torch
# from diffusers import ShapEPipeline
# from diffusers.utils import export_to_gif
# from PIL import Image
# import numpy as np
# # import PyTorch

# # Model loading (Ideally done once at the start for efficiency)
# ckpt_id = "openai/shap-e"  

# @st.cache_resource  # Caches the model for faster subsequent runs

# def process_image_for_pil(image): 
#     if isinstance(image, torch.Tensor):   
#         # Your PyTorch conversion logic here (with correct indentation)
#     # elif isinstance(image, np.ndarray):   
#         # Your Numpy conversion logic here (with correct indentation)
#         image_array = image.astype('uint8')  # Assuming 8-bit conversion is needed
#         return Image.fromarray(image_array) 
#     else: 
#         raise TypeError("Unsupported image format. Please provide conversion logic.")

# test_image = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)  # Placeholder image  
# result = process_image_for_pil(test_image)





# def should_resize(image):  # Add 'image' as an argument
#     """Determines whether to resize images (replace with your own logic)"""
#     if image.width > 512 or image.height > 512: 
#         return True   
#     else:
#         return False  
# def load_model():    
#     return ShapEPipeline.from_pretrained(ckpt_id).to("cuda")  

# pipe = load_model()

# # App Title
# st.title("Shark 3D Image Generator")

# # User Inputs
# prompt = st.text_input("Enter your prompt:", "a shark")
# guidance_scale = st.slider("Guidance Scale", 0.0, 20.0, 15.0, step=0.5)

# # Generate and Display Images 
# if st.button("Generate"):
#     with st.spinner("Generating images..."):
#         images = pipe(prompt, guidance_scale=guidance_scale, num_inference_steps=64).images

#         # ... (Process images for PIL conversion)

#         # Resize Images (Optional)
#         pil_images = []  # Modified to store resized images if needed
#         for image in images:
#             processed_image = process_image_for_pil(image)
#             if should_resize(processed_image):  # Pass image to should_resize
#                 resized_image = processed_image.resize((256, 256))
#                 pil_images.append(resized_image) 
#             else:
#                 pil_images.append(processed_image)  # Append without resizing

#         gif_path = export_to_gif(pil_images, "shark_3d.gif")
#         st.image(pil_images[0]) 
#         st.success("GIF saved as shark_3d.gif")



# # Visual QA
# import streamlit as st
# import requests
# from PIL import Image
# from transformers import BlipProcessor, BlipForQuestionAnswering

# # Model Loading
# processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
# model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")

# # Streamlit App Structure
# st.title("Visual Question Answering ")

# def get_image():
#     img_url = st.text_input("Enter Image URL", value='https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg')
#     if img_url:
#         raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')  
#         st.image(raw_image)
#         return raw_image

# def process_vqa(image, question):
#     if image and question:
#         inputs = processor(image, question, return_tensors="pt")
#         output = model.generate(**inputs)
#         answer = processor.decode(output[0], skip_special_tokens=True)
#         st.write("Answer:", answer)

# # User Input
# image = get_image()
# question = st.text_input("Ask your question about the image:")

# # Process Question and Generate Answer
# process_vqa(image, question) 

# # Chat with pdf
# import gradio as gr
# import streamlit as st
# from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.vectorstores import Chroma
# from langchain.chains import ConversationalRetrievalChain
# from langchain.chat_models import ChatOpenAI
# from langchain.document_loaders import PyPDFLoader
# import os
# import fitz
# from PIL import Image

# # Global variables
# COUNT, N = 0, 0
# chat_history = []
# chain = None  # Initialize chain as None

# # Function to set the OpenAI API key
# def set_apikey(api_key):
#     os.environ['OPENAI_API_KEY'] = api_key
#     return disable_box  

# # Function to enable the API key input box
# def enable_api_box():
#     return enable_box

# # Function to add text to the chat history
# def add_text(history, text):
#     if not text:
#         raise gr.Error('Enter text')
#     history = history + [(text, '')]
#     return history

# # Function to process the PDF file and create a conversation chain
# def process_file(file):
#     global chain
#     if 'OPENAI_API_KEY' not in os.environ:
#         raise gr.Error('Upload your OpenAI API key')

#     # Replace with your actual PDF processing logic
#     loader = PyPDFLoader(file.name)
#     documents = loader.load()
#     embeddings = OpenAIEmbeddings()
#     pdfsearch = Chroma.from_documents(documents, embeddings)

#     chain = ConversationalRetrievalChain.from_llm(ChatOpenAI(temperature=0.3),
#                                     retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
#                                     return_source_documents=True)
#     return chain

# # Function to generate a response based on the chat history and query
# def generate_response(history, query, pdf_upload):
#     global COUNT, N, chat_history, chain
#     if not pdf_upload:
#         raise gr.Error(message='Upload a PDF')

#     if COUNT == 0:
#         chain = process_file(pdf_upload)
#         COUNT += 1

#     # Replace with your LangChain logic to generate a response 
#     result = chain({"question": query, 'chat_history': chat_history}, return_only_outputs=True)  
#     chat_history += [(query, result["answer"])]
#     N = list(result['source_documents'][0])[1][1]['page']  # Adjust as needed

#     for char in result['answer']:
#         history[-1][-1] += char  
#     return history, ''  

# # Function to render a specific page of a PDF file as an image
# def render_file(file):
#     global N
#     doc = fitz.open(file.name)
#     page = doc[N]
#     pix = page.get_pixmap(matrix=fitz.Matrix(300/72, 300/72)) 
#     image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
#     return image

# # Function to render initial content from the PDF
# def render_first(pdf_file): 
#     # Replace with logic to process the PDF and generate an initial image
#     image = Image.new('RGB', (600, 400), color = 'white') # Placeholder
#     return image

# # Streamlit & Gradio Interface

# st.title("PDF-Powered Chatbot") 

# with st.container():      
#   gr.Markdown("""     
#   <style>       
#   .image-container { height: 680px; }     
#   </style>     
#   """)    

#   with gr.Blocks() as demo:
#     pdf_upload1 = gr.UploadButton("📁 Upload PDF 1", file_types=[".pdf"])  # Define pdf_upload1

#     # ... (rest of your interface creation)

#     txt = gr.Textbox(label="Enter your query", placeholder="Ask a question...")     
#     submit_btn = gr.Button('Submit')

#     @submit_btn.click()
#     def on_submit():
#       add_text(chatbot, txt)
#       generate_response(chatbot, txt, pdf_upload1)  # Use pdf_upload1 here
#       render_file(pdf_upload1)  # Use pdf_upload1 here

# if __name__ == "__main__":
#     gr.Interface(         
#         fn=generate_response,
#         inputs=[
#             "file",  # Define pdf_upload1
#             "text",  # Define chatbot output
#             "text"   # Define txt
#         ],
#         outputs=[
#             "image",  # Define show_img
#             "text",   # Define chatbot output
#             "text"    # Define txt
#         ],   
#         title="PDF-Powered Chatbot"     
#     ).launch(server_port=8888)

# # Text to audio
# from transformers import AutoProcessor , BarkModel
# import scipy

# processor = AutoProcessor.from_pretrained("suno/bark")
# model = BarkModel.from_pretrained("suno/bark")
# model.to("cuda")

# def generate_audio(text,preset, output):
#     inputs = processor(text, voice_preset=preset)
#     for k , v in inputs.items():
#         inputs[k] = v.to("cuda")
#         audio_array = model.generate(**inputs)
#         audio_array = audio_array.cuda().numpy().squeeze()
#         scipy.io.wavfile.write(output,rate= sample_rate,data= audio_array)


# generate_audio(

#       text= "HI, welcome to our app hope you enjoy our app ,Thankyou for using our app YOURS Sincerely, Cosmo",
#       preset= "v2/en_speaker_3",
#       output= "output.wav", 


# )


# Fbgroup


# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.common.keys import Keys
# import time
# from time import sleep
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.chrome.service import Service
# from webdriver_manager.chrome import ChromeDriverManager
# import streamlit as st

# import setuptools

# with open("README.md", encoding="utf-8") as readme_file:
#     readme = readme_file.read()

# setuptools.setup(
#     name='webdriver_manager',
#     python_requires=">=3.7",
#     long_description=readme,
#     long_description_content_type="text/markdown",
#     packages=setuptools.find_packages(include=['webdriver_manager*']),
#     include_package_data=True,
#     version='4.0.1',
#     description='Library provides the way to automatically manage drivers for different browsers',
#     author='Sergey Pirogov',
#     author_email='automationremarks@gmail.com',
#     url='https://github.com/SergeyPirogov/webdriver_manager',
#     keywords=['testing', 'selenium', 'driver', 'test automation'],
#     classifiers=[
#         'License :: OSI Approved :: Apache Software License',
#         'Intended Audience :: Information Technology',
#         'Intended Audience :: Developers',
#         'Programming Language :: Python :: 3.7',
#         'Programming Language :: Python :: 3.8',
#         'Programming Language :: Python :: 3.9',
#         'Programming Language :: Python :: 3.10',
#         'Programming Language :: Python :: 3.11',
#         'Topic :: Software Development :: '
#         'Libraries :: Python Modules',
#         'Operating System :: Microsoft :: Windows',
#         'Operating System :: POSIX',
#         'Operating System :: Unix',
#         'Operating System :: MacOS',
#     ],
#     install_requires=[
#         'requests',
#         'python-dotenv',
#         'packaging'
#     ],
#     package_data={
#         "webdriver_manager": ["py.typed"]
#     },
# )


# def main():
#     # Input fields
#     st.title("Facebook Group Poster")
#     account = st.text_input("Facebook Account Email", "sample@gmail.com")
#     password = st.text_input("Facebook Password", "sample", type="password")
#     groups_links_list = st.text_area("Facebook Group URLs (one per line)",
#                                      "https://www.facebook.com/groups/sample1\nhttps://www.facebook.com/groups/sample2")
#     message = st.text_area("Post Message", "Checkout this amazing script...")
#     images_list = st.file_uploader("Upload Images", accept_multiple_files=True)

#     if st.button('Post to Facebook Groups'):
#         if not account or not password or not groups_links_list or not message or not images_list:
#             st.error("Please fill all the fields.")
#         else:
#             chrome_options = Options()
#             prefs = {"profile.default_content_setting_values.notifications": 2}
#             chrome_options.add_experimental_option("prefs", prefs)

#             with st.spinner("Posting to Facebook..."): 
#                 driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
#                 driver.get('https://www.facebook.com')

#                 # Login Logic (Replace XPaths if needed)
#                 emailelement = driver.find_element(By.XPATH,'//*[@id="email"]')
#                 emailelement.send_keys(account)
#                 passelement = driver.find_element(By.XPATH,'//*[@id="pass"]')
#                 passelement.send_keys(password)
#                 loginelement = driver.find_element(By.XPATH,'//*[@id="loginbutton"]')
#                 loginelement.click()

#                 # Posting Logic
#                 groups_links = groups_links_list.splitlines() 

#                 for group in groups_links:
#                     driver.get(group)
#                     time.sleep(2)

#                     try:
#                         driver.find_element(By.XPATH,'//*[@label="Start Discussion"]').click()
#                         post_box=driver.find_element_by_css_selector("[name='xhpc_message_text']")
#                     except:
#                         post_box=driver.find_element_by_css_selector("[name='xhpc_message_text']")
#                     post_box.send_keys(message)

#                     time.sleep(1)

#                     # Image Upload Logic (Adapt based on Streamlit setup)
#                     for image_file in images_list:
#                         photo_element = driver.find_element(By.XPATH,'//input[@type="file"]')
#                         image_path = image_file.name  # Placeholder! Adjust how you get the path
#                         photo_element.send_keys(image_path) 
#                         time.sleep(1)

#                     # time.sleep(6) 
#                     # post_button = driver.find_element_by_xpath("//*[@data-testid='react-composer-post-button']") 

#                     # Handle image uploads (assuming one upload field per image)
#                     for image_file in images_list:
#                         photo_element = driver.find_element(By.XPATH,'//input[@type="file"]')
#                         photo_element.send_keys(image_file.name)  # Or image_file.path, adjust as needed
#                         time.sleep(1)

#                     time.sleep(6) 
#                     post_button = driver.find_element_by_xpath("//*[@data-testid='react-composer-post-button']") 
#                     # ... (Rest of your logic to click the post button) 

#                 driver.close()

# if __name__ == '__main__':
#     main()

# Text to Image

import streamlit as st
import torch
from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# Hugging Face identifiers consumed by load_sdxl_pipeline():
#   ckpt - file name of the 4-step UNet checkpoint fetched from `repo`
#   repo - repository the UNet checkpoint is downloaded from
#   base - repository supplying the UNet config and the rest of the pipeline
ckpt = 'sdxl_lightning_4step_unet.safetensors'
repo = 'ByteDance/SDXL-Lightning'
base = 'stabilityai/stable-diffusion-xl-base-1.0'

# Load model (Executed only once for efficiency)
@st.cache_resource
def load_sdxl_pipeline():
    """Assemble the SDXL pipeline around the UNet checkpoint from ``repo``.

    A UNet is instantiated from the ``base`` repository's ``unet`` config,
    moved to CUDA in float16, and filled with the state dict stored in the
    ``ckpt`` file downloaded from ``repo``. That UNet is then handed to a
    ``StableDiffusionXLPipeline`` loaded from ``base`` (fp16 variant), and
    the pipeline's scheduler is swapped for an ``EulerDiscreteScheduler``
    built from the existing scheduler config with trailing timestep spacing.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the returned pipeline instead of rebuilding it on every script rerun.

    Returns:
        The CUDA-resident pipeline object.
    """
    device = "cuda"
    half = torch.float16

    # Architecture comes from the base repo's config; weights come from the
    # downloaded checkpoint file.
    lightning_unet = UNet2DConditionModel.from_config(base, subfolder="unet")
    lightning_unet = lightning_unet.to(device, half)
    checkpoint_path = hf_hub_download(repo, ckpt)
    lightning_unet.load_state_dict(load_file(checkpoint_path, device=device))

    sdxl = StableDiffusionXLPipeline.from_pretrained(
        base,
        unet=lightning_unet,
        torch_dtype=half,
        variant="fp16",
    )
    sdxl = sdxl.to(device)

    sdxl.scheduler = EulerDiscreteScheduler.from_config(
        sdxl.scheduler.config,
        timestep_spacing="trailing",
    )
    return sdxl

# Streamlit UI: read a prompt, and on button press render one generated image.
st.title("Image Generation")
prompt = st.text_input("Enter your image prompt:")

if st.button("Generate Image"):
    # A whitespace-only prompt is as useless as an empty one; reject both.
    if not prompt.strip():
        st.warning("Please enter a prompt.")
    else:
        # Loading the model (first run) and sampling both take a while, so
        # keep a spinner visible for the whole operation.
        with st.spinner("Generating image..."):
            pipe = load_sdxl_pipeline()  # Load the pipeline from cache
            with torch.no_grad():
                # The loaded UNet is the 4-step SDXL-Lightning checkpoint. Its
                # model card specifies sampling with exactly 4 inference steps
                # and guidance disabled (guidance_scale=0); the pipeline's
                # default step count and guidance do not match that checkpoint.
                image = pipe(
                    prompt,
                    num_inference_steps=4,
                    guidance_scale=0,
                ).images[0]

        st.image(image)


# text generation
# import streamlit as st
# from transformers import AutoTokenizer, AutoModelForCausalLM

# st.title("Text Generation with Bloom")

# tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom")
# model = AutoModelForCausalLM.from_pretrained("bigscience/bloom")

# user_input = st.text_area("Enter your prompt:", height=100)

# if st.button('Generate Text'):
#     inputs = tokenizer(user_input, return_tensors="pt")
#     outputs = model.generate(**inputs, max_length=100)  # Adjust max_length as needed
#     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True) 
#     st.write("Generated Text:")
#     st.write(generated_text)