# NOTE(review): the lines "Spaces: / Running / Running" were a Hugging Face
# Spaces status banner captured when this file was copied from the web UI —
# they are not part of the program.
# Standard library
import os

# Third-party
import gradio as gr
from diffusers import DiffusionPipeline, StableDiffusion3Pipeline
from dotenv import load_dotenv
from huggingface_hub import login
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
load_dotenv() | |
Hugging_face_token=os.getenv('HFToken') | |
login(Hugging_face_token) | |
# loading image captionning model | |
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large") | |
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large") | |
# Set the model name for our LLMs. | |
GEMINI_MODEL = "gemini-2.0-flash" | |
# Store the API key in a variable. | |
GEMINI_API_KEY = os.getenv("google_api_key") | |
class stable_dif:
    """Factory that loads a Stable Diffusion pipeline by size tier.

    Parameters
    ----------
    sizes : str
        One of ``'small'``, ``'medium'`` or ``'large'``.
    """

    def __init__(self, sizes):
        # Requested size tier; validated lazily in model().
        self.sizes = sizes

    def model(self):
        """Download/load and return the diffusion pipeline for ``self.sizes``.

        Returns
        -------
        The loaded ``diffusers`` pipeline object.

        Raises
        ------
        ValueError
            If ``self.sizes`` is not a supported tier.  (The original code
            fell through all branches and crashed with UnboundLocalError.)
        """
        if self.sizes == 'medium':
            pipe = StableDiffusion3Pipeline.from_pretrained(
                "stabilityai/stable-diffusion-3.5-medium")
        elif self.sizes == 'large':
            pipe = StableDiffusion3Pipeline.from_pretrained(
                "stabilityai/stable-diffusion-3.5-large-turbo")
        elif self.sizes == 'small':
            pipe = DiffusionPipeline.from_pretrained(
                "stable-diffusion-v1-5/stable-diffusion-v1-5")
        else:
            raise ValueError(
                f"unknown pipeline size {self.sizes!r}; "
                "expected 'small', 'medium' or 'large'")
        return pipe
stable=stable_dif('small') | |
pipe=stable.model() | |
def image_story_generator(image, requirement, style):
    """Generate a four-chapter illustrated story from an uploaded image.

    Pipeline: BLIP captions the image -> Gemini writes a 4-chapter story
    from the caption -> Gemini turns each chapter into a Stable Diffusion
    prompt -> the shared ``pipe`` renders one image per chapter.

    Parameters
    ----------
    image : str
        Filepath of the uploaded image (Gradio passes ``type='filepath'``).
    requirement : str
        Free-text constraints the story must satisfy.
    style : str
        Visual style to apply to every generated illustration.

    Returns
    -------
    tuple[list, str]
        The four generated images and the story text.
    """
    raw_image = Image.open(image)

    # Caption the uploaded image with BLIP; the caption seeds the story.
    inputs = processor(raw_image, return_tensors="pt")
    out = model.generate(**inputs, min_length=20)
    model_prompt = processor.decode(out[0], skip_special_tokens=True)

    # One Gemini client serves both calls below (the original constructed
    # two identical clients).
    llm = ChatGoogleGenerativeAI(
        google_api_key=GEMINI_API_KEY, model=GEMINI_MODEL, temperature=0.3)

    # Typos fixed vs. original prompt ("charaters appearences").
    query = (f'Write a 4 chapters story based on {model_prompt} and '
             f'that fits the following requirements: {requirement}. Give a '
             'detailed description of the characters appearances.')
    result = llm.invoke(query)
    story = result.content.replace('\n', ' ')

    # Response schemas force the LLM to return exactly four named prompts
    # in a structure the parser can decode.
    schemas = [ResponseSchema(name=f'prompt {i}', description='the prompt')
               for i in range(1, 5)]
    parser = StructuredOutputParser.from_response_schemas(schemas)
    instructions = parser.get_format_instructions()

    query = (f'Based on this story: {story}. Create 4 prompts for stable '
             'diffusion that tells of a maximum of 77 tokens what happens in '
             'each chapters. Describe the characters everytime their name is '
             f'mentioned. Each image should be created in the same exact '
             f'style {style}.\n\n' + instructions)
    result = llm.invoke(query)
    image_prompts = parser.parse(result.content)

    # Render one image per chapter prompt with the shared SD pipeline.
    images = [pipe(prompt).images[0] for prompt in image_prompts.values()]
    return images, story
# Gradio UI wiring.
# BUG FIX: the first positional argument of gr.Textbox is the default *value*,
# not the label, so the original pre-filled the inputs with placeholder text
# ('enter story requirements', 'pick a style for the images') that was sent
# to the model verbatim.  Use label= instead.
interface = gr.Interface(
    fn=image_story_generator,
    inputs=[
        gr.Image(type='filepath'),
        gr.Textbox(label='enter story requirements'),
        gr.Textbox(label='pick a style for the images'),
    ],
    outputs=[
        gr.Gallery(),
        gr.Textbox(label='story'),
    ],
    description='Upload an image to start the story generation process.',
)
interface.launch()