ginipick committed on
Commit 904c0a5 · verified · 1 Parent(s): 2645581

Update app-backup.py

Files changed (1):
  1. app-backup.py +130 -564

app-backup.py CHANGED
@@ -1,49 +1,38 @@
  import random
- import os
- import uuid
- from datetime import datetime
  import gradio as gr
  import numpy as np
  import spaces
- import torch
  from diffusers import DiffusionPipeline
  from PIL import Image

- # Apply more comprehensive patches to Gradio's utility functions
  import gradio_client.utils
- import types
-
- # Patch 1: Fix the _json_schema_to_python_type function
  original_json_schema = gradio_client.utils._json_schema_to_python_type

  def patched_json_schema(schema, defs=None):
-     # Handle boolean values directly
      if isinstance(schema, bool):
          return "bool"

-     # Handle cases where 'additionalProperties' is a boolean
      try:
          if "additionalProperties" in schema and isinstance(schema["additionalProperties"], bool):
              schema["additionalProperties"] = {"type": "any"}
      except (TypeError, KeyError):
          pass

-     # Call the original function
      try:
          return original_json_schema(schema, defs)
-     except Exception as e:
-         # Fallback to a safe value when the schema can't be parsed
          return "any"

- # Replace the original function with our patched version
  gradio_client.utils._json_schema_to_python_type = patched_json_schema

- # Create permanent storage directory
- SAVE_DIR = "saved_images"  # Gradio will handle the persistence
- if not os.path.exists(SAVE_DIR):
-     os.makedirs(SAVE_DIR, exist_ok=True)
-
- # Safe settings for model loading
  device = "cuda" if torch.cuda.is_available() else "cpu"
  repo_id = "black-forest-labs/FLUX.1-dev"
  adapter_id = "openfree/flux-chatgpt-ghibli-lora"
@@ -53,15 +42,15 @@ def load_model_with_retry(max_retries=5):
          try:
              print(f"Loading model attempt {attempt+1}/{max_retries}...")
              pipeline = DiffusionPipeline.from_pretrained(
-                 repo_id,
                  torch_dtype=torch.bfloat16,
                  use_safetensors=True,
                  resume_download=True
              )
-             print("Model loaded successfully, loading LoRA weights...")
              pipeline.load_lora_weights(adapter_id)
              pipeline = pipeline.to(device)
-             print("Pipeline ready!")
              return pipeline
          except Exception as e:
              if attempt < max_retries - 1:
@@ -72,43 +61,12 @@ def load_model_with_retry(max_retries=5):
          else:
              raise Exception(f"Failed to load model after {max_retries} attempts: {e}")

- # Load the model
  pipeline = load_model_with_retry()

  MAX_SEED = np.iinfo(np.int32).max
  MAX_IMAGE_SIZE = 1024

- def save_generated_image(image, prompt):
-     # Generate unique filename with timestamp
-     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-     unique_id = str(uuid.uuid4())[:8]
-     filename = f"{timestamp}_{unique_id}.png"
-     filepath = os.path.join(SAVE_DIR, filename)
-
-     # Save the image
-     image.save(filepath)
-
-     # Save metadata
-     metadata_file = os.path.join(SAVE_DIR, "metadata.txt")
-     with open(metadata_file, "a", encoding="utf-8") as f:
-         f.write(f"{filename}|{prompt}|{timestamp}\n")
-
-     return filepath
-
- def load_generated_images():
-     if not os.path.exists(SAVE_DIR):
-         return []
-
-     # Load all images from the directory
-     image_files = [
-         os.path.join(SAVE_DIR, f)
-         for f in os.listdir(SAVE_DIR)
-         if f.endswith(('.png', '.jpg', '.jpeg', '.webp'))
-     ]
-     # Sort by creation time (newest first)
-     image_files.sort(key=lambda x: os.path.getctime(x), reverse=True)
-     return image_files
-
  @spaces.GPU(duration=120)
  def inference(
      prompt: str,
@@ -119,12 +77,12 @@ def inference(
      guidance_scale: float,
      num_inference_steps: int,
      lora_scale: float,
-     progress: gr.Progress = gr.Progress(track_tqdm=True),
  ):
      if randomize_seed:
          seed = random.randint(0, MAX_SEED)
      generator = torch.Generator(device=device).manual_seed(seed)
-
      try:
          image = pipeline(
              prompt=prompt,
@@ -135,271 +93,14 @@
              generator=generator,
              joint_attention_kwargs={"scale": lora_scale},
          ).images[0]
-
-         # Save the generated image
-         filepath = save_generated_image(image, prompt)
-
-         # Return the image, seed, and updated gallery
-         return image, seed, load_generated_images()
      except Exception as e:
          print(f"Error during inference: {e}")
          error_img = Image.new('RGB', (width, height), color='red')
-         return error_img, seed, load_generated_images()
-
- examples = [
-     "Ghibli style futuristic stormtrooper with glossy white armor and a sleek helmet, standing heroically on a lush alien planet, vibrant flowers blooming around, soft sunlight illuminating the scene, a gentle breeze rustling the leaves. The armor reflects the pink and purple hues of the alien sunset, creating an ethereal glow around the figure. [trigger]",
-     "Ghibli style young mechanic girl in a floating workshop, surrounded by hovering tools and glowing mechanical parts, her blue overalls covered in oil stains, tinkering with a semi-transparent robot companion. Magical sparks fly as she works, while floating islands with waterfalls drift past her open workshop window. [trigger]",
-     "Ghibli style ancient forest guardian robot, covered in moss and flowering vines, sitting peacefully in a crystal-clear lake. Its gentle eyes glow with soft blue light, while bioluminescent dragonflies dance around its weathered metal frame. Ancient tech symbols on its surface pulse with a gentle rhythm. [trigger]",
-     "Ghibli style sky whale transport ship, its metallic skin adorned with traditional Japanese patterns, gliding through cotton candy clouds at sunrise. Small floating gardens hang from its sides, where workers in futuristic kimonos tend to glowing plants. Rainbow auroras shimmer in the background. [trigger]",
-     "Ghibli style cyber-shrine maiden with flowing holographic robes, performing a ritual dance among floating lanterns and digital cherry blossoms. Her traditional headdress emits soft light patterns, while spirit-like AI constructs swirl around her in elegant patterns. The scene is set in a modern shrine with both ancient wood and sleek chrome elements. [trigger]",
-     "Ghibli style robot farmer tending to floating rice paddies in the sky, wearing a traditional straw hat with advanced sensors. Its gentle movements create ripples in the water as it plants glowing rice seedlings. Flying fish leap between the terraced fields, leaving trails of sparkles in their wake, while future Tokyo's spires gleam in the distance. [trigger]"
- ]
-
- # Enhanced CSS for a more visually refined UI
- css = """
- :root {
-     --primary-color: #6a92cc;
-     --primary-hover: #557ab8;
-     --secondary-color: #f4c062;
-     --background-color: #f7f9fc;
-     --panel-background: #ffffff;
-     --text-color: #333333;
-     --border-radius: 12px;
-     --shadow: 0 4px 12px rgba(0,0,0,0.08);
-     --font-main: 'Poppins', -apple-system, BlinkMacSystemFont, sans-serif;
- }
-
- body {
-     background-color: var(--background-color);
-     font-family: var(--font-main);
- }
-
- .gradio-container {
-     margin: 0 auto;
-     max-width: 1200px !important;
- }
-
- .main-header {
-     text-align: center;
-     padding: 2rem 1rem 1rem;
-     background: linear-gradient(90deg, #6a92cc 0%, #8f7fc8 100%);
-     color: white;
-     margin-bottom: 2rem;
-     border-radius: var(--border-radius);
-     box-shadow: var(--shadow);
- }
-
- .main-header h1 {
-     font-size: 2.5rem;
-     margin-bottom: 0.5rem;
-     font-weight: 700;
-     text-shadow: 0 2px 4px rgba(0,0,0,0.2);
- }
-
- .main-header p {
-     font-size: 1rem;
-     margin-bottom: 0.5rem;
-     opacity: 0.9;
- }
-
- .main-header a {
-     color: var(--secondary-color);
-     text-decoration: none;
-     font-weight: 600;
-     transition: all 0.2s ease;
- }
-
- .main-header a:hover {
-     text-decoration: underline;
-     opacity: 0.9;
- }
-
- .container {
-     background-color: var(--panel-background);
-     padding: 1.5rem;
-     border-radius: var(--border-radius);
-     box-shadow: var(--shadow);
-     margin-bottom: 1.5rem;
- }
-
- button.primary {
-     background: var(--primary-color) !important;
-     border: none !important;
-     color: white !important;
-     padding: 10px 20px !important;
-     border-radius: 8px !important;
-     font-weight: 600 !important;
-     box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
-     transition: all 0.2s ease !important;
- }
-
- button.primary:hover {
-     background: var(--primary-hover) !important;
-     transform: translateY(-2px) !important;
-     box-shadow: 0 4px 8px rgba(0,0,0,0.15) !important;
- }
-
- button.secondary {
-     background: white !important;
-     border: 1px solid #ddd !important;
-     color: var(--text-color) !important;
-     padding: 10px 20px !important;
-     border-radius: 8px !important;
-     font-weight: 500 !important;
-     box-shadow: 0 2px 5px rgba(0,0,0,0.05) !important;
-     transition: all 0.2s ease !important;
- }
-
- button.secondary:hover {
-     background: #f5f5f5 !important;
-     transform: translateY(-2px) !important;
- }
-
- .gr-box {
-     border-radius: var(--border-radius) !important;
-     border: 1px solid #e0e0e0 !important;
- }
-
- .gr-panel {
-     border-radius: var(--border-radius) !important;
- }
-
- .gr-input {
-     border-radius: 8px !important;
-     border: 1px solid #ddd !important;
-     padding: 12px !important;
- }
-
- .gr-form {
-     border-radius: var(--border-radius) !important;
-     background-color: var(--panel-background) !important;
- }
-
- .gr-accordion {
-     border-radius: var(--border-radius) !important;
-     overflow: hidden !important;
- }
-
- .gr-button {
-     border-radius: 8px !important;
- }
-
- .gallery-item {
-     border-radius: var(--border-radius) !important;
-     transition: all 0.3s ease !important;
- }
-
- .gallery-item:hover {
-     transform: scale(1.02) !important;
-     box-shadow: 0 6px 15px rgba(0,0,0,0.1) !important;
- }
-
- .tabs {
-     border-radius: var(--border-radius) !important;
-     overflow: hidden !important;
- }
-
- footer {
-     display: none !important;
- }
-
- .settings-accordion legend span {
-     font-weight: 600 !important;
- }
-
- .example-prompt {
-     font-size: 0.9rem;
-     color: #555;
-     padding: 8px;
-     background: #f5f7fa;
-     border-radius: 6px;
-     border-left: 3px solid var(--primary-color);
-     margin-bottom: 8px;
-     cursor: pointer;
-     transition: all 0.2s;
- }
-
- .example-prompt:hover {
-     background: #eef2f8;
- }
-
- .status-generating {
-     color: #ffa200;
-     font-weight: 500;
-     display: flex;
-     align-items: center;
-     gap: 8px;
- }
-
- .status-generating::before {
-     content: "";
-     display: inline-block;
-     width: 12px;
-     height: 12px;
-     border-radius: 50%;
-     background-color: #ffa200;
-     animation: pulse 1.5s infinite;
- }
-
- .status-complete {
-     color: #00c853;
-     font-weight: 500;
-     display: flex;
-     align-items: center;
-     gap: 8px;
- }
-
- .status-complete::before {
-     content: "";
-     display: inline-block;
-     width: 12px;
-     height: 12px;
-     border-radius: 50%;
-     background-color: #00c853;
- }
-
- @keyframes pulse {
-     0% {
-         opacity: 0.6;
-     }
-     50% {
-         opacity: 1;
-     }
-     100% {
-         opacity: 0.6;
-     }
- }
-
- .gr-accordion-title {
-     font-weight: 600 !important;
-     color: var(--text-color) !important;
- }
-
- .tabs button {
-     font-weight: 500 !important;
-     padding: 10px 16px !important;
- }
-
- .tabs button.selected {
-     font-weight: 600 !important;
-     color: var(--primary-color) !important;
-     background: rgba(106, 146, 204, 0.1) !important;
- }
-
- .gr-slider-container {
-     padding: 10px 0 !important;
- }
-
- .gr-prose h3 {
-     font-weight: 600 !important;
-     color: var(--primary-color) !important;
-     margin-bottom: 1rem !important;
- }
- """
-
- #######################
- # SECOND TAB CODE SETUP
- #######################
  import subprocess
  subprocess.run(
      'pip install flash-attn --no-build-isolation',
@@ -409,38 +110,44 @@ subprocess.run(

  from transformers import AutoProcessor, AutoModelForCausalLM

- # Load your Florence-2 models and processors
  models = {
-     'gokaygokay/Florence-2-Flux-Large': AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-Flux-Large', trust_remote_code=True).eval(),
-     'gokaygokay/Florence-2-Flux': AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-Flux', trust_remote_code=True).eval(),
  }

  processors = {
-     'gokaygokay/Florence-2-Flux-Large': AutoProcessor.from_pretrained('gokaygokay/Florence-2-Flux-Large', trust_remote_code=True),
-     'gokaygokay/Florence-2-Flux': AutoProcessor.from_pretrained('gokaygokay/Florence-2-Flux', trust_remote_code=True),
  }

- title = """<h1 align="center">Florence-2 Captioner for Flux Prompts</h1>
- <p><center>
- <a href="https://huggingface.co/gokaygokay/Florence-2-Flux-Large" target="_blank">[Florence-2 Flux Large]</a>
- <a href="https://huggingface.co/gokaygokay/Florence-2-Flux" target="_blank">[Florence-2 Flux Base]</a>
- </center></p>
- """
-
  @spaces.GPU
- def run_example(image, model_name='gokaygokay/Florence-2-Flux-Large'):
      from PIL import Image as PILImage
-     image = PILImage.fromarray(image)
      task_prompt = "<DESCRIPTION>"
-     prompt = task_prompt + "Describe this image in great detail."

      if image.mode != "RGB":
          image = image.convert("RGB")

      model = models[model_name]
      processor = processors[model_name]

-     inputs = processor(text=prompt, images=image, return_tensors="pt")
      generated_ids = model.generate(
          input_ids=inputs["input_ids"],
          pixel_values=inputs["pixel_values"],
@@ -449,252 +156,111 @@ def run_example(image, model_name='gokaygokay/Florence-2-Flux-Large'):
          repetition_penalty=1.10,
      )
      generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-     parsed_answer = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
      return parsed_answer["<DESCRIPTION>"]

- ####################################################
- # COMBINED DEMO WITH TWO TABS
- ####################################################
- with gr.Blocks(css=css, analytics_enabled=False, theme="soft") as demo:
-     with gr.Column():
-         # Custom header (Tab 1)
-         gr.HTML('''
-         <div class="main-header">
-             <h1>✨ FLUX Ghibli LoRA Generator ✨</h1>
-             <p>Community: <a href="https://discord.gg/openfreeai" target="_blank">https://discord.gg/openfreeai</a></p>
-         </div>
-         ''')
-
-         with gr.Row():
-             with gr.Column(scale=3):
-                 with gr.Group(elem_classes="container"):
                      prompt = gr.Textbox(
-                         label="Enter your imagination",
-                         placeholder="Describe your Ghibli-style image here...",
                          lines=3
                      )
-
                      with gr.Row():
-                         run_button = gr.Button("✨ Generate Image", elem_classes="primary")
-                         clear_button = gr.Button("Clear", elem_classes="secondary")
-
-                     with gr.Accordion("Advanced Settings", open=False, elem_classes="settings-accordion"):
-                         with gr.Row():
-                             seed = gr.Slider(
-                                 label="Seed",
-                                 minimum=0,
-                                 maximum=MAX_SEED,
-                                 step=1,
-                                 value=42,
-                             )
-                             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-                         with gr.Row():
-                             width = gr.Slider(
-                                 label="Width",
-                                 minimum=256,
-                                 maximum=MAX_IMAGE_SIZE,
-                                 step=32,
-                                 value=1024,
-                             )
-                             height = gr.Slider(
-                                 label="Height",
-                                 minimum=256,
-                                 maximum=MAX_IMAGE_SIZE,
-                                 step=32,
-                                 value=768,
-                             )
-
-                         with gr.Row():
-                             guidance_scale = gr.Slider(
-                                 label="Guidance scale",
-                                 minimum=0.0,
-                                 maximum=10.0,
-                                 step=0.1,
-                                 value=3.5,
-                             )
-
-                         with gr.Row():
-                             num_inference_steps = gr.Slider(
-                                 label="Steps",
-                                 minimum=1,
-                                 maximum=50,
-                                 step=1,
-                                 value=30,
-                             )
-                             lora_scale = gr.Slider(
-                                 label="LoRA scale",
-                                 minimum=0.0,
-                                 maximum=1.0,
-                                 step=0.1,
-                                 value=1.0,
-                             )
-
-                 with gr.Group(elem_classes="container"):
-                     gr.Markdown("### ✨ Example Prompts")
-                     # Create HTML for examples manually
-                     examples_html = '\n'.join([
-                         f'<div class="example-prompt">{example}</div>'
-                         for example in examples
-                     ])
-                     example_container = gr.HTML(examples_html)
-
-             with gr.Column(scale=4):
-                 with gr.Group(elem_classes="container"):
-                     # Image result container with status indicator
-                     with gr.Group():
-                         generation_status = gr.HTML('<div class="status-complete">Ready to generate</div>')
-                         result = gr.Image(label="Generated Image", elem_id="result-image")
-                         seed_text = gr.Number(label="Used Seed", value=42)
-
-         # TABS
-         with gr.Tabs(elem_classes="tabs") as tabs:
-             with gr.TabItem("Gallery"):
-                 with gr.Group(elem_classes="container"):
-                     gallery_header = gr.Markdown("### 🖼️ Your Generated Masterpieces")
-                     with gr.Row():
-                         refresh_btn = gr.Button("🔄 Refresh Gallery", elem_classes="secondary")
-                     generated_gallery = gr.Gallery(
-                         label="Generated Images",
-                         columns=3,
-                         value=load_generated_images(),
-                         height="500px",
-                         elem_classes="gallery-item"
-                     )

-             #####################################
-             # SECOND TAB: FLORENCE-2 CAPTIONER
-             #####################################
              with gr.TabItem("Florence-2 Captioner"):
-                 # Use your custom HTML title
-                 gr.HTML(title)

                  with gr.Row():
                      with gr.Column():
-                         input_img = gr.Image(label="Input Picture")
                          model_selector = gr.Dropdown(
                              choices=list(models.keys()),
-                             label="Model",
-                             value='gokaygokay/Florence-2-Flux-Large'
                          )
-                         submit_btn = gr.Button(value="Submit")
                      with gr.Column():
-                         output_text = gr.Textbox(label="Output Text")

-                 gr.Examples(
-                     [["image1.jpg"],
-                      ["image2.jpg"],
-                      ["image3.png"],
-                      ["image5.jpg"]],
-                     inputs=[input_img, model_selector],
-                     outputs=[output_text],
-                     fn=run_example,
-                     label='Try captioning on below examples',
-                     cache_examples=True
-                 )
-
-                 submit_btn.click(run_example, [input_img, model_selector], [output_text])
-         # END TABS
-
-     # ----- EVENT HANDLERS for Tab 1 -----
-     def refresh_gallery():
-         return load_generated_images()
-
-     def clear_output():
-         return "", gr.update(value=None), seed, '<div class="status-complete">Ready to generate</div>'
-
-     def before_generate():
-         return '<div class="status-generating">Generating image...</div>'
-
-     def after_generate(image, seed, gallery):
-         return image, seed, gallery, '<div class="status-complete">Generation complete!</div>'
-
-     refresh_btn.click(
-         fn=refresh_gallery,
-         inputs=None,
-         outputs=generated_gallery,
-     )
-
-     clear_button.click(
-         fn=clear_output,
-         inputs=None,
-         outputs=[prompt, result, seed_text, generation_status]
-     )
-
-     run_button.click(
-         fn=before_generate,
-         inputs=None,
-         outputs=generation_status,
-     ).then(
-         fn=inference,
-         inputs=[
-             prompt,
-             seed,
-             randomize_seed,
-             width,
-             height,
-             guidance_scale,
-             num_inference_steps,
-             lora_scale,
-         ],
-         outputs=[result, seed_text, generated_gallery],
-     ).then(
-         fn=after_generate,
-         inputs=[result, seed_text, generated_gallery],
-         outputs=[result, seed_text, generated_gallery, generation_status],
-     )
-
-     prompt.submit(
-         fn=before_generate,
-         inputs=None,
-         outputs=generation_status,
-     ).then(
-         fn=inference,
-         inputs=[
-             prompt,
-             seed,
-             randomize_seed,
-             width,
-             height,
-             guidance_scale,
-             num_inference_steps,
-             lora_scale,
-         ],
-         outputs=[result, seed_text, generated_gallery],
-     ).then(
-         fn=after_generate,
-         inputs=[result, seed_text, generated_gallery],
-         outputs=[result, seed_text, generated_gallery, generation_status],
-     )
-
-     # Custom JavaScript for handling example prompts
-     gr.HTML("""
-     <script>
-     document.addEventListener('DOMContentLoaded', function() {
-         // Add click handlers to example prompts
-         setTimeout(() => {
-             const examples = document.querySelectorAll('.example-prompt');
-             const promptInput = document.querySelector('textarea');
-
-             examples.forEach(example => {
-                 example.addEventListener('click', function() {
-                     promptInput.value = this.textContent.trim();
-                     // Trigger input event to update Gradio's state
-                     const event = new Event('input', { bubbles: true });
-                     promptInput.dispatchEvent(event);
-                 });
-             });
-         }, 1000);  // Small delay to ensure elements are loaded
-     });
-     </script>
-     """)

- # Launch with fallback options
- try:
-     demo.queue(concurrency_count=1, max_size=20)
-     demo.launch(debug=True, show_api=False)
- except Exception as e:
-     print(f"Error during launch: {e}")
-     print("Trying alternative launch configuration...")
-     demo.launch(debug=True, show_api=False, share=False)
  import random
2
+ import torch
 
 
3
  import gradio as gr
4
  import numpy as np
5
  import spaces
 
6
  from diffusers import DiffusionPipeline
7
  from PIL import Image
8
 
9
+ # --- [Optional Patch] ---------------------------------------------------------
10
+ # This patch fixes potential JSON schema parsing issues in Gradio/Gradio-Client.
11
  import gradio_client.utils
 
 
 
12
  original_json_schema = gradio_client.utils._json_schema_to_python_type
13
 
14
  def patched_json_schema(schema, defs=None):
15
+ # Handle boolean schema directly
16
  if isinstance(schema, bool):
17
  return "bool"
18
 
19
+ # If 'additionalProperties' is a boolean, replace it with a generic type
20
  try:
21
  if "additionalProperties" in schema and isinstance(schema["additionalProperties"], bool):
22
  schema["additionalProperties"] = {"type": "any"}
23
  except (TypeError, KeyError):
24
  pass
25
 
26
+ # Attempt to parse normally; fallback to "any" on error
27
  try:
28
  return original_json_schema(schema, defs)
29
+ except Exception:
 
30
  return "any"
31
 
 
32
  gradio_client.utils._json_schema_to_python_type = patched_json_schema
33
+ # -----------------------------------------------------------------------------
34
 
35
+ # ----------------------------- Model Loading ----------------------------------
 
 
 
 
 
36
  device = "cuda" if torch.cuda.is_available() else "cpu"
37
  repo_id = "black-forest-labs/FLUX.1-dev"
38
  adapter_id = "openfree/flux-chatgpt-ghibli-lora"
 
42
  try:
43
  print(f"Loading model attempt {attempt+1}/{max_retries}...")
44
  pipeline = DiffusionPipeline.from_pretrained(
45
+ repo_id,
46
  torch_dtype=torch.bfloat16,
47
  use_safetensors=True,
48
  resume_download=True
49
  )
50
+ print("Base model loaded successfully, now loading LoRA weights...")
51
  pipeline.load_lora_weights(adapter_id)
52
  pipeline = pipeline.to(device)
53
+ print("Pipeline is ready!")
54
  return pipeline
55
  except Exception as e:
56
  if attempt < max_retries - 1:
 
61
  else:
62
  raise Exception(f"Failed to load model after {max_retries} attempts: {e}")
63
 
 
64
  pipeline = load_model_with_retry()
65
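
Note: the surrounding retry loop (`for attempt in range(max_retries):` plus the wait between attempts) is elided by this hunk. A minimal sketch of the general pattern it follows; `load_fn` and `base_delay` are illustrative names, not from the commit:

    import time

    def retry(load_fn, max_retries=5, base_delay=2.0):
        # Try load_fn up to max_retries times, sleeping longer after each failure
        for attempt in range(max_retries):
            try:
                return load_fn()
            except Exception as e:
                if attempt < max_retries - 1:
                    time.sleep(base_delay * (attempt + 1))
                else:
                    raise Exception(f"Failed after {max_retries} attempts: {e}")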
 
+ # ----------------------------- Inference Function -----------------------------
  MAX_SEED = np.iinfo(np.int32).max
  MAX_IMAGE_SIZE = 1024

  @spaces.GPU(duration=120)
  def inference(
      prompt: str,

      guidance_scale: float,
      num_inference_steps: int,
      lora_scale: float,
  ):
+     # If "randomize_seed" is selected, choose a random seed
      if randomize_seed:
          seed = random.randint(0, MAX_SEED)
      generator = torch.Generator(device=device).manual_seed(seed)
+
      try:
          image = pipeline(
              prompt=prompt,

              generator=generator,
              joint_attention_kwargs={"scale": lora_scale},
          ).images[0]
+         return image, seed
      except Exception as e:
          print(f"Error during inference: {e}")
+         # Return a red error image of the specified size and the used seed
          error_img = Image.new('RGB', (width, height), color='red')
+         return error_img, seed
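
Note: `inference` now returns only `(image, seed)`; the gallery plumbing from the old version is gone. A hypothetical direct call, assuming the parameter order from the old signature (seed, randomize_seed, width, height sit in the elided part) and that the `spaces.GPU` decorator is a pass-through outside a Space; all argument values are illustrative:

    image, used_seed = inference(
        prompt="Ghibli style forest spirit at dawn [trigger]",
        seed=42,
        randomize_seed=True,
        width=512,
        height=512,
        guidance_scale=3.5,
        num_inference_steps=30,
        lora_scale=1.0,
    )
    image.save("sample.png")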

+ # ----------------------------- Florence-2 Captioner ---------------------------
  import subprocess
  subprocess.run(
      'pip install flash-attn --no-build-isolation',

  from transformers import AutoProcessor, AutoModelForCausalLM

+ # Pre-load models and processors
  models = {
+     'gokaygokay/Florence-2-Flux-Large': AutoModelForCausalLM.from_pretrained(
+         'gokaygokay/Florence-2-Flux-Large', trust_remote_code=True
+     ).eval(),
+     'gokaygokay/Florence-2-Flux': AutoModelForCausalLM.from_pretrained(
+         'gokaygokay/Florence-2-Flux', trust_remote_code=True
+     ).eval(),
  }

  processors = {
+     'gokaygokay/Florence-2-Flux-Large': AutoProcessor.from_pretrained(
+         'gokaygokay/Florence-2-Flux-Large', trust_remote_code=True
+     ),
+     'gokaygokay/Florence-2-Flux': AutoProcessor.from_pretrained(
+         'gokaygokay/Florence-2-Flux', trust_remote_code=True
+     ),
  }

  @spaces.GPU
+ def caption_image(image, model_name='gokaygokay/Florence-2-Flux-Large'):
+     """
+     Runs the selected Florence-2 model to generate a detailed caption.
+     """
      from PIL import Image as PILImage
+
      task_prompt = "<DESCRIPTION>"
+     user_prompt = task_prompt + "Describe this image in great detail."

+     # Convert input to RGB if needed
+     image = PILImage.fromarray(image)
      if image.mode != "RGB":
          image = image.convert("RGB")

      model = models[model_name]
      processor = processors[model_name]

+     inputs = processor(text=user_prompt, images=image, return_tensors="pt")
      generated_ids = model.generate(
          input_ids=inputs["input_ids"],
          pixel_values=inputs["pixel_values"],

          repetition_penalty=1.10,
      )
      generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+     parsed_answer = processor.post_process_generation(
+         generated_text, task=task_prompt, image_size=(image.width, image.height)
+     )
      return parsed_answer["<DESCRIPTION>"]
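
Note: `caption_image` expects a numpy array (what `gr.Image` passes by default), since it calls `PILImage.fromarray` internally. A minimal usage sketch; `photo.jpg` is an illustrative path, not from the commit:

    import numpy as np
    from PIL import Image

    arr = np.array(Image.open("photo.jpg"))   # HxWxC uint8 array
    caption = caption_image(arr, model_name='gokaygokay/Florence-2-Flux')
    print(caption)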

+ # ----------------------------- Gradio UI --------------------------------------
+ with gr.Blocks(analytics_enabled=False) as demo:
+     with gr.Tabs():
+         # ------------------ TAB 1: Image Generation ----------------------------
+         with gr.TabItem("FLUX Ghibli LoRA Generator"):
+             gr.Markdown("## Generate an image with the FLUX Ghibli LoRA")

+             with gr.Row():
+                 with gr.Column():
                      prompt = gr.Textbox(
+                         label="Prompt",
+                         placeholder="Describe your Ghibli-style image...",
                          lines=3
                      )
                      with gr.Row():
+                         seed = gr.Slider(
+                             label="Seed",
+                             minimum=0,
+                             maximum=MAX_SEED,
+                             step=1,
+                             value=42
+                         )
+                         randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+                     with gr.Row():
+                         width = gr.Slider(
+                             label="Width",
+                             minimum=256,
+                             maximum=MAX_IMAGE_SIZE,
+                             step=32,
+                             value=512
+                         )
+                         height = gr.Slider(
+                             label="Height",
+                             minimum=256,
+                             maximum=MAX_IMAGE_SIZE,
+                             step=32,
+                             value=512
+                         )
+                     with gr.Row():
+                         guidance_scale = gr.Slider(
+                             label="Guidance scale",
+                             minimum=0.0,
+                             maximum=10.0,
+                             step=0.1,
+                             value=3.5
+                         )
+                         num_inference_steps = gr.Slider(
+                             label="Steps",
+                             minimum=1,
+                             maximum=50,
+                             step=1,
+                             value=30
+                         )
+                         lora_scale = gr.Slider(
+                             label="LoRA scale",
+                             minimum=0.0,
+                             maximum=1.0,
+                             step=0.1,
+                             value=1.0
+                         )
+                     generate_button = gr.Button("Generate Image")

+                 with gr.Column():
+                     output_image = gr.Image(label="Generated Image")
+                     output_seed = gr.Number(label="Seed Used")
+
+             # Link the button to the inference function
+             generate_button.click(
+                 inference,
+                 inputs=[
+                     prompt,
+                     seed,
+                     randomize_seed,
+                     width,
+                     height,
+                     guidance_scale,
+                     num_inference_steps,
+                     lora_scale,
+                 ],
+                 outputs=[output_image, output_seed]
+             )

+         # ------------------ TAB 2: Image Captioning ---------------------------
          with gr.TabItem("Florence-2 Captioner"):
+             gr.Markdown("## Generate a caption for an uploaded image using Florence-2")

              with gr.Row():
                  with gr.Column():
+                     input_img = gr.Image(label="Upload an Image")
                      model_selector = gr.Dropdown(
                          choices=list(models.keys()),
+                         value='gokaygokay/Florence-2-Flux-Large',
+                         label="Select Model"
                      )
+                     caption_button = gr.Button("Generate Caption")
                  with gr.Column():
+                     caption_output = gr.Textbox(label="Caption")

+             caption_button.click(caption_image, [input_img, model_selector], [caption_output])

+ # Launch
+ demo.queue(concurrency_count=1, max_size=20)
+ demo.launch(debug=True, show_api=False)
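
Note: `queue(concurrency_count=...)` is the Gradio 3.x signature; Gradio 4.x removed that parameter. A defensive variant, illustrative rather than part of the commit:

    try:
        demo.queue(concurrency_count=1, max_size=20)   # Gradio 3.x
    except TypeError:
        demo.queue(max_size=20)                        # Gradio 4.x signature
    demo.launch(debug=True, show_api=False)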