Buckets:
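| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Kandinsky&quot;,&quot;local&quot;:&quot;kandinsky&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Text-to-image&quot;,&quot;local&quot;:&quot;text-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Image-to-image&quot;,&quot;local&quot;:&quot;image-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inpainting&quot;,&quot;local&quot;:&quot;inpainting&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Interpolation (보간)&quot;,&quot;local&quot;:&quot;interpolation-보간&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;ControlNet&quot;,&quot;local&quot;:&quot;controlnet&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Text-to-image&quot;,&quot;local&quot;:&quot;controlnet-text-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Image-to-image&quot;,&quot;local&quot;:&quot;controlnet-image-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;최적화&quot;,&quot;local&quot;:&quot;최적화&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"> | |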
| <link href="/docs/diffusers/pr_12820/ko/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/entry/start.3d235f8e.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/chunks/scheduler.23542ac5.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/chunks/singletons.90d76c2a.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/chunks/index.7166da64.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/chunks/paths.5789b09f.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/entry/app.86e25fc8.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/chunks/preload-helper.b51cdadd.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/chunks/index.9b1f405b.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/nodes/0.49c759ef.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/nodes/42.77812495.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.af5bd6bd.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/chunks/CodeBlock.dc8eaa1c.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/chunks/DocNotebookDropdown.68a629d2.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/chunks/HfOption.ea0ad09a.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12820/ko/_app/immutable/chunks/stores.59c7d732.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Kandinsky&quot;,&quot;local&quot;:&quot;kandinsky&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Text-to-image&quot;,&quot;local&quot;:&quot;text-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Image-to-image&quot;,&quot;local&quot;:&quot;image-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inpainting&quot;,&quot;local&quot;:&quot;inpainting&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Interpolation (보간)&quot;,&quot;local&quot;:&quot;interpolation-보간&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;ControlNet&quot;,&quot;local&quot;:&quot;controlnet&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Text-to-image&quot;,&quot;local&quot;:&quot;controlnet-text-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Image-to-image&quot;,&quot;local&quot;:&quot;controlnet-image-to-image&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;최적화&quot;,&quot;local&quot;:&quot;최적화&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" 
width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <div class="flex space-x-1 " style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <h1 class="relative group"><a id="kandinsky" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#kandinsky"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 
79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Kandinsky</span></h1> <p data-svelte-h="svelte-1bko70a">Kandinsky 모델은 일련의 다국어 text-to-image 생성 모델입니다. Kandinsky 2.0 모델은 두 개의 다국어 텍스트 인코더를 사용하고 그 결과를 연결하여 UNet에 전달합니다.</p> <p data-svelte-h="svelte-18b8tlp"><a href="../api/pipelines/kandinsky">Kandinsky 2.1</a>은 텍스트와 이미지 임베딩 간의 매핑을 생성하는 image prior 모델(<a href="https://huggingface.co/docs/transformers/model_doc/clip" rel="nofollow"><code>CLIP</code></a>)을 포함하도록 아키텍처를 변경했습니다. 이 매핑은 더 나은 text-image alignment를 제공하며, 학습 중에 텍스트 임베딩과 함께 사용되어 더 높은 품질의 결과를 가져옵니다. 마지막으로, Kandinsky 2.1은 spatial conditional 정규화 레이어를 추가하여 사실감을 높여주는 <a href="https://huggingface.co/papers/2209.09002" rel="nofollow">Modulating Quantized Vectors (MoVQ)</a> 디코더를 사용하여 latents를 이미지로 디코딩합니다.</p> <p data-svelte-h="svelte-1wcte0b"><a href="../api/pipelines/kandinsky_v22">Kandinsky 2.2</a>는 image prior 모델의 이미지 인코더를 더 큰 CLIP-ViT-G 모델로 교체하여 품질을 개선함으로써 이전 모델을 개선했습니다. 또한 image prior 모델은 해상도와 종횡비가 다른 이미지로 재훈련되어 더 높은 해상도의 이미지와 다양한 이미지 크기를 생성합니다.</p> <p data-svelte-h="svelte-ckpv5f"><a href="../api/pipelines/kandinsky3">Kandinsky 3</a>는 아키텍처를 단순화하고 prior 모델과 diffusion 모델을 포함하는 2단계 생성 프로세스에서 벗어나고 있습니다. 대신, Kandinsky 3는 <a href="https://huggingface.co/google/flan-ul2" rel="nofollow">Flan-UL2</a>를 사용하여 텍스트를 인코딩하고, <a href="https://hf.co/papers/1809.11096" rel="nofollow">BigGan-deep</a> 블록이 포함된 UNet을 사용하며, <a href="https://github.com/ai-forever/MoVQGAN" rel="nofollow">Sber-MoVQGAN</a>을 사용하여 latents를 이미지로 디코딩합니다. 
텍스트 이해와 생성된 이미지 품질은 주로 더 큰 텍스트 인코더와 UNet을 사용함으로써 달성됩니다.</p> <p data-svelte-h="svelte-1sy3nlp">이 가이드에서는 text-to-image, image-to-image, 인페인팅, 보간 등을 위해 Kandinsky 모델을 사용하는 방법을 설명합니다.</p> <p data-svelte-h="svelte-1dydg7a">시작하기 전에 다음 라이브러리가 설치되어 있는지 확인하세요:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Colab에서 필요한 라이브러리를 설치하려면 주석을 해제하세요</span> | |
| <span class="hljs-comment">#!pip install -q diffusers transformers accelerate</span><!-- HTML_TAG_END --></pre></div> <blockquote class="warning" data-svelte-h="svelte-wlpsgx"><p>Kandinsky 2.1과 2.2의 사용법은 매우 유사합니다! 유일한 차이점은 Kandinsky 2.2는 latents를 디코딩할 때 <code>prompt</code>를 입력으로 받지 않는다는 것입니다. 대신, Kandinsky 2.2는 디코딩 중에는 <code>image_embeds</code>만 받아들입니다.</p> <br> <p>Kandinsky 3는 더 간결한 아키텍처를 가지고 있으며 prior 모델이 필요하지 않습니다. 즉, <a href="sdxl">Stable Diffusion XL</a>과 같은 다른 diffusion 모델과 사용법이 동일합니다.</p></blockquote> <h2 class="relative group"><a id="text-to-image" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#text-to-image"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Text-to-image</span></h2> <p data-svelte-h="svelte-1sefbph">모든 작업에 Kandinsky 모델을 사용하려면 항상 프롬프트를 인코딩하고 이미지 임베딩을 생성하는 prior 파이프라인을 설정하는 것부터 시작해야 합니다. Prior 파이프라인은 negative 프롬프트 <code>""</code>에 해당하는 <code>negative_image_embeds</code>도 생성합니다. 
더 나은 결과를 얻으려면 prior 파이프라인에 실제 <code>negative_prompt</code>를 전달할 수 있지만, 이렇게 하면 prior 파이프라인의 유효 배치 크기가 2배로 증가합니다.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Kandinsky 2.1 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Kandinsky 2.2 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Kandinsky 3 </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre 
class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> KandinskyPriorPipeline, KandinskyPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| prior_pipeline = KandinskyPriorPipeline.from_pretrained(<span class="hljs-string">"kandinsky-community/kandinsky-2-1-prior"</span>, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| pipeline = KandinskyPipeline.from_pretrained(<span class="hljs-string">"kandinsky-community/kandinsky-2-1"</span>, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"A alien cheeseburger creature eating itself, claymation, cinematic, moody lighting"</span> | |
| negative_prompt = <span class="hljs-string">"low quality, bad quality"</span> <span class="hljs-comment"># negative 프롬프트 포함은 선택적이지만, 보통 결과는 더 좋습니다</span> | |
| image_embeds, negative_image_embeds = prior_pipeline(prompt, negative_prompt, guidance_scale=<span class="hljs-number">1.0</span>).to_tuple()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1t17459">이제 모든 프롬프트와 임베딩을 <code>KandinskyPipeline</code>에 전달하여 이미지를 생성합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->image = pipeline(prompt, image_embeds=image_embeds, negative_prompt=negative_prompt, negative_image_embeds=negative_image_embeds, height=<span class="hljs-number">768</span>, width=<span class="hljs-number">768</span>).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-i75i2v"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/kandinsky-docs/cheeseburger.png"></div> </div> <p data-svelte-h="svelte-12j62dc">🤗 Diffusers는 또한 <code>KandinskyCombinedPipeline</code> 및 <code>KandinskyV22CombinedPipeline</code>이 포함된 end-to-end API를 제공하므로 prior 파이프라인과 text-to-image 변환 파이프라인을 별도로 불러올 필요가 없습니다. 결합된 파이프라인은 prior 모델과 디코더를 모두 자동으로 불러옵니다. 원하는 경우 <code>prior_guidance_scale</code> 및 <code>prior_num_inference_steps</code> 매개 변수를 사용하여 prior 파이프라인에 대해 다른 값을 설정할 수 있습니다.</p> <p data-svelte-h="svelte-pukajh">내부에서 결합된 파이프라인을 자동으로 호출하려면 <code>AutoPipelineForText2Image</code>를 사용합니다:</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Kandinsky 2.1 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Kandinsky 2.2 </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" 
height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForText2Image | |
| <span class="hljs-keyword">import</span> torch | |
| pipeline = AutoPipelineForText2Image.from_pretrained(<span class="hljs-string">"kandinsky-community/kandinsky-2-1"</span>, torch_dtype=torch.float16) | |
| pipeline.enable_model_cpu_offload() | |
| prompt = <span class="hljs-string">"A alien cheeseburger creature eating itself, claymation, cinematic, moody lighting"</span> | |
| negative_prompt = <span class="hljs-string">"low quality, bad quality"</span> | |
| image = pipeline(prompt=prompt, negative_prompt=negative_prompt, prior_guidance_scale=<span class="hljs-number">1.0</span>, guidance_scale=<span class="hljs-number">4.0</span>, height=<span class="hljs-number">768</span>, width=<span class="hljs-number">768</span>).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> </div> <h2 class="relative group"><a id="image-to-image" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#image-to-image"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Image-to-image</span></h2> <p data-svelte-h="svelte-1aianu4">Image-to-image 경우, 초기 이미지와 텍스트 프롬프트를 전달하여 파이프라인에 이미지를 conditioning합니다. 
Prior 파이프라인을 불러오는 것으로 시작합니다:</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Kandinsky 2.1 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Kandinsky 2.2 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Kandinsky 3 </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | 
|
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> KandinskyImg2ImgPipeline, KandinskyPriorPipeline | |
| prior_pipeline = KandinskyPriorPipeline.from_pretrained(<span class="hljs-string">"kandinsky-community/kandinsky-2-1-prior"</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>).to(<span class="hljs-string">"cuda"</span>) | |
| pipeline = KandinskyImg2ImgPipeline.from_pretrained(<span class="hljs-string">"kandinsky-community/kandinsky-2-1"</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>).to(<span class="hljs-string">"cuda"</span>)<!-- HTML_TAG_END --></pre></div> </div> <p data-svelte-h="svelte-1eewmqm">Conditioning할 이미지를 다운로드합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| <span class="hljs-comment"># 이미지 다운로드</span> | |
| url = <span class="hljs-string">"https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"</span> | |
| original_image = load_image(url) | |
| original_image = original_image.resize((<span class="hljs-number">768</span>, <span class="hljs-number">512</span>))<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1w2zk95"><img class="rounded-xl" src="https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"></div> <p data-svelte-h="svelte-1sj0kef">Prior 파이프라인으로 <code>image_embeds</code>와 <code>negative_image_embeds</code>를 생성합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt = <span class="hljs-string">"A fantasy landscape, Cinematic lighting"</span> | |
| negative_prompt = <span class="hljs-string">"low quality, bad quality"</span> | |
| image_embeds, negative_image_embeds = prior_pipeline(prompt, negative_prompt).to_tuple()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1uxnltd">이제 원본 이미지와 모든 프롬프트 및 임베딩을 파이프라인으로 전달하여 이미지를 생성합니다:</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Kandinsky 2.1 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Kandinsky 2.2 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Kandinsky 3 </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" 
style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> make_image_grid | |
| image = pipeline(prompt, negative_prompt=negative_prompt, image=original_image, image_embeds=image_embeds, negative_image_embeds=negative_image_embeds, height=<span class="hljs-number">768</span>, width=<span class="hljs-number">768</span>, strength=<span class="hljs-number">0.3</span>).images[<span class="hljs-number">0</span>] | |
| make_image_grid([original_image.resize((<span class="hljs-number">512</span>, <span class="hljs-number">512</span>)), image.resize((<span class="hljs-number">512</span>, <span class="hljs-number">512</span>))], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1vh4dwd"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/kandinsky-docs/img2img_fantasyland.png"></div> </div> <p data-svelte-h="svelte-hzxa8q">또한 🤗 Diffusers에서는 <code>KandinskyImg2ImgCombinedPipeline</code> 및 <code>KandinskyV22Img2ImgCombinedPipeline</code>이 포함된 end-to-end API를 제공하므로 prior 파이프라인과 image-to-image 파이프라인을 별도로 불러올 필요가 없습니다. 결합된 파이프라인은 prior 모델과 디코더를 모두 자동으로 불러옵니다. 원하는 경우 <code>prior_guidance_scale</code> 및 <code>prior_num_inference_steps</code> 매개 변수를 사용하여 prior 파이프라인에 대해 다른 값을 설정할 수 있습니다.</p> <p data-svelte-h="svelte-12ry8vz">내부에서 결합된 파이프라인을 자동으로 호출하려면 <code>AutoPipelineForImage2Image</code>를 사용합니다:</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Kandinsky 2.1 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Kandinsky 2.2 </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" 
height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForImage2Image | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> make_image_grid, load_image | |
| <span class="hljs-keyword">import</span> torch | |
| pipeline = AutoPipelineForImage2Image.from_pretrained(<span class="hljs-string">"kandinsky-community/kandinsky-2-1"</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>) | |
| pipeline.enable_model_cpu_offload() | |
| prompt = <span class="hljs-string">"A fantasy landscape, Cinematic lighting"</span> | |
| negative_prompt = <span class="hljs-string">"low quality, bad quality"</span> | |
| url = <span class="hljs-string">"https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"</span> | |
| original_image = load_image(url) | |
| original_image.thumbnail((<span class="hljs-number">768</span>, <span class="hljs-number">768</span>)) | |
| image = pipeline(prompt=prompt, negative_prompt=negative_prompt, image=original_image, strength=<span class="hljs-number">0.3</span>).images[<span class="hljs-number">0</span>] | |
| make_image_grid([original_image.resize((<span class="hljs-number">512</span>, <span class="hljs-number">512</span>)), image.resize((<span class="hljs-number">512</span>, <span class="hljs-number">512</span>))], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> </div> <h2 class="relative group"><a id="inpainting" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inpainting"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inpainting</span></h2> <blockquote class="warning"><p data-svelte-h="svelte-19cj2ra">⚠️ Kandinsky 모델은 이제 검은색 픽셀 대신 ⬜️ <strong>흰색 픽셀</strong>을 사용하여 마스크 영역을 표현합니다. 
프로덕션에서 <code>KandinskyInpaintPipeline</code>을 사용하는 경우 흰색 픽셀을 사용하도록 마스크를 변경해야 합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># PIL 입력에 대해</span> | |
| <span class="hljs-keyword">import</span> PIL.ImageOps | |
| mask = PIL.ImageOps.invert(mask) | |
| <span class="hljs-comment"># PyTorch와 NumPy 입력에 대해</span> | |
| mask = <span class="hljs-number">1</span> - mask<!-- HTML_TAG_END --></pre></div></blockquote> <p data-svelte-h="svelte-qsp91m">인페인팅에서는 원본 이미지, 원본 이미지에서 대체할 영역의 마스크, 인페인팅할 내용에 대한 텍스트 프롬프트가 필요합니다. Prior 파이프라인을 불러옵니다:</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Kandinsky 2.1 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Kandinsky 2.2 </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span 
class="hljs-keyword">import</span> KandinskyInpaintPipeline, KandinskyPriorPipeline | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| prior_pipeline = KandinskyPriorPipeline.from_pretrained(<span class="hljs-string">"kandinsky-community/kandinsky-2-1-prior"</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>).to(<span class="hljs-string">"cuda"</span>) | |
| pipeline = KandinskyInpaintPipeline.from_pretrained(<span class="hljs-string">"kandinsky-community/kandinsky-2-1-inpaint"</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>).to(<span class="hljs-string">"cuda"</span>)<!-- HTML_TAG_END --></pre></div> </div> <p data-svelte-h="svelte-1nfserr">초기 이미지를 불러오고 마스크를 생성합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->init_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinsky/cat.png"</span>) | |
| mask = np.zeros((<span class="hljs-number">768</span>, <span class="hljs-number">768</span>), dtype=np.float32) | |
| <span class="hljs-comment"># 고양이 머리 위 마스크 지역</span> | |
| mask[:<span class="hljs-number">250</span>, <span class="hljs-number">250</span>:-<span class="hljs-number">250</span>] = <span class="hljs-number">1</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-339bbm">Prior 파이프라인으로 임베딩을 생성합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt = <span class="hljs-string">"a hat"</span> | |
| prior_output = prior_pipeline(prompt)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-dajjv4">이제 이미지 생성을 위해 초기 이미지, 마스크, 프롬프트와 임베딩을 파이프라인에 전달합니다:</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Kandinsky 2.1 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Kandinsky 2.2 </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->output_image = pipeline(prompt, image=init_image, mask_image=mask, **prior_output, height=<span class="hljs-number">768</span>, 
width=<span class="hljs-number">768</span>, num_inference_steps=<span class="hljs-number">150</span>).images[<span class="hljs-number">0</span>] | |
| mask = Image.fromarray((mask*<span class="hljs-number">255</span>).astype(<span class="hljs-string">'uint8'</span>), <span class="hljs-string">'L'</span>) | |
| make_image_grid([init_image, mask, output_image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">3</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1d04i1f"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/kandinsky-docs/inpaint_cat_hat.png"></div> </div> <p data-svelte-h="svelte-y29dku"><code>KandinskyInpaintCombinedPipeline</code> 및 <code>KandinskyV22InpaintCombinedPipeline</code>을 사용하여 내부에서 prior 및 디코더 파이프라인을 함께 호출할 수 있습니다. 이를 위해 <code>AutoPipelineForInpainting</code>을 사용합니다:</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Kandinsky 2.1 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Kandinsky 2.2 </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal 
shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForInpainting | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| pipe = AutoPipelineForInpainting.from_pretrained(<span class="hljs-string">"kandinsky-community/kandinsky-2-1-inpaint"</span>, torch_dtype=torch.float16) | |
| pipe.enable_model_cpu_offload() | |
| init_image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinsky/cat.png"</span>) | |
| mask = np.zeros((<span class="hljs-number">768</span>, <span class="hljs-number">768</span>), dtype=np.float32) | |
| <span class="hljs-comment"># 고양이 머리 위 마스크 지역</span> | |
| mask[:<span class="hljs-number">250</span>, <span class="hljs-number">250</span>:-<span class="hljs-number">250</span>] = <span class="hljs-number">1</span> | |
| prompt = <span class="hljs-string">"a hat"</span> | |
| output_image = pipe(prompt=prompt, image=init_image, mask_image=mask).images[<span class="hljs-number">0</span>] | |
| mask = Image.fromarray((mask*<span class="hljs-number">255</span>).astype(<span class="hljs-string">'uint8'</span>), <span class="hljs-string">'L'</span>) | |
| make_image_grid([init_image, mask, output_image], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">3</span>)<!-- HTML_TAG_END --></pre></div> </div> <h2 class="relative group"><a id="interpolation-보간" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#interpolation-보간"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Interpolation (보간)</span></h2> <p data-svelte-h="svelte-p80btz">Interpolation(보간)을 사용하면 이미지와 텍스트 임베딩 사이의 latent space를 탐색할 수 있으며, 이는 prior 모델의 중간 결과물을 살펴볼 수 있는 멋진 방법입니다.
Prior 파이프라인과 보간하려는 두 개의 이미지를 불러옵니다:</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Kandinsky 2.1 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Kandinsky 2.2 </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> KandinskyPriorPipeline, KandinskyPipeline | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image, make_image_grid | |
| <span class="hljs-keyword">import</span> torch | |
| prior_pipeline = KandinskyPriorPipeline.from_pretrained(<span class="hljs-string">"kandinsky-community/kandinsky-2-1-prior"</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>).to(<span class="hljs-string">"cuda"</span>) | |
| img_1 = load_image(<span class="hljs-string">"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinsky/cat.png"</span>) | |
| img_2 = load_image(<span class="hljs-string">"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinsky/starry_night.jpeg"</span>) | |
| make_image_grid([img_1.resize((<span class="hljs-number">512</span>,<span class="hljs-number">512</span>)), img_2.resize((<span class="hljs-number">512</span>,<span class="hljs-number">512</span>))], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> </div> <div class="flex gap-4" data-svelte-h="svelte-1c7h32m"><div><img class="rounded-xl" src="https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinsky/cat.png"> <figcaption class="mt-2 text-center text-sm text-gray-500">a cat</figcaption></div> <div><img class="rounded-xl" src="https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinsky/starry_night.jpeg"> <figcaption class="mt-2 text-center text-sm text-gray-500">Van Gogh's Starry Night painting</figcaption></div></div> <p data-svelte-h="svelte-1ct3lji">보간할 텍스트 또는 이미지를 지정하고 각 텍스트 또는 이미지에 대한 가중치를 설정합니다. 가중치를 실험하여 보간에 어떤 영향을 미치는지 확인하세요!</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform 
-translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->images_texts = [<span class="hljs-string">"a cat"</span>, img_1, img_2] | |
| weights = [<span class="hljs-number">0.3</span>, <span class="hljs-number">0.3</span>, <span class="hljs-number">0.4</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1rcm9me"><code>interpolate</code> 함수를 호출하여 임베딩을 생성한 다음, 파이프라인으로 전달하여 이미지를 생성합니다:</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Kandinsky 2.1 </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Kandinsky 2.2 </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span 
class="hljs-comment"># 프롬프트는 빈칸으로 남겨도 됩니다</span> | |
| prompt = <span class="hljs-string">""</span> | |
| prior_out = prior_pipeline.interpolate(images_texts, weights) | |
| pipeline = KandinskyPipeline.from_pretrained(<span class="hljs-string">"kandinsky-community/kandinsky-2-1"</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>).to(<span class="hljs-string">"cuda"</span>) | |
| image = pipeline(prompt, **prior_out, height=<span class="hljs-number">768</span>, width=<span class="hljs-number">768</span>).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-twgci5"><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/kandinsky-docs/starry_cat.png"></div> </div> <h2 class="relative group"><a id="controlnet" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#controlnet"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ControlNet</span></h2> <blockquote class="warning" data-svelte-h="svelte-5x8iov"><p>⚠️ ControlNet은 Kandinsky 2.2에서만 지원됩니다!</p></blockquote> <p data-svelte-h="svelte-re8j87">ControlNet을 사용하면 depth map이나 edge detection과 같은 추가 입력을 통해 사전학습된 large diffusion 모델을 conditioning할 수 있습니다.
예를 들어, 모델이 depth map의 구조를 이해하고 보존할 수 있도록 depth map으로 Kandinsky 2.2를 conditioning할 수 있습니다.</p> <p data-svelte-h="svelte-bnaxe4">이미지를 불러오고 depth map을 추출해 보겠습니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| img = load_image( | |
| <span class="hljs-string">"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinskyv22/cat.png"</span> | |
| ).resize((<span class="hljs-number">768</span>, <span class="hljs-number">768</span>)) | |
| img<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-11gtyyz"><img class="rounded-xl" src="https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinskyv22/cat.png"></div> <p data-svelte-h="svelte-1xmggyl">그런 다음 🤗 Transformers의 <code>depth-estimation</code> <code>Pipeline</code>을 사용하여 이미지를 처리해 depth map을 구할 수 있습니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">make_hint</span>(<span class="hljs-params">image, depth_estimator</span>): | |
| image = depth_estimator(image)[<span class="hljs-string">"depth"</span>] | |
| image = np.array(image) | |
| image = image[:, :, <span class="hljs-literal">None</span>] | |
| image = np.concatenate([image, image, image], axis=<span class="hljs-number">2</span>) | |
| detected_map = torch.from_numpy(image).<span class="hljs-built_in">float</span>() / <span class="hljs-number">255.0</span> | |
| hint = detected_map.permute(<span class="hljs-number">2</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>) | |
| <span class="hljs-keyword">return</span> hint | |
| depth_estimator = pipeline(<span class="hljs-string">"depth-estimation"</span>) | |
| hint = make_hint(img, depth_estimator).unsqueeze(<span class="hljs-number">0</span>).half().to(<span class="hljs-string">"cuda"</span>)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="controlnet-text-to-image" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#controlnet-text-to-image"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Text-to-image</span></h3> <p data-svelte-h="svelte-1c5fsic">Prior 파이프라인과 <code>KandinskyV22ControlnetPipeline</code>을 불러옵니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path
d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> KandinskyV22PriorPipeline, KandinskyV22ControlnetPipeline | |
| prior_pipeline = KandinskyV22PriorPipeline.from_pretrained( | |
| <span class="hljs-string">"kandinsky-community/kandinsky-2-2-prior"</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span> | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| pipeline = KandinskyV22ControlnetPipeline.from_pretrained( | |
| <span class="hljs-string">"kandinsky-community/kandinsky-2-2-controlnet-depth"</span>, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ta47o3">프롬프트와 negative 프롬프트로 이미지 임베딩을 생성합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt = <span class="hljs-string">"A robot, 4k photo"</span> | |
| negative_prior_prompt = <span class="hljs-string">"lowres, text, error, cropped, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, out of frame, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck, username, watermark, signature"</span> | |
| generator = torch.Generator(device=<span class="hljs-string">"cuda"</span>).manual_seed(<span class="hljs-number">43</span>) | |
| image_emb, zero_image_emb = prior_pipeline( | |
| prompt=prompt, negative_prompt=negative_prior_prompt, generator=generator | |
| ).to_tuple()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-abo9el">마지막으로 이미지 임베딩과 depth 이미지를 <code>KandinskyV22ControlnetPipeline</code>에 전달하여 이미지를 생성합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->image = pipeline(image_embeds=image_emb, negative_image_embeds=zero_image_emb, hint=hint, num_inference_steps=<span class="hljs-number">50</span>, generator=generator, height=<span class="hljs-number">768</span>, width=<span class="hljs-number">768</span>).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1oo0se1"><img class="rounded-xl" src="https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinskyv22/robot_cat_text2img.png"></div> <h3 class="relative group"><a id="controlnet-image-to-image" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#controlnet-image-to-image"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Image-to-image</span></h3> <p data-svelte-h="svelte-1qtubr7">ControlNet을 사용한 image-to-image의 경우, 다음을 사용할 필요가 있습니다:</p> <ul data-svelte-h="svelte-6td1jn"><li><code>KandinskyV22PriorEmb2EmbPipeline</code>로 텍스트 프롬프트와 이미지에서 이미지 임베딩을 생성합니다.</li> <li><code>KandinskyV22ControlnetImg2ImgPipeline</code>로 초기 이미지와 이미지 임베딩에서 이미지를 생성합니다.</li></ul> <p data-svelte-h="svelte-ya90bw">🤗 Transformers에서 <code>depth-estimation</code> <code>Pipeline</code>을 사용하여 고양이의 초기 이미지의 depth map을 처리해 추출합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 
ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> KandinskyV22PriorEmb2EmbPipeline, KandinskyV22ControlnetImg2ImgPipeline | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| img = load_image( | |
| <span class="hljs-string">"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinskyv22/cat.png"</span> | |
| ).resize((<span class="hljs-number">768</span>, <span class="hljs-number">768</span>)) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">make_hint</span>(<span class="hljs-params">image, depth_estimator</span>): | |
| image = depth_estimator(image)[<span class="hljs-string">"depth"</span>] | |
| image = np.array(image) | |
| image = image[:, :, <span class="hljs-literal">None</span>] | |
| image = np.concatenate([image, image, image], axis=<span class="hljs-number">2</span>) | |
| detected_map = torch.from_numpy(image).<span class="hljs-built_in">float</span>() / <span class="hljs-number">255.0</span> | |
| hint = detected_map.permute(<span class="hljs-number">2</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>) | |
| <span class="hljs-keyword">return</span> hint | |
| depth_estimator = pipeline(<span class="hljs-string">"depth-estimation"</span>) | |
| hint = make_hint(img, depth_estimator).unsqueeze(<span class="hljs-number">0</span>).half().to(<span class="hljs-string">"cuda"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mw2wdw">Prior 파이프라인과 <code>KandinskyV22ControlnetImg2ImgPipeline</code>을 불러옵니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prior_pipeline = KandinskyV22PriorEmb2EmbPipeline.from_pretrained( | |
| <span class="hljs-string">"kandinsky-community/kandinsky-2-2-prior"</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span> | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| pipeline = KandinskyV22ControlnetImg2ImgPipeline.from_pretrained( | |
| <span class="hljs-string">"kandinsky-community/kandinsky-2-2-controlnet-depth"</span>, torch_dtype=torch.float16 | |
| ).to(<span class="hljs-string">"cuda"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-7am2u1">텍스트 프롬프트와 초기 이미지를 prior 파이프라인에 전달하여 이미지 임베딩을 생성합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt = <span class="hljs-string">"A robot, 4k photo"</span> | |
| negative_prior_prompt = <span class="hljs-string">"lowres, text, error, cropped, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, out of frame, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck, username, watermark, signature"</span> | |
| generator = torch.Generator(device=<span class="hljs-string">"cuda"</span>).manual_seed(<span class="hljs-number">43</span>) | |
| img_emb = prior_pipeline(prompt=prompt, image=img, strength=<span class="hljs-number">0.85</span>, generator=generator) | |
| negative_emb = prior_pipeline(prompt=negative_prior_prompt, image=img, strength=<span class="hljs-number">1</span>, generator=generator)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-yn34s8">이제 <code>KandinskyV22ControlnetImg2ImgPipeline</code>을 실행하여 초기 이미지와 이미지 임베딩으로부터 이미지를 생성할 수 있습니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->image = pipeline(image=img, strength=<span class="hljs-number">0.5</span>, image_embeds=img_emb.image_embeds, negative_image_embeds=negative_emb.image_embeds, hint=hint, num_inference_steps=<span class="hljs-number">50</span>, generator=generator, height=<span class="hljs-number">768</span>, width=<span class="hljs-number">768</span>).images[<span class="hljs-number">0</span>] | |
| make_image_grid([img.resize((<span class="hljs-number">512</span>, <span class="hljs-number">512</span>)), image.resize((<span class="hljs-number">512</span>, <span class="hljs-number">512</span>))], rows=<span class="hljs-number">1</span>, cols=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-eey5ka"><img class="rounded-xl" src="https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinskyv22/robot_cat.png"></div> <h2 class="relative group"><a id="최적화" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#최적화"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>최적화</span></h2> <p data-svelte-h="svelte-1wfpj8q">Kandinsky는 mapping을 생성하기 위한 prior 파이프라인과 latents를 이미지로 디코딩하기 위한 두 번째 파이프라인이 필요하다는 점에서 독특합니다. 대부분의 계산이 두 번째 파이프라인에서 이루어지므로 최적화의 노력은 두 번째 파이프라인에 집중되어야 합니다. 
다음은 추론 중 Kandinsky를 개선하기 위한 몇 가지 팁입니다.</p> <ol data-svelte-h="svelte-6zsdp2"><li>PyTorch &lt; 2.0을 사용할 경우 <a href="../optimization/xformers">xFormers</a>를 활성화합니다.</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> from diffusers import DiffusionPipeline | |
| import torch | |
| pipe = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16) | |
| <span class="hljs-addition">+ pipe.enable_xformers_memory_efficient_attention()</span><!-- HTML_TAG_END --></pre></div> <ol start="2" data-svelte-h="svelte-1g7sf05"><li>PyTorch >= 2.0을 사용할 경우 <code>torch.compile</code>을 활성화하여 scaled dot-product attention (SDPA)를 자동으로 사용하도록 합니다:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> pipe.unet.to(memory_format=torch.channels_last) | |
| <span class="hljs-addition">+ pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-sptkvy">이는 attention processor를 명시적으로 <code>AttnAddedKVProcessor2_0</code>을 사용하도록 설정하는 것과 동일합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers.models.attention_processor <span class="hljs-keyword">import</span> AttnAddedKVProcessor2_0 | |
| pipe.unet.set_attn_processor(AttnAddedKVProcessor2_0())<!-- HTML_TAG_END --></pre></div> <ol start="3" data-svelte-h="svelte-12qj1et"><li>메모리 부족 오류를 방지하기 위해 <code>enable_model_cpu_offload()</code>를 사용하여 모델을 CPU로 오프로드합니다:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> from diffusers import DiffusionPipeline | |
| import torch | |
| pipe = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16) | |
| <span class="hljs-addition">+ pipe.enable_model_cpu_offload()</span><!-- HTML_TAG_END --></pre></div> <ol start="4" data-svelte-h="svelte-1y5yjap"><li>기본적으로 text-to-image 파이프라인은 <code>DDIMScheduler</code>를 사용하지만, <code>DDPMScheduler</code>와 같은 다른 스케줄러로 대체하여 추론 속도와 이미지 품질 간의 균형에 어떤 영향을 미치는지 확인할 수 있습니다:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DDPMScheduler | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| scheduler = DDPMScheduler.from_pretrained(<span class="hljs-string">"kandinsky-community/kandinsky-2-1"</span>, subfolder=<span class="hljs-string">"ddpm_scheduler"</span>) | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"kandinsky-community/kandinsky-2-1"</span>, scheduler=scheduler, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>).to(<span class="hljs-string">"cuda"</span>)<!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/diffusers/blob/main/docs/source/ko/using-diffusers/kandinsky.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_1nna9bv = { | |
| assets: "/docs/diffusers/pr_12820/ko", | |
| base: "/docs/diffusers/pr_12820/ko", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/diffusers/pr_12820/ko/_app/immutable/entry/start.3d235f8e.js"), | |
| import("/docs/diffusers/pr_12820/ko/_app/immutable/entry/app.86e25fc8.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 42], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 98.8 kB
- Xet hash:
- 394ac9e4fdb9b9a2e3c4b2a85a5e028bbabb7d74289c14420a76dbb46007ed9f
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.