Buckets:

download
raw
10.1 kB
import{s as _t,o as $t,n as gt}from"../chunks/scheduler.852ec091.js";import{S as vt,i as yt,g as m,s,r as _,A as wt,h as d,f as n,c as l,j as z,u as $,x as G,k as D,y as p,a as i,v as g,d as v,t as y,w}from"../chunks/index.28275fd3.js";import{D as nt}from"../chunks/Docstring.ee6c313e.js";import{C as Mt}from"../chunks/CodeBlock.c3366071.js";import{E as Lt}from"../chunks/ExampleCodeBlock.00f06ed4.js";import{H as mt,E as Tt}from"../chunks/EditOnGithub.582011f0.js";function jt(Q){let r,x="Example:",h,b,c;return b=new Mt({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjB0b3JjaC5ubiUyMGFzJTIwbm4lMEElMEFpbXBvcnQlMjBiaXRzYW5kYnl0ZXMlMjBhcyUyMGJuYiUwQWZyb20lMjBibmIubm4lMjBpbXBvcnQlMjBMaW5lYXI4Yml0THQlMEElMEFmcDE2X21vZGVsJTIwJTNEJTIwbm4uU2VxdWVudGlhbCglMEElMjAlMjAlMjAlMjBubi5MaW5lYXIoNjQlMkMlMjA2NCklMkMlMEElMjAlMjAlMjAlMjBubi5MaW5lYXIoNjQlMkMlMjA2NCklMEEpJTBBJTBBaW50OF9tb2RlbCUyMCUzRCUyMG5uLlNlcXVlbnRpYWwoJTBBJTIwJTIwJTIwJTIwTGluZWFyOGJpdEx0KDY0JTJDJTIwNjQlMkMlMjBoYXNfZnAxNl93ZWlnaHRzJTNERmFsc2UpJTJDJTBBJTIwJTIwJTIwJTIwTGluZWFyOGJpdEx0KDY0JTJDJTIwNjQlMkMlMjBoYXNfZnAxNl93ZWlnaHRzJTNERmFsc2UpJTBBKSUwQSUwQWludDhfbW9kZWwubG9hZF9zdGF0ZV9kaWN0KGZwMTZfbW9kZWwuc3RhdGVfZGljdCgpKSUwQWludDhfbW9kZWwlMjAlM0QlMjBpbnQ4X21vZGVsLnRvKDApJTIwJTIzJTIwUXVhbnRpemF0aW9uJTIwaGFwcGVucyUyMGhlcmU=",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> torch.nn <span class="hljs-keyword">as</span> nn
<span class="hljs-keyword">import</span> bitsandbytes <span class="hljs-keyword">as</span> bnb
<span class="hljs-keyword">from</span> bnb.nn <span class="hljs-keyword">import</span> Linear8bitLt
fp16_model = nn.Sequential(
nn.Linear(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>),
nn.Linear(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>)
)
int8_model = nn.Sequential(
Linear8bitLt(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>, has_fp16_weights=<span class="hljs-literal">False</span>),
Linear8bitLt(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>, has_fp16_weights=<span class="hljs-literal">False</span>)
)
int8_model.load_state_dict(fp16_model.state_dict())
int8_model = int8_model.to(<span class="hljs-number">0</span>) <span class="hljs-comment"># Quantization happens here</span>`,wrap:!1}}),{c(){r=m("p"),r.textContent=x,h=s(),_(b.$$.fragment)},l(a){r=d(a,"P",{"data-svelte-h":!0}),G(r)!=="svelte-11lpom8"&&(r.textContent=x),h=l(a),$(b.$$.fragment,a)},m(a,f){i(a,r,f),i(a,h,f),g(b,a,f),c=!0},p:gt,i(a){c||(v(b.$$.fragment,a),c=!0)},o(a){y(b.$$.fragment,a),c=!1},d(a){a&&(n(r),n(h)),w(b,a)}}}function xt(Q){let r,x,h,b,c,a,f,dt='<a href="https://hf.co/papers/2208.07339" rel="nofollow">LLM.int8()</a> is a quantization method that aims to make large language model inference more accessible without significant degradation. Unlike naive 8-bit quantization, which can result in loss of critical information and accuracy, LLM.int8() dynamically adapts to ensure sensitive components of the computation retain higher precision when needed. The key is to extract the outliers from the inputs and weights and multiply them in 16-bit. All other values are multiplied in 8-bit before being dequantized back to 16-bits. The outputs from the 16-bit and 8-bit multiplication are combined to produce the final output.',X,J,ut='<a href="../../explanations/resources#llm-int8">Further Resources</a>',H,I,R,o,E,at,P,bt=`This class is the base module for the <a href="https://arxiv.org/abs/2208.07339" rel="nofollow">LLM.int8()</a> algorithm.
To read more about it, have a look at the paper.`,st,Z,ct=`In order to quantize a linear layer one should first load the original fp16 / bf16 weights into
the Linear8bitLt module, then call <code>int8_module.to(&quot;cuda&quot;)</code> to quantize the fp16 weights.`,lt,L,it,T,k,rt,F,ft="Initialize Linear8bitLt class.",V,B,Y,M,C,ot,j,N,pt,U,ht="Initialize self. See help(type(self)) for accurate signature.",q,W,S,A,O;return c=new mt({props:{title:"LLM.int8()",local:"llmint8",headingTag:"h1"}}),I=new mt({props:{title:"Linear8bitLt",local:"bitsandbytes.nn.Linear8bitLt",headingTag:"h2"}}),E=new nt({props:{name:"class bitsandbytes.nn.Linear8bitLt",anchor:"bitsandbytes.nn.Linear8bitLt",parameters:[{name:"input_features",val:": int"},{name:"output_features",val:": int"},{name:"bias",val:" = True"},{name:"has_fp16_weights",val:" = True"},{name:"threshold",val:" = 0.0"},{name:"index",val:" = None"},{name:"device",val:" = None"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1501/bitsandbytes/nn/modules.py#L846"}}),L=new Lt({props:{anchor:"bitsandbytes.nn.Linear8bitLt.example",$$slots:{default:[jt]},$$scope:{ctx:Q}}}),k=new nt({props:{name:"__init__",anchor:"bitsandbytes.nn.Linear8bitLt.__init__",parameters:[{name:"input_features",val:": int"},{name:"output_features",val:": int"},{name:"bias",val:" = True"},{name:"has_fp16_weights",val:" = True"},{name:"threshold",val:" = 0.0"},{name:"index",val:" = None"},{name:"device",val:" = None"}],parametersDescription:[{anchor:"bitsandbytes.nn.Linear8bitLt.__init__.input_features",description:`<strong>input_features</strong> (<code>int</code>) &#x2014;
Number of input features of the linear layer.`,name:"input_features"},{anchor:"bitsandbytes.nn.Linear8bitLt.__init__.output_features",description:`<strong>output_features</strong> (<code>int</code>) &#x2014;
Number of output features of the linear layer.`,name:"output_features"},{anchor:"bitsandbytes.nn.Linear8bitLt.__init__.bias",description:`<strong>bias</strong> (<code>bool</code>, defaults to <code>True</code>) &#x2014;
Whether the linear class uses the bias term as well.`,name:"bias"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1501/bitsandbytes/nn/modules.py#L878"}}),B=new mt({props:{title:"Int8Params",local:"bitsandbytes.nn.Int8Params",headingTag:"h2"}}),C=new nt({props:{name:"class bitsandbytes.nn.Int8Params",anchor:"bitsandbytes.nn.Int8Params",parameters:[{name:"data",val:": typing.Optional[torch.Tensor] = None"},{name:"requires_grad",val:" = True"},{name:"has_fp16_weights",val:" = False"},{name:"CB",val:": typing.Optional[torch.Tensor] = None"},{name:"SCB",val:": typing.Optional[torch.Tensor] = None"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1501/bitsandbytes/nn/modules.py#L566"}}),N=new nt({props:{name:"__init__",anchor:"bitsandbytes.nn.Int8Params.__init__",parameters:[{name:"*args",val:""},{name:"**kwargs",val:""}]}}),W=new Tt({props:{source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/main/docs/source/reference/nn/linear8bit.mdx"}}),{c(){r=m("meta"),x=s(),h=m("p"),b=s(),_(c.$$.fragment),a=s(),f=m("p"),f.innerHTML=dt,X=s(),J=m("p"),J.innerHTML=ut,H=s(),_(I.$$.fragment),R=s(),o=m("div"),_(E.$$.fragment),at=s(),P=m("p"),P.innerHTML=bt,st=s(),Z=m("p"),Z.innerHTML=ct,lt=s(),_(L.$$.fragment),it=s(),T=m("div"),_(k.$$.fragment),rt=s(),F=m("p"),F.textContent=ft,V=s(),_(B.$$.fragment),Y=s(),M=m("div"),_(C.$$.fragment),ot=s(),j=m("div"),_(N.$$.fragment),pt=s(),U=m("p"),U.textContent=ht,q=s(),_(W.$$.fragment),S=s(),A=m("p"),this.h()},l(t){const e=wt("svelte-u9bgzb",document.head);r=d(e,"META",{name:!0,content:!0}),e.forEach(n),x=l(t),h=d(t,"P",{}),z(h).forEach(n),b=l(t),$(c.$$.fragment,t),a=l(t),f=d(t,"P",{"data-svelte-h":!0}),G(f)!=="svelte-3b0d0i"&&(f.innerHTML=dt),X=l(t),J=d(t,"P",{"data-svelte-h":!0}),G(J)!=="svelte-1a4aty"&&(J.innerHTML=ut),H=l(t),$(I.$$.fragment,t),R=l(t),o=d(t,"DIV",{class:!0});var u=z(o);$(E.$$.fragment,u),at=l(u),P=d(u,"P",{"data-svelte-h":!0}),G(P)!=="svelte-xo5v9s"&&(P.innerHTML=bt),st=l(u),Z=d(u,"P",{"data-svelte-h":!0}),G(Z)!=="svelte-13qed9e"&&(Z.innerHTML=ct),lt=l(u),$(L.$$.fragment,u),it=l(u),T=d(u,"DIV",{class:!0});var K=z(T);$(k.$$.fragment,K),rt=l(K),F=d(K,"P",{"data-svelte-h":!0}),G(F)!=="svelte-p49gdy"&&(F.textContent=ft),K.forEach(n),u.forEach(n),V=l(t),$(B.$$.fragment,t),Y=l(t),M=d(t,"DIV",{class:!0});var tt=z(M);$(C.$$.fragment,tt),ot=l(tt),j=d(tt,"DIV",{class:!0});var et=z(j);$(N.$$.fragment,et),pt=l(et),U=d(et,"P",{"data-svelte-h":!0}),G(U)!=="svelte-gef1cn"&&(U.textContent=ht),et.forEach(n),tt.forEach(n),q=l(t),$(W.$$.fragment,t),S=l(t),A=d(t,"P",{}),z(A).forEach(n),this.h()},h(){D(r,"name","hf:doc:metadata"),D(r,"content",Jt),D(T,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),D(o,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),D(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),D(M,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(t,e){p(document.head,r),i(t,x,e),i(t,h,e),i(t,b,e),g(c,t,e),i(t,a,e),i(t,f,e),i(t,X,e),i(t,J,e),i(t,H,e),g(I,t,e),i(t,R,e),i(t,o,e),g(E,o,null),p(o,at),p(o,P),p(o,st),p(o,Z),p(o,lt),g(L,o,null),p(o,it),p(o,T),g(k,T,null),p(T,rt),p(T,F),i(t,V,e),g(B,t,e),i(t,Y,e),i(t,M,e),g(C,M,null),p(M,ot),p(M,j),g(N,j,null),p(j,pt),p(j,U),i(t,q,e),g(W,t,e),i(t,S,e),i(t,A,e),O=!0},p(t,[e]){const u={};e&2&&(u.$$scope={dirty:e,ctx:t}),L.$set(u)},i(t){O||(v(c.$$.fragment,t),v(I.$$.fragment,t),v(E.$$.fragment,t),v(L.$$.fragment,t),v(k.$$.fragment,t),v(B.$$.fragment,t),v(C.$$.fragment,t),v(N.$$.fragment,t),v(W.$$.fragment,t),O=!0)},o(t){y(c.$$.fragment,t),y(I.$$.fragment,t),y(E.$$.fragment,t),y(L.$$.fragment,t),y(k.$$.fragment,t),y(B.$$.fragment,t),y(C.$$.fragment,t),y(N.$$.fragment,t),y(W.$$.fragment,t),O=!1},d(t){t&&(n(x),n(h),n(b),n(a),n(f),n(X),n(J),n(H),n(R),n(o),n(V),n(Y),n(M),n(q),n(S),n(A)),n(r),w(c,t),w(I,t),w(E),w(L),w(k),w(B,t),w(C),w(N),w(W,t)}}}const Jt='{"title":"LLM.int8()","local":"llmint8","sections":[{"title":"Linear8bitLt","local":"bitsandbytes.nn.Linear8bitLt","sections":[],"depth":2},{"title":"Int8Params","local":"bitsandbytes.nn.Int8Params","sections":[],"depth":2}],"depth":1}';function It(Q){return $t(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Gt extends vt{constructor(r){super(),yt(this,r,It,xt,_t,{})}}export{Gt as component};

Xet Storage Details

Size:
10.1 kB
·
Xet hash:
afa1029c88ba9b323995e088de813fe9b762d969192ae9de8104142a97e61c09

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.