Buckets:

download
raw
9.74 kB
/*
 * Compiled (minified) Svelte page chunk for the bitsandbytes "8-bit quantization"
 * reference page (Linear8bitLt / Int8Params). The page content originates from
 * docs/source/reference/nn/linear8bit.mdx (see the `source` prop passed to `yt` below).
 *
 * NOTE(review): this is build output; lasting content changes presumably belong in the
 * .mdx source rather than here.
 *
 * The single-letter helpers are implemented in ../chunks/*.js, which is not visible from
 * this file. The roles described in the comments below are inferred from how they are
 * called here and from the usual shape of Svelte compiler output - confirm against those
 * chunks before relying on them.
 */
import{s as ct,o as ft,n as ht}from"../chunks/scheduler.852ec091.js";
import{S as _t,i as $t,g as d,s as l,r as _,A as gt,h as b,f as n,c as i,j as F,u as $,x as q,k as Z,y as m,a as r,v as g,d as v,t as w,w as y}from"../chunks/index.28275fd3.js";
import{D as tt}from"../chunks/Docstring.ee6c313e.js";
import{C as vt}from"../chunks/CodeBlock.c3366071.js";
import{E as wt}from"../chunks/ExampleCodeBlock.00f06ed4.js";
import{H as ot,E as yt}from"../chunks/EditOnGithub.582011f0.js";

/**
 * Fragment for the default slot of the Linear8bitLt example block: an "Example:"
 * paragraph followed by a CodeBlock component (`vt`).
 *
 * `code` is a base64 payload; decoded, it is the percent-encoded example source.
 * `highlighted` is the pre-rendered highlight.js markup for the same source. Its blank
 * lines and 4-space indentation are kept in step with the decoded `code` payload
 * (which contains %0A%0A and %20%20%20%20 at those positions) so that the displayed
 * listing and the payload describe the same text.
 *
 * @param {*} U - context argument supplied by the caller; not read in this function.
 * @returns {object} fragment object with methods c, l, m, p, i, o, d
 *   (presumably create / claim / mount / update / intro / outro / destroy - confirm).
 */
function Mt(U){
  let s,x="Example:",h,u,c;
  u=new vt({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjB0b3JjaC5ubiUyMGFzJTIwbm4lMEElMEFpbXBvcnQlMjBiaXRzYW5kYnl0ZXMlMjBhcyUyMGJuYiUwQWZyb20lMjBibmIubm4lMjBpbXBvcnQlMjBMaW5lYXI4Yml0THQlMEElMEFmcDE2X21vZGVsJTIwJTNEJTIwbm4uU2VxdWVudGlhbCglMEElMjAlMjAlMjAlMjBubi5MaW5lYXIoNjQlMkMlMjA2NCklMkMlMEElMjAlMjAlMjAlMjBubi5MaW5lYXIoNjQlMkMlMjA2NCklMEEpJTBBJTBBaW50OF9tb2RlbCUyMCUzRCUyMG5uLlNlcXVlbnRpYWwoJTBBJTIwJTIwJTIwJTIwTGluZWFyOGJpdEx0KDY0JTJDJTIwNjQlMkMlMjBoYXNfZnAxNl93ZWlnaHRzJTNERmFsc2UpJTJDJTBBJTIwJTIwJTIwJTIwTGluZWFyOGJpdEx0KDY0JTJDJTIwNjQlMkMlMjBoYXNfZnAxNl93ZWlnaHRzJTNERmFsc2UpJTBBKSUwQSUwQWludDhfbW9kZWwubG9hZF9zdGF0ZV9kaWN0KGZwMTZfbW9kZWwuc3RhdGVfZGljdCgpKSUwQWludDhfbW9kZWwlMjAlM0QlMjBpbnQ4X21vZGVsLnRvKDApJTIwJTIzJTIwUXVhbnRpemF0aW9uJTIwaGFwcGVucyUyMGhlcmU=",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> torch.nn <span class="hljs-keyword">as</span> nn

<span class="hljs-keyword">import</span> bitsandbytes <span class="hljs-keyword">as</span> bnb
<span class="hljs-keyword">from</span> bnb.nn <span class="hljs-keyword">import</span> Linear8bitLt

fp16_model = nn.Sequential(
    nn.Linear(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>),
    nn.Linear(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>)
)

int8_model = nn.Sequential(
    Linear8bitLt(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>, has_fp16_weights=<span class="hljs-literal">False</span>),
    Linear8bitLt(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>, has_fp16_weights=<span class="hljs-literal">False</span>)
)

int8_model.load_state_dict(fp16_model.state_dict())
int8_model = int8_model.to(<span class="hljs-number">0</span>) <span class="hljs-comment"># Quantization happens here</span>`,wrap:!1}});
  return{
    // Build the <p> and a separator node, then the CodeBlock's own fragment.
    c(){s=d("p"),s.textContent=x,h=l(),_(u.$$.fragment)},
    // Same nodes, but obtained from the existing node list `a`; the text is only
    // rewritten when q(s) differs from the expected "svelte-11lpom8" marker.
    l(a){s=b(a,"P",{"data-svelte-h":!0}),q(s)!=="svelte-11lpom8"&&(s.textContent=x),h=i(a),$(u.$$.fragment,a)},
    // Insert the nodes into target `a` before anchor `f`.
    m(a,f){r(a,s,f),r(a,h,f),g(u,a,f),c=!0},
    // `ht` is used as the update hook; nothing in this fragment depends on changing state.
    p:ht,
    // `c` guards against running the child's transition-in twice.
    i(a){c||(v(u.$$.fragment,a),c=!0)},
    o(a){w(u.$$.fragment,a),c=!1},
    // Remove the nodes only when `a` is truthy; the child component is always torn down.
    d(a){a&&(n(s),n(h)),y(u,a)}
  }
}

/**
 * Root fragment for the page: a metadata <meta> tag in document.head, the
 * "8-bit quantization" heading and intro paragraph, the Linear8bitLt docstring section
 * (with the example block and its __init__ docstring), the Int8Params docstring section
 * (with its __init__ docstring), and the edit-on-GitHub footer component.
 *
 * @param {*} U - context array; forwarded as `$$scope.ctx` to the example block.
 * @returns {object} fragment object with methods c, l, h, m, p, i, o, d.
 */
function Tt(U){
  let s,x,h,u,c,a,f,
    // Intro paragraph (assigned via innerHTML below).
    mt='<a href="https://hf.co/papers/2208.07339" rel="nofollow">LLM.int8()</a> is a quantization method that doesn’t degrade performance which makes large model inference more accessible. The key is to extract the outliers from the inputs and weights and multiply them in 16-bit. All other values are multiplied in 8-bit and quantized to Int8 before being dequantized back to 16-bits. The outputs from the 16-bit and 8-bit multiplication are combined to produce the final output.',
    A,I,Q,o,J,et,W,
    // Linear8bitLt class description, first paragraph (innerHTML).
    pt=`This class is the base module for the <a href="https://arxiv.org/abs/2208.07339" rel="nofollow">LLM.int8()</a> algorithm.
To read more about it, have a look at the paper.`,
    nt,z,
    // Linear8bitLt class description, second paragraph (innerHTML).
    dt=`In order to quantize a linear layer one should first load the original fp16 / bf16 weights into
the Linear8bitLt module, then call <code>int8_module.to(&quot;cuda&quot;)</code> to quantize the fp16 weights.`,
    at,T,st,L,k,lt,G,bt="Initialize Linear8bitLt class.",
    X,E,V,M,B,it,j,C,rt,P,ut="Initialize self. See help(type(self)) for accurate signature.",
    Y,N,R,D,H;

  // Page heading (h1).
  c=new ot({props:{title:"8-bit quantization",local:"8-bit-quantization",headingTag:"h1"}});
  // Section heading for Linear8bitLt (h2).
  I=new ot({props:{title:"Linear8bitLt",local:"bitsandbytes.nn.Linear8bitLt",headingTag:"h2"}});
  // Class signature docstring for Linear8bitLt.
  J=new tt({props:{name:"class bitsandbytes.nn.Linear8bitLt",anchor:"bitsandbytes.nn.Linear8bitLt",parameters:[{name:"input_features",val:": int"},{name:"output_features",val:": int"},{name:"bias",val:" = True"},{name:"has_fp16_weights",val:" = True"},{name:"memory_efficient_backward",val:" = False"},{name:"threshold",val:" = 0.0"},{name:"index",val:" = None"},{name:"device",val:" = None"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1137/bitsandbytes/nn/modules.py#L853"}});
  // Example block; its default slot is rendered by Mt.
  T=new wt({props:{anchor:"bitsandbytes.nn.Linear8bitLt.example",$$slots:{default:[Mt]},$$scope:{ctx:U}}});
  // Linear8bitLt.__init__ docstring, including per-parameter descriptions.
  k=new tt({props:{name:"__init__",anchor:"bitsandbytes.nn.Linear8bitLt.__init__",parameters:[{name:"input_features",val:": int"},{name:"output_features",val:": int"},{name:"bias",val:" = True"},{name:"has_fp16_weights",val:" = True"},{name:"memory_efficient_backward",val:" = False"},{name:"threshold",val:" = 0.0"},{name:"index",val:" = None"},{name:"device",val:" = None"}],parametersDescription:[{anchor:"bitsandbytes.nn.Linear8bitLt.__init__.input_features",description:`<strong>input_features</strong> (<code>int</code>) &#x2014;
Number of input features of the linear layer.`,name:"input_features"},{anchor:"bitsandbytes.nn.Linear8bitLt.__init__.output_features",description:`<strong>output_features</strong> (<code>int</code>) &#x2014;
Number of output features of the linear layer.`,name:"output_features"},{anchor:"bitsandbytes.nn.Linear8bitLt.__init__.bias",description:`<strong>bias</strong> (<code>bool</code>, defaults to <code>True</code>) &#x2014;
Whether the linear class uses the bias term as well.`,name:"bias"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1137/bitsandbytes/nn/modules.py#L885"}});
  // Section heading for Int8Params (h2).
  E=new ot({props:{title:"Int8Params",local:"bitsandbytes.nn.Int8Params",headingTag:"h2"}});
  // Class signature docstring for Int8Params.
  B=new tt({props:{name:"class bitsandbytes.nn.Int8Params",anchor:"bitsandbytes.nn.Int8Params",parameters:[{name:"data",val:" = None"},{name:"requires_grad",val:" = True"},{name:"has_fp16_weights",val:" = False"},{name:"CB",val:" = None"},{name:"SCB",val:" = None"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1137/bitsandbytes/nn/modules.py#L570"}});
  // Int8Params.__init__ docstring.
  C=new tt({props:{name:"__init__",anchor:"bitsandbytes.nn.Int8Params.__init__",parameters:[{name:"*args",val:""},{name:"**kwargs",val:""}]}});
  // Footer component pointing at the .mdx source of this page.
  N=new yt({props:{source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/main/docs/source/reference/nn/linear8bit.mdx"}});

  return{
    // Build every element/separator node and each child component's fragment, then
    // apply attributes through h().
    c(){
      s=d("meta"),x=l(),h=d("p"),u=l(),_(c.$$.fragment),a=l(),
      f=d("p"),f.innerHTML=mt,A=l(),_(I.$$.fragment),Q=l(),
      o=d("div"),_(J.$$.fragment),et=l(),
      W=d("p"),W.innerHTML=pt,nt=l(),
      z=d("p"),z.innerHTML=dt,at=l(),_(T.$$.fragment),st=l(),
      L=d("div"),_(k.$$.fragment),lt=l(),
      G=d("p"),G.textContent=bt,X=l(),_(E.$$.fragment),V=l(),
      M=d("div"),_(B.$$.fragment),it=l(),
      j=d("div"),_(C.$$.fragment),rt=l(),
      P=d("p"),P.textContent=ut,Y=l(),_(N.$$.fragment),R=l(),
      D=d("p"),this.h()
    },
    // Counterpart of c() that takes nodes from the existing list `t` (and from
    // document.head for the <meta>). Static HTML/text is only rewritten when q(node)
    // does not match the expected "svelte-…" marker.
    l(t){
      const e=gt("svelte-u9bgzb",document.head);
      s=b(e,"META",{name:!0,content:!0}),e.forEach(n),x=i(t),
      h=b(t,"P",{}),F(h).forEach(n),u=i(t),$(c.$$.fragment,t),a=i(t),
      f=b(t,"P",{"data-svelte-h":!0}),q(f)!=="svelte-6ius2o"&&(f.innerHTML=mt),A=i(t),
      $(I.$$.fragment,t),Q=i(t),
      o=b(t,"DIV",{class:!0});
      var p=F(o);
      $(J.$$.fragment,p),et=i(p),
      W=b(p,"P",{"data-svelte-h":!0}),q(W)!=="svelte-xo5v9s"&&(W.innerHTML=pt),nt=i(p),
      z=b(p,"P",{"data-svelte-h":!0}),q(z)!=="svelte-13qed9e"&&(z.innerHTML=dt),at=i(p),
      $(T.$$.fragment,p),st=i(p),
      L=b(p,"DIV",{class:!0});
      var S=F(L);
      $(k.$$.fragment,S),lt=i(S),
      G=b(S,"P",{"data-svelte-h":!0}),q(G)!=="svelte-p49gdy"&&(G.textContent=bt),
      S.forEach(n),p.forEach(n),X=i(t),
      $(E.$$.fragment,t),V=i(t),
      M=b(t,"DIV",{class:!0});
      var K=F(M);
      $(B.$$.fragment,K),it=i(K),
      j=b(K,"DIV",{class:!0});
      var O=F(j);
      $(C.$$.fragment,O),rt=i(O),
      P=b(O,"P",{"data-svelte-h":!0}),q(P)!=="svelte-gef1cn"&&(P.textContent=ut),
      O.forEach(n),K.forEach(n),Y=i(t),
      $(N.$$.fragment,t),R=i(t),
      D=b(t,"P",{}),F(D).forEach(n),this.h()
    },
    // Attributes: the <meta> carries the page metadata JSON (Lt); the four <div>
    // wrappers share the same docstring styling classes.
    h(){
      Z(s,"name","hf:doc:metadata"),Z(s,"content",Lt),
      Z(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),
      Z(o,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),
      Z(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),
      Z(M,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")
    },
    // Insert everything into target `t` before anchor `e`; nested nodes are appended
    // to their wrapper <div>s (o, L, M, j). The <meta> goes into document.head.
    m(t,e){
      m(document.head,s),r(t,x,e),r(t,h,e),r(t,u,e),g(c,t,e),r(t,a,e),r(t,f,e),r(t,A,e),
      g(I,t,e),r(t,Q,e),r(t,o,e),g(J,o,null),m(o,et),m(o,W),m(o,nt),m(o,z),m(o,at),
      g(T,o,null),m(o,st),m(o,L),g(k,L,null),m(L,lt),m(L,G),r(t,X,e),
      g(E,t,e),r(t,V,e),r(t,M,e),g(B,M,null),m(M,it),m(M,j),g(C,j,null),m(j,rt),m(j,P),
      r(t,Y,e),g(N,t,e),r(t,R,e),r(t,D,e),H=!0
    },
    // Only the example block receives updates: when bit 2 of the dirty mask `e` is set,
    // a fresh $$scope is pushed to it.
    p(t,[e]){const p={};e&2&&(p.$$scope={dirty:e,ctx:t}),T.$set(p)},
    // `H` guards against running the children's transition-in twice.
    i(t){
      H||(v(c.$$.fragment,t),v(I.$$.fragment,t),v(J.$$.fragment,t),v(T.$$.fragment,t),
      v(k.$$.fragment,t),v(E.$$.fragment,t),v(B.$$.fragment,t),v(C.$$.fragment,t),
      v(N.$$.fragment,t),H=!0)
    },
    o(t){
      w(c.$$.fragment,t),w(I.$$.fragment,t),w(J.$$.fragment,t),w(T.$$.fragment,t),
      w(k.$$.fragment,t),w(E.$$.fragment,t),w(B.$$.fragment,t),w(C.$$.fragment,t),
      w(N.$$.fragment,t),H=!1
    },
    // Top-level nodes are removed only when `t` is truthy; the head <meta> and all
    // child components are always torn down.
    d(t){
      t&&(n(x),n(h),n(u),n(a),n(f),n(A),n(Q),n(o),n(X),n(V),n(M),n(Y),n(R),n(D)),
      n(s),y(c,t),y(I,t),y(J),y(T),y(k),y(E,t),y(B),y(C),y(N,t)
    }
  }
}

// Page metadata (title / anchors / section tree) as a JSON string; written to the
// `hf:doc:metadata` <meta> tag by Tt's h(). The literal must stay on a single line:
// a raw line break inside a quoted string literal is a syntax error.
const Lt='{"title":"8-bit quantization","local":"8-bit-quantization","sections":[{"title":"Linear8bitLt","local":"bitsandbytes.nn.Linear8bitLt","sections":[],"depth":2},{"title":"Int8Params","local":"bitsandbytes.nn.Int8Params","sections":[],"depth":2}],"depth":1}';

/**
 * Instance-setup function handed to `$t`. Registers a callback through `ft` that reads
 * the `fw` query parameter from the current URL; the value is discarded. Returns an
 * empty array (no per-instance context values).
 *
 * @param {*} U - unused.
 * @returns {Array} always `[]`.
 */
function jt(U){
  return ft(()=>{new URLSearchParams(window.location.search).get("fw")}),[]
}

/**
 * Page component. Extends the base class `_t` and wires itself up via `$t` with the
 * setup function `jt`, the root fragment factory `Tt`, the helper `ct`, and an empty
 * props map.
 */
class Ct extends _t{
  constructor(s){super(),$t(this,s,jt,Tt,ct,{})}
}

export{Ct as component};

Xet Storage Details

Size:
9.74 kB
·
Xet hash:
196bb4b1d3604451ae96c25ee4d985eaf0e7af6eb1822344c918be9c248cfe2f

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.