Buckets:

download
raw
14.4 kB
// NOTE(review): Auto-generated, minified Svelte client bundle for the
// bitsandbytes "4-bit quantization" docs page (Linear4bit / LinearFP4 /
// LinearNF4 / Params4bit). Built from docs/source/reference/nn/linear4bit.mdx
// (see the EditOnGithub `source` URL passed to Kt below). Do not hand-edit;
// regenerate via the docs build instead.
//
// Structure of the bundle:
//   - Ot(st): renders the "Example:" snippet. The `code` prop is the
//     base64/URL-encoded Python sample; `highlighted` is the same sample as
//     pre-highlighted HTML (hljs spans).
//   - te(st): the page's create/claim(hydrate)/mount/patch/transition/destroy
//     fragment covering all headings, Docstring components, and docstring
//     <div> wrappers.
//   - ee: page-metadata JSON string written into <meta name="hf:doc:metadata">.
//   - ne(st): instance setup (reads the "fw" query param on mount).
//   - class de: the exported SvelteKit page component.
//
// NOTE(review): the literal "[[autdodoc]]" rendered for the LinearFP4 section
// is presumably a typo for "[[autodoc]]" carried over from the source .mdx;
// it is runtime text, so it is left unchanged here — fix it upstream.
//
// NOTE(review): several line breaks below fall inside string/template
// literals — an artifact of how this dump was wrapped; the original bundle
// is a single physical line.
import{s as Gt,o as Xt,n as Yt}from"../chunks/scheduler.852ec091.js";import{S as Zt,i as Vt,g as l,s as a,r as b,A as Ht,h as o,f as n,c as s,j as F,u as c,x as L,k as q,y as i,a as r,v as f,d as h,t as _,w as $}from"../chunks/index.28275fd3.js";import{D as et}from"../chunks/Docstring.ee6c313e.js";import{C as At}from"../chunks/CodeBlock.c3366071.js";import{E as St}from"../chunks/ExampleCodeBlock.00f06ed4.js";import{H as at,E as Kt}from"../chunks/EditOnGithub.582011f0.js";function Ot(st){let m,J="Example:",x,g,v;return g=new At({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjB0b3JjaC5ubiUyMGFzJTIwbm4lMEElMEFpbXBvcnQlMjBiaXRzYW5kYnl0ZXMlMjBhcyUyMGJuYiUwQWZyb20lMjBibmIubm4lMjBpbXBvcnQlMjBMaW5lYXI0Yml0JTBBJTBBZnAxNl9tb2RlbCUyMCUzRCUyMG5uLlNlcXVlbnRpYWwoJTBBJTIwJTIwJTIwJTIwbm4uTGluZWFyKDY0JTJDJTIwNjQpJTJDJTBBJTIwJTIwJTIwJTIwbm4uTGluZWFyKDY0JTJDJTIwNjQpJTBBKSUwQSUwQXF1YW50aXplZF9tb2RlbCUyMCUzRCUyMG5uLlNlcXVlbnRpYWwoJTBBJTIwJTIwJTIwJTIwTGluZWFyNGJpdCg2NCUyQyUyMDY0KSUyQyUwQSUyMCUyMCUyMCUyMExpbmVhcjRiaXQoNjQlMkMlMjA2NCklMEEpJTBBJTBBcXVhbnRpemVkX21vZGVsLmxvYWRfc3RhdGVfZGljdChmcDE2X21vZGVsLnN0YXRlX2RpY3QoKSklMEFxdWFudGl6ZWRfbW9kZWwlMjAlM0QlMjBxdWFudGl6ZWRfbW9kZWwudG8oMCklMjAlMjMlMjBRdWFudGl6YXRpb24lMjBoYXBwZW5zJTIwaGVyZQ==",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> torch.nn <span class="hljs-keyword">as</span> nn
<span class="hljs-keyword">import</span> bitsandbytes <span class="hljs-keyword">as</span> bnb
<span class="hljs-keyword">from</span> bnb.nn <span class="hljs-keyword">import</span> Linear4bit
fp16_model = nn.Sequential(
nn.Linear(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>),
nn.Linear(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>)
)
quantized_model = nn.Sequential(
Linear4bit(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>),
Linear4bit(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>)
)
quantized_model.load_state_dict(fp16_model.state_dict())
quantized_model = quantized_model.to(<span class="hljs-number">0</span>) <span class="hljs-comment"># Quantization happens here</span>`,wrap:!1}}),{c(){m=l("p"),m.textContent=J,x=a(),b(g.$$.fragment)},l(d){m=o(d,"P",{"data-svelte-h":!0}),L(m)!=="svelte-11lpom8"&&(m.textContent=J),x=s(d),c(g.$$.fragment,d)},m(d,T){r(d,m,T),r(d,x,T),f(g,d,T),v=!0},p:Yt,i(d){v||(h(g.$$.fragment,d),v=!0)},o(d){_(g.$$.fragment,d),v=!1},d(d){d&&(n(m),n(x)),$(g,d)}}}function te(st){let m,J,x,g,v,d,T,Jt='<a href="https://hf.co/papers/2305.14314" rel="nofollow">QLoRA</a> is a finetuning method that quantizes a model to 4-bits and adds a set of low-rank adaptation (LoRA) weights to the model and tuning them through the quantized weights. This method also introduces a new data type, 4-bit NormalFloat (<code>LinearNF4</code>) in addition to the standard Float4 data type (<code>LinearFP4</code>). <code>LinearNF4</code> is a quantization data type for normally distributed data and can improve performance.',it,z,rt,u,B,gt,Y,zt=`This class is the base module for the 4-bit quantization algorithm presented in <a href="https://arxiv.org/abs/2305.14314" rel="nofollow">QLoRA</a>.
QLoRA 4-bit linear layers uses blockwise k-bit quantization under the hood, with the possibility of selecting various
compute datatypes such as FP4 and NF4.`,vt,Z,Bt=`In order to quantize a linear layer one should first load the original fp16 / bf16 weights into
the Linear4bit module, then call <code>quantized_module.to(&quot;cuda&quot;)</code> to quantize the fp16 / bf16 weights.`,wt,C,Lt,N,I,Tt,V,It="Initialize Linear4bit class.",lt,P,ot,k,Pt="[[autdodoc]] bitsandbytes.nn.LinearFP4",dt,E,kt="<li><strong>init</strong></li>",mt,R,pt,p,W,xt,H,Et="Implements the NF4 data type.",Mt,A,Rt=`Constructs a quantization data type where each bin has equal area under a standard normal distribution N(0, 1) that
is normalized into the range [-1, 1].`,Ct,S,Wt='For more information read the paper: QLoRA: Efficient Finetuning of Quantized LLMs (<a href="https://arxiv.org/abs/2305.14314" rel="nofollow">https://arxiv.org/abs/2305.14314</a>)',Nt,K,Qt=`Implementation of the NF4 data type in bitsandbytes can be found in the <code>create_normal_map</code> function in
the <code>functional.py</code> file: <a href="https://github.com/TimDettmers/bitsandbytes/blob/main/bitsandbytes/functional.py#L236" rel="nofollow">https://github.com/TimDettmers/bitsandbytes/blob/main/bitsandbytes/functional.py#L236</a>.`,jt,O,Q,ut,U,bt,M,D,Ft,j,G,qt,tt,Ut="Initialize self. See help(type(self)) for accurate signature.",ct,X,ft,nt,ht;return v=new at({props:{title:"4-bit quantization",local:"4-bit-quantization",headingTag:"h1"}}),z=new at({props:{title:"Linear4bit",local:"bitsandbytes.nn.Linear4bit",headingTag:"h2"}}),B=new et({props:{name:"class bitsandbytes.nn.Linear4bit",anchor:"bitsandbytes.nn.Linear4bit",parameters:[{name:"input_features",val:""},{name:"output_features",val:""},{name:"bias",val:" = True"},{name:"compute_dtype",val:" = None"},{name:"compress_statistics",val:" = True"},{name:"quant_type",val:" = 'fp4'"},{name:"quant_storage",val:" = torch.uint8"},{name:"device",val:" = None"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1137/bitsandbytes/nn/modules.py#L367"}}),C=new St({props:{anchor:"bitsandbytes.nn.Linear4bit.example",$$slots:{default:[Ot]},$$scope:{ctx:st}}}),I=new et({props:{name:"__init__",anchor:"bitsandbytes.nn.Linear4bit.__init__",parameters:[{name:"input_features",val:""},{name:"output_features",val:""},{name:"bias",val:" = True"},{name:"compute_dtype",val:" = None"},{name:"compress_statistics",val:" = True"},{name:"quant_type",val:" = 'fp4'"},{name:"quant_storage",val:" = torch.uint8"},{name:"device",val:" = None"}],parametersDescription:[{anchor:"bitsandbytes.nn.Linear4bit.__init__.input_features",description:`<strong>input_features</strong> (<code>str</code>) &#x2014;
Number of input features of the linear layer.`,name:"input_features"},{anchor:"bitsandbytes.nn.Linear4bit.__init__.output_features",description:`<strong>output_features</strong> (<code>str</code>) &#x2014;
Number of output features of the linear layer.`,name:"output_features"},{anchor:"bitsandbytes.nn.Linear4bit.__init__.bias",description:`<strong>bias</strong> (<code>bool</code>, defaults to <code>True</code>) &#x2014;
Whether the linear class uses the bias term as well.`,name:"bias"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1137/bitsandbytes/nn/modules.py#L400"}}),P=new at({props:{title:"LinearFP4",local:"linearfp4",headingTag:"h2"}}),R=new at({props:{title:"LinearNF4",local:"bitsandbytes.nn.LinearNF4",headingTag:"h2"}}),W=new et({props:{name:"class bitsandbytes.nn.LinearNF4",anchor:"bitsandbytes.nn.LinearNF4",parameters:[{name:"input_features",val:""},{name:"output_features",val:""},{name:"bias",val:" = True"},{name:"compute_dtype",val:" = None"},{name:"compress_statistics",val:" = True"},{name:"quant_storage",val:" = torch.uint8"},{name:"device",val:" = None"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1137/bitsandbytes/nn/modules.py#L527"}}),Q=new et({props:{name:"__init__",anchor:"bitsandbytes.nn.LinearNF4.__init__",parameters:[{name:"input_features",val:""},{name:"output_features",val:""},{name:"bias",val:" = True"},{name:"compute_dtype",val:" = None"},{name:"compress_statistics",val:" = True"},{name:"quant_storage",val:" = torch.uint8"},{name:"device",val:" = None"}],parametersDescription:[{anchor:"bitsandbytes.nn.LinearNF4.__init__.input_features",description:`<strong>input_features</strong> (<code>str</code>) &#x2014;
Number of input features of the linear layer.`,name:"input_features"},{anchor:"bitsandbytes.nn.LinearNF4.__init__.output_features",description:`<strong>output_features</strong> (<code>str</code>) &#x2014;
Number of output features of the linear layer.`,name:"output_features"},{anchor:"bitsandbytes.nn.LinearNF4.__init__.bias",description:`<strong>bias</strong> (<code>bool</code>, defaults to <code>True</code>) &#x2014;
Whether the linear class uses the bias term as well.`,name:"bias"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1137/bitsandbytes/nn/modules.py#L539"}}),U=new at({props:{title:"Params4bit",local:"bitsandbytes.nn.Params4bit",headingTag:"h2"}}),D=new et({props:{name:"class bitsandbytes.nn.Params4bit",anchor:"bitsandbytes.nn.Params4bit",parameters:[{name:"data",val:": Optional = None"},{name:"requires_grad",val:" = False"},{name:"quant_state",val:": Optional = None"},{name:"blocksize",val:": int = 64"},{name:"compress_statistics",val:": bool = True"},{name:"quant_type",val:": str = 'fp4'"},{name:"quant_storage",val:": dtype = torch.uint8"},{name:"module",val:": Optional = None"},{name:"bnb_quantized",val:": bool = False"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1137/bitsandbytes/nn/modules.py#L211"}}),G=new et({props:{name:"__init__",anchor:"bitsandbytes.nn.Params4bit.__init__",parameters:[{name:"*args",val:""},{name:"**kwargs",val:""}]}}),X=new Kt({props:{source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/main/docs/source/reference/nn/linear4bit.mdx"}}),{c(){m=l("meta"),J=a(),x=l("p"),g=a(),b(v.$$.fragment),d=a(),T=l("p"),T.innerHTML=Jt,it=a(),b(z.$$.fragment),rt=a(),u=l("div"),b(B.$$.fragment),gt=a(),Y=l("p"),Y.innerHTML=zt,vt=a(),Z=l("p"),Z.innerHTML=Bt,wt=a(),b(C.$$.fragment),Lt=a(),N=l("div"),b(I.$$.fragment),Tt=a(),V=l("p"),V.textContent=It,lt=a(),b(P.$$.fragment),ot=a(),k=l("p"),k.textContent=Pt,dt=a(),E=l("ul"),E.innerHTML=kt,mt=a(),b(R.$$.fragment),pt=a(),p=l("div"),b(W.$$.fragment),xt=a(),H=l("p"),H.textContent=Et,Mt=a(),A=l("p"),A.textContent=Rt,Ct=a(),S=l("p"),S.innerHTML=Wt,Nt=a(),K=l("p"),K.innerHTML=Qt,jt=a(),O=l("div"),b(Q.$$.fragment),ut=a(),b(U.$$.fragment),bt=a(),M=l("div"),b(D.$$.fragment),Ft=a(),j=l("div"),b(G.$$.fragment),qt=a(),tt=l("p"),tt.textContent=Ut,ct=a(),b(X.$$.fragment),ft=a(),nt=l("p"),this.h()},l(t){const 
e=Ht("svelte-u9bgzb",document.head);m=o(e,"META",{name:!0,content:!0}),e.forEach(n),J=s(t),x=o(t,"P",{}),F(x).forEach(n),g=s(t),c(v.$$.fragment,t),d=s(t),T=o(t,"P",{"data-svelte-h":!0}),L(T)!=="svelte-j7lmsi"&&(T.innerHTML=Jt),it=s(t),c(z.$$.fragment,t),rt=s(t),u=o(t,"DIV",{class:!0});var y=F(u);c(B.$$.fragment,y),gt=s(y),Y=o(y,"P",{"data-svelte-h":!0}),L(Y)!=="svelte-b56pxf"&&(Y.innerHTML=zt),vt=s(y),Z=o(y,"P",{"data-svelte-h":!0}),L(Z)!=="svelte-yrth4q"&&(Z.innerHTML=Bt),wt=s(y),c(C.$$.fragment,y),Lt=s(y),N=o(y,"DIV",{class:!0});var _t=F(N);c(I.$$.fragment,_t),Tt=s(_t),V=o(_t,"P",{"data-svelte-h":!0}),L(V)!=="svelte-9bim4q"&&(V.textContent=It),_t.forEach(n),y.forEach(n),lt=s(t),c(P.$$.fragment,t),ot=s(t),k=o(t,"P",{"data-svelte-h":!0}),L(k)!=="svelte-5mwpka"&&(k.textContent=Pt),dt=s(t),E=o(t,"UL",{"data-svelte-h":!0}),L(E)!=="svelte-16exli2"&&(E.innerHTML=kt),mt=s(t),c(R.$$.fragment,t),pt=s(t),p=o(t,"DIV",{class:!0});var w=F(p);c(W.$$.fragment,w),xt=s(w),H=o(w,"P",{"data-svelte-h":!0}),L(H)!=="svelte-5d8bnh"&&(H.textContent=Et),Mt=s(w),A=o(w,"P",{"data-svelte-h":!0}),L(A)!=="svelte-4zan40"&&(A.textContent=Rt),Ct=s(w),S=o(w,"P",{"data-svelte-h":!0}),L(S)!=="svelte-1ditbm8"&&(S.innerHTML=Wt),Nt=s(w),K=o(w,"P",{"data-svelte-h":!0}),L(K)!=="svelte-weczdq"&&(K.innerHTML=Qt),jt=s(w),O=o(w,"DIV",{class:!0});var Dt=F(O);c(Q.$$.fragment,Dt),Dt.forEach(n),w.forEach(n),ut=s(t),c(U.$$.fragment,t),bt=s(t),M=o(t,"DIV",{class:!0});var $t=F(M);c(D.$$.fragment,$t),Ft=s($t),j=o($t,"DIV",{class:!0});var yt=F(j);c(G.$$.fragment,yt),qt=s(yt),tt=o(yt,"P",{"data-svelte-h":!0}),L(tt)!=="svelte-gef1cn"&&(tt.textContent=Ut),yt.forEach(n),$t.forEach(n),ct=s(t),c(X.$$.fragment,t),ft=s(t),nt=o(t,"P",{}),F(nt).forEach(n),this.h()},h(){q(m,"name","hf:doc:metadata"),q(m,"content",ee),q(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),q(u,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8"),q(O,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),q(p,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),q(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),q(M,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(t,e){i(document.head,m),r(t,J,e),r(t,x,e),r(t,g,e),f(v,t,e),r(t,d,e),r(t,T,e),r(t,it,e),f(z,t,e),r(t,rt,e),r(t,u,e),f(B,u,null),i(u,gt),i(u,Y),i(u,vt),i(u,Z),i(u,wt),f(C,u,null),i(u,Lt),i(u,N),f(I,N,null),i(N,Tt),i(N,V),r(t,lt,e),f(P,t,e),r(t,ot,e),r(t,k,e),r(t,dt,e),r(t,E,e),r(t,mt,e),f(R,t,e),r(t,pt,e),r(t,p,e),f(W,p,null),i(p,xt),i(p,H),i(p,Mt),i(p,A),i(p,Ct),i(p,S),i(p,Nt),i(p,K),i(p,jt),i(p,O),f(Q,O,null),r(t,ut,e),f(U,t,e),r(t,bt,e),r(t,M,e),f(D,M,null),i(M,Ft),i(M,j),f(G,j,null),i(j,qt),i(j,tt),r(t,ct,e),f(X,t,e),r(t,ft,e),r(t,nt,e),ht=!0},p(t,[e]){const y={};e&2&&(y.$$scope={dirty:e,ctx:t}),C.$set(y)},i(t){ht||(h(v.$$.fragment,t),h(z.$$.fragment,t),h(B.$$.fragment,t),h(C.$$.fragment,t),h(I.$$.fragment,t),h(P.$$.fragment,t),h(R.$$.fragment,t),h(W.$$.fragment,t),h(Q.$$.fragment,t),h(U.$$.fragment,t),h(D.$$.fragment,t),h(G.$$.fragment,t),h(X.$$.fragment,t),ht=!0)},o(t){_(v.$$.fragment,t),_(z.$$.fragment,t),_(B.$$.fragment,t),_(C.$$.fragment,t),_(I.$$.fragment,t),_(P.$$.fragment,t),_(R.$$.fragment,t),_(W.$$.fragment,t),_(Q.$$.fragment,t),_(U.$$.fragment,t),_(D.$$.fragment,t),_(G.$$.fragment,t),_(X.$$.fragment,t),ht=!1},d(t){t&&(n(J),n(x),n(g),n(d),n(T),n(it),n(rt),n(u),n(lt),n(ot),n(k),n(dt),n(E),n(mt),n(pt),n(p),n(ut),n(bt),n(M),n(ct),n(ft),n(nt)),n(m),$(v,t),$(z,t),$(B),$(C),$(I),$(P,t),$(R,t),$(W),$(Q),$(U,t),$(D),$(G),$(X,t)}}}const ee='{"title":"4-bit 
quantization","local":"4-bit-quantization","sections":[{"title":"Linear4bit","local":"bitsandbytes.nn.Linear4bit","sections":[],"depth":2},{"title":"LinearFP4","local":"linearfp4","sections":[],"depth":2},{"title":"LinearNF4","local":"bitsandbytes.nn.LinearNF4","sections":[],"depth":2},{"title":"Params4bit","local":"bitsandbytes.nn.Params4bit","sections":[],"depth":2}],"depth":1}';function ne(st){return Xt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class de extends Zt{constructor(m){super(),Vt(this,m,ne,te,Gt,{})}}export{de as component};

Xet Storage Details

Size:
14.4 kB
·
Xet hash:
4ddf4abbfe760bfe50cbb14b3d21ac696f83537175d5eaa0127acfff5be58095

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.