Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev/bitsandbytes/pr_1137/en/_app/immutable/nodes/27.2f0b01ee.js

15.6 kB

	// Compiler-generated page module for the bitsandbytes "SGD" optimizer reference page.
	// NOTE(review): the single-letter import aliases look like Svelte runtime helpers
	// (element/text creation, hydration claims, mount/detach, component lifecycle);
	// their exact semantics live in ../chunks and are not visible from this file.
	import { s as Dt, n as St, o as Gt } from "../chunks/scheduler.852ec091.js";
	import {
		S as xt, i as wt, g as s, s as a, r as d, A as zt, h as r, f as e, c as o, j as D,
		u as l, x as M, k as S, y as m, a as i, v as p, d as c, t as b, w as _,
	} from "../chunks/index.28275fd3.js";
	import { D as V } from "../chunks/Docstring.ee6c313e.js";
	import { H as dt, E as Tt } from "../chunks/EditOnGithub.582011f0.js";

	/** CSS class list applied to every docstring container <div> on this page. */
	const DOCSTRING_CLASS =
		"docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8";

	/** Python source file that every "source" link on this page points into. */
	const SGD_SOURCE_URL =
		"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1137/bitsandbytes/optim/sgd.py";

	/**
	 * Constructor parameters shared by SGD, SGD8bit and SGD32bit, in signature order.
	 * `val` is the default-value suffix shown in the rendered signature and
	 * `description` is the HTML shown in the parameter list. Only the base SGD
	 * class takes `optim_bits`; the builders below drop it for the other two.
	 */
	const SGD_PARAMETERS = [
		{
			name: "params",
			val: "",
			description: "<strong>params</strong> (<code>torch.tensor</code>) —\n\tThe input parameters to optimize.",
		},
		{
			name: "lr",
			val: "",
			description: "<strong>lr</strong> (<code>float</code>) —\n\tThe learning rate.",
		},
		{
			name: "momentum",
			val: " = 0",
			description: "<strong>momentum</strong> (<code>float</code>, defaults to 0) —\n\tThe momentum value speeds up the optimizer by taking bigger steps.",
		},
		{
			name: "dampening",
			val: " = 0",
			description: "<strong>dampening</strong> (<code>float</code>, defaults to 0) —\n\tThe dampening value reduces the momentum of the optimizer.",
		},
		{
			name: "weight_decay",
			val: " = 0",
			description: "<strong>weight_decay</strong> (<code>float</code>, defaults to 0.0) —\n\tThe weight decay value for the optimizer.",
		},
		{
			name: "nesterov",
			val: " = False",
			description: "<strong>nesterov</strong> (<code>bool</code>, defaults to <code>False</code>) —\n\tWhether to use Nesterov momentum.",
		},
		{
			name: "optim_bits",
			val: " = 32",
			description: "<strong>optim_bits</strong> (<code>int</code>, defaults to 32) —\n\tThe number of bits of the optimizer state.",
		},
		{
			name: "args",
			val: " = None",
			description: "<strong>args</strong> (<code>object</code>, defaults to <code>None</code>) —\n\tAn object with additional arguments.",
		},
		{
			name: "min_8bit_size",
			val: " = 4096",
			description: "<strong>min_8bit_size</strong> (<code>int</code>, defaults to 4096) —\n\tThe minimum number of elements of the parameter tensors for 8-bit optimization.",
		},
		{
			name: "percentile_clipping",
			val: " = 100",
			description: "<strong>percentile_clipping</strong> (<code>int</code>, defaults to 100) —\n\tAdapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.",
		},
		{
			name: "block_wise",
			val: " = True",
			description: "<strong>block_wise</strong> (<code>bool</code>, defaults to <code>True</code>) —\n\tWhether to independently quantize each block of tensors to reduce outlier effects and improve stability.",
		},
	];

	/**
	 * Builds a fresh `parameters` prop (signature entries) for a Docstring component.
	 *
	 * @param {boolean} withOptimBits - keep the `optim_bits` entry (base SGD only).
	 * @returns {{name: string, val: string}[]} new array of new objects on every call.
	 */
	function listParameters(withOptimBits) {
		return SGD_PARAMETERS
			.filter(({ name }) => withOptimBits || name !== "optim_bits")
			.map(({ name, val }) => ({ name, val }));
	}

	/**
	 * Builds a fresh `parametersDescription` prop for an `__init__` Docstring component.
	 *
	 * @param {string} classAnchor - dotted class path, e.g. "bitsandbytes.optim.SGD8bit".
	 * @param {boolean} withOptimBits - keep the `optim_bits` entry (base SGD only).
	 * @returns {{anchor: string, description: string, name: string}[]} entries whose
	 *   anchor is `<classAnchor>.__init__.<name>`.
	 */
	function describeParameters(classAnchor, withOptimBits) {
		return SGD_PARAMETERS
			.filter(({ name }) => withOptimBits || name !== "optim_bits")
			.map(({ name, description }) => ({
				anchor: `${classAnchor}.__init__.${name}`,
				description,
				name,
			}));
	}

	/**
	 * Fragment factory for the page: instantiates the heading, docstring and
	 * "edit on GitHub" components, then returns the lifecycle object consumed by
	 * the runtime (`wt` receives this function in the `jt` constructor).
	 *
	 * @param {*} ut - unused; kept because the runtime calls the factory with it.
	 * @returns {object} object exposing c, l, h, m, p, i, o and d.
	 */
	function kt(ut) {
		// DOM node handles; assigned by c() or l() and released in d().
		let g, B, W, O, R, x, U, w, J, K, u, lt, v, pt, L, Q;
		let X, h, ct, y, bt, q, Y, Z, f, _t, $, gt, I, tt, et, H;
		// Set by m()/i() and cleared by o(); makes repeated i() calls a no-op.
		let nt;

		// Static paragraph texts.
		const ht = "Stochastic gradient descent (SGD) is a basic gradient descent optimizer to minimize loss given a set of model parameters and updates the parameters in the opposite direction of the gradient. The update is performed on a randomly sampled mini-batch of data from the dataset.";
		const ft = "bitsandbytes also supports momentum and Nesterov momentum to accelerate SGD by adding a weighted average of past gradients to the current gradient.";
		const vt = "Base SGD optimizer.";
		const yt = "8-bit SGD optimizer.";
		const $t = "32-bit SGD optimizer.";

		// Child components, constructed in document order.
		const G = new dt({ props: { title: "SGD", local: "sgd", headingTag: "h1" } });
		const z = new dt({
			props: { title: "SGD", local: "api-class ][ bitsandbytes.optim.SGD", headingTag: "h2" },
		});
		const T = new V({
			props: {
				name: "class bitsandbytes.optim.SGD",
				anchor: "bitsandbytes.optim.SGD",
				parameters: listParameters(true),
				source: `${SGD_SOURCE_URL}#L8`,
			},
		});
		const k = new V({
			props: {
				name: "__init__",
				anchor: "bitsandbytes.optim.SGD.__init__",
				parameters: listParameters(true),
				parametersDescription: describeParameters("bitsandbytes.optim.SGD", true),
				source: `${SGD_SOURCE_URL}#L9`,
			},
		});
		const C = new dt({
			props: { title: "SGD8bit", local: "bitsandbytes.optim.SGD8bit", headingTag: "h2" },
		});
		const N = new V({
			props: {
				name: "class bitsandbytes.optim.SGD8bit",
				anchor: "bitsandbytes.optim.SGD8bit",
				parameters: listParameters(false),
				source: `${SGD_SOURCE_URL}#L67`,
			},
		});
		const E = new V({
			props: {
				name: "__init__",
				anchor: "bitsandbytes.optim.SGD8bit.__init__",
				parameters: listParameters(false),
				parametersDescription: describeParameters("bitsandbytes.optim.SGD8bit", false),
				source: `${SGD_SOURCE_URL}#L68`,
			},
		});
		const P = new dt({
			props: { title: "SGD32bit", local: "bitsandbytes.optim.SGD32bit", headingTag: "h2" },
		});
		const F = new V({
			props: {
				name: "class bitsandbytes.optim.SGD32bit",
				anchor: "bitsandbytes.optim.SGD32bit",
				parameters: listParameters(false),
				source: `${SGD_SOURCE_URL}#L123`,
			},
		});
		const A = new V({
			props: {
				name: "__init__",
				anchor: "bitsandbytes.optim.SGD32bit.__init__",
				parameters: listParameters(false),
				parametersDescription: describeParameters("bitsandbytes.optim.SGD32bit", false),
				source: `${SGD_SOURCE_URL}#L124`,
			},
		});
		const j = new Tt({
			props: {
				source: "https://github.com/bitsandbytes-foundation/bitsandbytes/blob/main/docs/source/reference/optim/sgd.mdx",
			},
		});

		return {
			// Builds every node and child fragment from scratch, then applies attributes.
			c() {
				g = s("meta");
				B = a();
				W = s("p");
				O = a();
				d(G.$$.fragment);
				R = a();
				x = s("p");
				x.textContent = ht;
				U = a();
				w = s("p");
				w.textContent = ft;
				J = a();
				d(z.$$.fragment);
				K = a();
				u = s("div");
				d(T.$$.fragment);
				lt = a();
				v = s("div");
				d(k.$$.fragment);
				pt = a();
				L = s("p");
				L.textContent = vt;
				Q = a();
				d(C.$$.fragment);
				X = a();
				h = s("div");
				d(N.$$.fragment);
				ct = a();
				y = s("div");
				d(E.$$.fragment);
				bt = a();
				q = s("p");
				q.textContent = yt;
				Y = a();
				d(P.$$.fragment);
				Z = a();
				f = s("div");
				d(F.$$.fragment);
				_t = a();
				$ = s("div");
				d(A.$$.fragment);
				gt = a();
				I = s("p");
				I.textContent = $t;
				tt = a();
				d(j.$$.fragment);
				et = a();
				H = s("p");
				this.h();
			},
			// Same structure as c(), but takes nodes from the list `t` (and from
			// document.head for the <meta>) instead of creating them.
			// NOTE(review): presumably the hydration path for server-rendered markup.
			l(t) {
				const n = zt("svelte-u9bgzb", document.head);
				g = r(n, "META", { name: true, content: true });
				n.forEach(e);
				B = o(t);
				W = r(t, "P", {});
				D(W).forEach(e);
				O = o(t);
				l(G.$$.fragment, t);
				R = o(t);
				x = r(t, "P", { "data-svelte-h": true });
				// Text is only rewritten when the existing node's hash differs.
				if (M(x) !== "svelte-q53bao") x.textContent = ht;
				U = o(t);
				w = r(t, "P", { "data-svelte-h": true });
				if (M(w) !== "svelte-xtlqke") w.textContent = ft;
				J = o(t);
				l(z.$$.fragment, t);
				K = o(t);
				u = r(t, "DIV", { class: true });
				const it = D(u);
				l(T.$$.fragment, it);
				lt = o(it);
				v = r(it, "DIV", { class: true });
				const at = D(v);
				l(k.$$.fragment, at);
				pt = o(at);
				L = r(at, "P", { "data-svelte-h": true });
				if (M(L) !== "svelte-1r01lii") L.textContent = vt;
				at.forEach(e);
				it.forEach(e);
				Q = o(t);
				l(C.$$.fragment, t);
				X = o(t);
				h = r(t, "DIV", { class: true });
				const ot = D(h);
				l(N.$$.fragment, ot);
				ct = o(ot);
				y = r(ot, "DIV", { class: true });
				const st = D(y);
				l(E.$$.fragment, st);
				bt = o(st);
				q = r(st, "P", { "data-svelte-h": true });
				if (M(q) !== "svelte-utr5h5") q.textContent = yt;
				st.forEach(e);
				ot.forEach(e);
				Y = o(t);
				l(P.$$.fragment, t);
				Z = o(t);
				f = r(t, "DIV", { class: true });
				const rt = D(f);
				l(F.$$.fragment, rt);
				_t = o(rt);
				$ = r(rt, "DIV", { class: true });
				const mt = D($);
				l(A.$$.fragment, mt);
				gt = o(mt);
				I = r(mt, "P", { "data-svelte-h": true });
				if (M(I) !== "svelte-wdls4c") I.textContent = $t;
				mt.forEach(e);
				rt.forEach(e);
				tt = o(t);
				l(j.$$.fragment, t);
				et = o(t);
				H = r(t, "P", {});
				D(H).forEach(e);
				this.h();
			},
			// Applies attributes: page metadata on the <meta>, styling on the six wrappers.
			h() {
				S(g, "name", "hf:doc:metadata");
				S(g, "content", Ct);
				S(v, "class", DOCSTRING_CLASS);
				S(u, "class", DOCSTRING_CLASS);
				S(y, "class", DOCSTRING_CLASS);
				S(h, "class", DOCSTRING_CLASS);
				S($, "class", DOCSTRING_CLASS);
				S(f, "class", DOCSTRING_CLASS);
			},
			// Attaches the <meta> to document.head and everything else under target `t`
			// relative to anchor `n`; nested docstrings go inside their wrapper <div>s.
			m(t, n) {
				m(document.head, g);
				i(t, B, n);
				i(t, W, n);
				i(t, O, n);
				p(G, t, n);
				i(t, R, n);
				i(t, x, n);
				i(t, U, n);
				i(t, w, n);
				i(t, J, n);
				p(z, t, n);
				i(t, K, n);
				i(t, u, n);
				p(T, u, null);
				m(u, lt);
				m(u, v);
				p(k, v, null);
				m(v, pt);
				m(v, L);
				i(t, Q, n);
				p(C, t, n);
				i(t, X, n);
				i(t, h, n);
				p(N, h, null);
				m(h, ct);
				m(h, y);
				p(E, y, null);
				m(y, bt);
				m(y, q);
				i(t, Y, n);
				p(P, t, n);
				i(t, Z, n);
				i(t, f, n);
				p(F, f, null);
				m(f, _t);
				m(f, $);
				p(A, $, null);
				m($, gt);
				m($, I);
				i(t, tt, n);
				p(j, t, n);
				i(t, et, n);
				i(t, H, n);
				nt = true;
			},
			// Update hook is the imported `St`; this fragment defines no update logic of its own.
			p: St,
			// Runs `c` on every child fragment once, until o() clears the flag.
			i(t) {
				if (nt) return;
				c(G.$$.fragment, t);
				c(z.$$.fragment, t);
				c(T.$$.fragment, t);
				c(k.$$.fragment, t);
				c(C.$$.fragment, t);
				c(N.$$.fragment, t);
				c(E.$$.fragment, t);
				c(P.$$.fragment, t);
				c(F.$$.fragment, t);
				c(A.$$.fragment, t);
				c(j.$$.fragment, t);
				nt = true;
			},
			// Runs `b` on every child fragment and re-arms i().
			o(t) {
				b(G.$$.fragment, t);
				b(z.$$.fragment, t);
				b(T.$$.fragment, t);
				b(k.$$.fragment, t);
				b(C.$$.fragment, t);
				b(N.$$.fragment, t);
				b(E.$$.fragment, t);
				b(P.$$.fragment, t);
				b(F.$$.fragment, t);
				b(A.$$.fragment, t);
				b(j.$$.fragment, t);
				nt = false;
			},
			// Teardown: top-level nodes are passed to `e` only when `t` is truthy; the
			// head <meta> is always passed to `e` and every child component to `_`.
			d(t) {
				if (t) {
					e(B);
					e(W);
					e(O);
					e(R);
					e(x);
					e(U);
					e(w);
					e(J);
					e(K);
					e(u);
					e(Q);
					e(X);
					e(h);
					e(Y);
					e(Z);
					e(f);
					e(tt);
					e(et);
					e(H);
				}
				e(g);
				_(G, t);
				_(z, t);
				_(T);
				_(k);
				_(C, t);
				_(N);
				_(E);
				_(P, t);
				_(F);
				_(A);
				_(j, t);
			},
		};
	}

	/** Serialized table of contents, written to the `hf:doc:metadata` <meta> tag by h(). */
	const Ct = '{"title":"SGD","local":"sgd","sections":[{"title":"SGD","local":"api-class ][ bitsandbytes.optim.SGD","sections":[],"depth":2},{"title":"SGD8bit","local":"bitsandbytes.optim.SGD8bit","sections":[],"depth":2},{"title":"SGD32bit","local":"bitsandbytes.optim.SGD32bit","sections":[],"depth":2}],"depth":1}';

	/**
	 * Instance initializer handed to `wt`: registers a callback through `Gt` and
	 * exposes no state (returns an empty array).
	 *
	 * @param {*} ut - unused; kept because the runtime calls the initializer with it.
	 * @returns {Array} always empty.
	 */
	function Nt(ut) {
		Gt(() => {
			// The "fw" query parameter is read and the value discarded, exactly as emitted
			// by the compiler; left in place so the callback body stays unchanged.
			new URLSearchParams(window.location.search).get("fw");
		});
		return [];
	}

	/** Page component: wires `Nt` and `kt` into the base class `xt` through `wt`. */
	class jt extends xt {
		/** @param {object} g - options object forwarded unchanged to `wt`. */
		constructor(g) {
			super();
			wt(this, g, Nt, kt, Dt, {});
		}
	}

	export { jt as component };

Xet Storage Details

Size: 15.6 kB
Xet hash: 3a5dedd5f891de53e2ce98c10fce708bb6f1347b633f3744e1f5169aa5ec9887

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.