Buckets:

download
raw
16.1 kB
// Compiled page module for the "AdEMAMix" API reference page.
//
// NOTE(review): the local alias names below are descriptive guesses based on
// how each helper is called in this file; the chunk modules themselves are not
// visible from here. Only the aliases changed - the imported specifiers are
// the original ones.
import { s as safeNotEqual, n as noop, o as onMount } from "../chunks/scheduler.852ec091.js";
import {
  S as SvelteComponent,
  i as init,
  g as element,
  s as space,
  r as createComponent,
  A as headSelector,
  h as claimElement,
  f as detach,
  c as claimSpace,
  j as children,
  u as claimComponent,
  x as getSvelteDataset,
  k as attr,
  y as append,
  a as insert,
  v as mountComponent,
  d as transitionIn,
  t as transitionOut,
  w as destroyComponent,
} from "../chunks/index.28275fd3.js";
import { D as Docstring } from "../chunks/Docstring.ee6c313e.js";
import { H as Heading, E as EditOnGithub } from "../chunks/EditOnGithub.582011f0.js";

/** Class list applied to every docstring wrapper <div> (outer and nested). */
const DOCSTRING_CLASS =
  "docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8";

/** Python file every "source" link on this page points into. */
const SOURCE_URL =
  "https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1532/bitsandbytes/optim/ademamix.py";

/** Markdown source of this page, used by the "edit on GitHub" link. */
const EDIT_URL =
  "https://github.com/bitsandbytes-foundation/bitsandbytes/blob/main/docs/source/reference/optim/ademamix.mdx";

// Static paragraph contents. The hydration path (`l`) only re-assigns them when
// the claimed paragraph's dataset value differs from a fixed hash, so these
// strings are kept byte-for-byte.
// NOTE(review): "CUDAs" looks like a typo for "CUDA's"; fix it in the .mdx
// source and regenerate rather than editing here, so the hash stays in sync.
const INTRO_HTML =
  '<a href="https://hf.co/papers/2409.03137" rel="nofollow">AdEMAMix</a> is a variant of the <code>Adam</code> optimizer.';
const PAGED_TEXT =
  "bitsandbytes also supports paged optimizers which take advantage of CUDAs unified memory to transfer memory from the GPU to the CPU when GPU memory is exhausted.";

/**
 * One entry per documented optimizer class, in page order.
 *
 * - `name`: class name; also the section title.
 * - `classLine` / `initLine`: line anchors appended to SOURCE_URL for the
 *   class docstring and its `__init__` docstring.
 * - `hasOptimBits` / `hasIsPaged`: whether the signature lists the
 *   `optim_bits` / `is_paged` parameter.
 */
const OPTIMIZERS = [
  { name: "AdEMAMix", classLine: 106, initLine: 107, hasOptimBits: true, hasIsPaged: true },
  { name: "AdEMAMix8bit", classLine: 272, initLine: 273, hasOptimBits: false, hasIsPaged: true },
  { name: "AdEMAMix32bit", classLine: 357, initLine: 358, hasOptimBits: false, hasIsPaged: true },
  { name: "PagedAdEMAMix", classLine: 328, initLine: 329, hasOptimBits: true, hasIsPaged: false },
  { name: "PagedAdEMAMix8bit", classLine: 301, initLine: 302, hasOptimBits: false, hasIsPaged: false },
  { name: "PagedAdEMAMix32bit", classLine: 390, initLine: 391, hasOptimBits: false, hasIsPaged: false },
];

/**
 * Anchor used for a section heading, its table-of-contents entry and its
 * class docstring.
 *
 * The first section previously carried the malformed anchor
 * "api-class ][ bitsandbytes.optim.AdEMAMix"; it now follows the same
 * `bitsandbytes.optim.<Name>` shape as every other section.
 *
 * @param {string} name - optimizer class name
 * @returns {string} dotted anchor
 */
function anchorFor(name) {
  return `bitsandbytes.optim.${name}`;
}

/**
 * Builds a fresh parameter list for one docstring. A new array (and new entry
 * objects) is returned on every call so no two components share a list.
 *
 * @param {{hasOptimBits: boolean, hasIsPaged: boolean}} optimizer
 * @returns {Array<{name: string, val: string}>}
 */
function buildParameters({ hasOptimBits, hasIsPaged }) {
  const parameters = [
    { name: "params", val: ": typing.Iterable[torch.nn.parameter.Parameter]" },
    { name: "lr", val: ": float = 0.001" },
    { name: "betas", val: ": typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999)" },
    { name: "alpha", val: ": float = 5.0" },
    { name: "t_alpha", val: ": typing.Optional[int] = None" },
    { name: "t_beta3", val: ": typing.Optional[int] = None" },
    { name: "eps", val: ": float = 1e-08" },
    { name: "weight_decay", val: ": float = 0.01" },
  ];
  if (hasOptimBits) {
    parameters.push({ name: "optim_bits", val: ": typing.Literal[8, 32] = 32" });
  }
  parameters.push({ name: "min_8bit_size", val: ": int = 4096" });
  if (hasIsPaged) {
    parameters.push({ name: "is_paged", val: ": bool = False" });
  }
  return parameters;
}

/**
 * Creates the page fragment: an object with the `c`/`l`/`h`/`m`/`p`/`i`/`o`/`d`
 * hooks that is handed to `init` by the component class below.
 *
 * Page layout, in document order: a <meta> tag appended to document.head, an
 * empty <p>, the h1 heading, two static paragraphs, one section per entry of
 * OPTIMIZERS (h2 heading, then a wrapper <div> holding the class docstring and
 * a nested <div> holding the `__init__` docstring), the edit link, and a
 * trailing empty <p>. Consecutive siblings are separated by nodes created with
 * the `space` helper.
 *
 * The order of calls inside each hook mirrors the generated code exactly; the
 * claim pass (`l`) in particular consumes existing nodes in document order.
 *
 * @param {*} ctx - unused by this page
 * @returns {object} fragment hooks
 */
function Wt(ctx) {
  let metaTag;
  let gapAfterMeta;
  let leadParagraph;
  let gapAfterLead;
  let gapAfterTitle;
  let introParagraph;
  let gapAfterIntro;
  let pagedParagraph;
  let gapAfterPaged;
  let gapAfterEditLink;
  let tailParagraph;
  let isTransitionedIn;

  // Components are constructed in page order: title, then for each section
  // its heading, class docstring and __init__ docstring, then the edit link.
  const pageTitle = new Heading({
    props: { title: "AdEMAMix", local: "ademamix", headingTag: "h1" },
  });

  const sections = OPTIMIZERS.map((optimizer) => {
    const anchor = anchorFor(optimizer.name);
    const heading = new Heading({
      props: { title: optimizer.name, local: anchor, headingTag: "h2" },
    });
    const classDoc = new Docstring({
      props: {
        name: `class ${anchor}`,
        anchor,
        parameters: buildParameters(optimizer),
        source: `${SOURCE_URL}#L${optimizer.classLine}`,
      },
    });
    const initDoc = new Docstring({
      props: {
        name: "__init__",
        anchor: `${anchor}.__init__`,
        parameters: buildParameters(optimizer),
        source: `${SOURCE_URL}#L${optimizer.initLine}`,
      },
    });
    return {
      heading,
      classDoc,
      initDoc,
      // DOM nodes, filled in by `c` (create) or `l` (claim).
      gapAfterHeading: undefined,
      outer: undefined,
      gapInside: undefined,
      inner: undefined,
      gapAfterSection: undefined,
    };
  });

  const editLink = new EditOnGithub({ props: { source: EDIT_URL } });

  return {
    // Create every node from scratch.
    c() {
      metaTag = element("meta");
      gapAfterMeta = space();
      leadParagraph = element("p");
      gapAfterLead = space();
      createComponent(pageTitle.$$.fragment);
      gapAfterTitle = space();
      introParagraph = element("p");
      introParagraph.innerHTML = INTRO_HTML;
      gapAfterIntro = space();
      pagedParagraph = element("p");
      pagedParagraph.textContent = PAGED_TEXT;
      gapAfterPaged = space();
      for (const section of sections) {
        createComponent(section.heading.$$.fragment);
        section.gapAfterHeading = space();
        section.outer = element("div");
        createComponent(section.classDoc.$$.fragment);
        section.gapInside = space();
        section.inner = element("div");
        createComponent(section.initDoc.$$.fragment);
        section.gapAfterSection = space();
      }
      createComponent(editLink.$$.fragment);
      gapAfterEditLink = space();
      tailParagraph = element("p");
      this.h();
    },

    // Claim nodes from `nodes` instead of creating them; leftover children of
    // each claimed container are detached.
    l(nodes) {
      const headNodes = headSelector("svelte-u9bgzb", document.head);
      metaTag = claimElement(headNodes, "META", { name: true, content: true });
      headNodes.forEach(detach);
      gapAfterMeta = claimSpace(nodes);
      leadParagraph = claimElement(nodes, "P", {});
      children(leadParagraph).forEach(detach);
      gapAfterLead = claimSpace(nodes);
      claimComponent(pageTitle.$$.fragment, nodes);
      gapAfterTitle = claimSpace(nodes);
      introParagraph = claimElement(nodes, "P", { "data-svelte-h": true });
      if (getSvelteDataset(introParagraph) !== "svelte-1holr5j") {
        introParagraph.innerHTML = INTRO_HTML;
      }
      gapAfterIntro = claimSpace(nodes);
      pagedParagraph = claimElement(nodes, "P", { "data-svelte-h": true });
      if (getSvelteDataset(pagedParagraph) !== "svelte-qpasov") {
        pagedParagraph.textContent = PAGED_TEXT;
      }
      gapAfterPaged = claimSpace(nodes);
      for (const section of sections) {
        claimComponent(section.heading.$$.fragment, nodes);
        section.gapAfterHeading = claimSpace(nodes);
        section.outer = claimElement(nodes, "DIV", { class: true });
        const outerNodes = children(section.outer);
        claimComponent(section.classDoc.$$.fragment, outerNodes);
        section.gapInside = claimSpace(outerNodes);
        section.inner = claimElement(outerNodes, "DIV", { class: true });
        const innerNodes = children(section.inner);
        claimComponent(section.initDoc.$$.fragment, innerNodes);
        innerNodes.forEach(detach);
        outerNodes.forEach(detach);
        section.gapAfterSection = claimSpace(nodes);
      }
      claimComponent(editLink.$$.fragment, nodes);
      gapAfterEditLink = claimSpace(nodes);
      tailParagraph = claimElement(nodes, "P", {});
      children(tailParagraph).forEach(detach);
      this.h();
    },

    // Set static attributes; called at the end of both `c` and `l`.
    h() {
      attr(metaTag, "name", "hf:doc:metadata");
      attr(metaTag, "content", Xt);
      for (const section of sections) {
        // Nested wrapper first, then the outer one, as in the generated code.
        attr(section.inner, "class", DOCSTRING_CLASS);
        attr(section.outer, "class", DOCSTRING_CLASS);
      }
    },

    // Attach everything: <meta> to document.head, the rest to `target`
    // before `anchor`.
    m(target, anchor) {
      append(document.head, metaTag);
      insert(target, gapAfterMeta, anchor);
      insert(target, leadParagraph, anchor);
      insert(target, gapAfterLead, anchor);
      mountComponent(pageTitle, target, anchor);
      insert(target, gapAfterTitle, anchor);
      insert(target, introParagraph, anchor);
      insert(target, gapAfterIntro, anchor);
      insert(target, pagedParagraph, anchor);
      insert(target, gapAfterPaged, anchor);
      for (const section of sections) {
        mountComponent(section.heading, target, anchor);
        insert(target, section.gapAfterHeading, anchor);
        insert(target, section.outer, anchor);
        mountComponent(section.classDoc, section.outer, null);
        append(section.outer, section.gapInside);
        append(section.outer, section.inner);
        mountComponent(section.initDoc, section.inner, null);
        insert(target, section.gapAfterSection, anchor);
      }
      mountComponent(editLink, target, anchor);
      insert(target, gapAfterEditLink, anchor);
      insert(target, tailParagraph, anchor);
      isTransitionedIn = true;
    },

    // Nothing on this page depends on changing state.
    p: noop,

    // Intro pass over the child components; skipped when already done.
    i(local) {
      if (isTransitionedIn) {
        return;
      }
      transitionIn(pageTitle.$$.fragment, local);
      for (const section of sections) {
        transitionIn(section.heading.$$.fragment, local);
        transitionIn(section.classDoc.$$.fragment, local);
        transitionIn(section.initDoc.$$.fragment, local);
      }
      transitionIn(editLink.$$.fragment, local);
      isTransitionedIn = true;
    },

    // Outro pass over the child components.
    o(local) {
      transitionOut(pageTitle.$$.fragment, local);
      for (const section of sections) {
        transitionOut(section.heading.$$.fragment, local);
        transitionOut(section.classDoc.$$.fragment, local);
        transitionOut(section.initDoc.$$.fragment, local);
      }
      transitionOut(editLink.$$.fragment, local);
      isTransitionedIn = false;
    },

    // Tear down. Top-level nodes are detached only when `detaching` is truthy;
    // the <meta> tag is always detached.
    d(detaching) {
      if (detaching) {
        detach(gapAfterMeta);
        detach(leadParagraph);
        detach(gapAfterLead);
        detach(gapAfterTitle);
        detach(introParagraph);
        detach(gapAfterIntro);
        detach(pagedParagraph);
        detach(gapAfterPaged);
        for (const section of sections) {
          detach(section.gapAfterHeading);
          detach(section.outer);
          detach(section.gapAfterSection);
        }
        detach(gapAfterEditLink);
        detach(tailParagraph);
      }
      detach(metaTag);
      destroyComponent(pageTitle, detaching);
      for (const section of sections) {
        destroyComponent(section.heading, detaching);
        destroyComponent(section.classDoc);
        destroyComponent(section.initDoc);
      }
      destroyComponent(editLink, detaching);
    },
  };
}

/**
 * Page metadata written to the `content` attribute of the
 * <meta name="hf:doc:metadata"> tag: the page title plus one depth-2 entry per
 * optimizer section. Serialised compactly, with keys in the order
 * title, local, sections, depth.
 */
const Xt = JSON.stringify({
  title: "AdEMAMix",
  local: "ademamix",
  sections: OPTIMIZERS.map(({ name }) => ({
    title: name,
    local: anchorFor(name),
    sections: [],
    depth: 2,
  })),
  depth: 1,
});

/**
 * Instance function passed to `init`. Registers an on-mount callback and
 * returns an empty context array.
 *
 * @param {*} self - unused
 * @returns {Array} always empty
 */
function Yt(self) {
  onMount(() => {
    // The "fw" query parameter is read and the value discarded; kept as
    // generated.
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component; exported under the name `component`. */
class na extends SvelteComponent {
  /** @param {object} options - forwarded unchanged to `init` */
  constructor(options) {
    super();
    init(this, options, Yt, Wt, safeNotEqual, {});
  }
}

export { na as component };

Xet Storage Details

Size:
16.1 kB
·
Xet hash:
d6f2069ce6564df0abb0e8ad9071132bcd9710cbf0d7d4986d6cfe11b42b5c98

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.