# mypy: allow-untyped-defs
from typing import cast, Optional, Union

import torch
from torch import Tensor

from .optimizer import (
    _default_to_fused_or_foreach,
    _device_dtype_check_for_fused,
    _differentiable_doc,
    _foreach_doc,
    _get_scalar_dtype,
    _get_value,
    _maximize_doc,
    _params_doc,
    _to_scalar,
    _use_grad_for_differentiable,
    _view_as_real,
    Optimizer,
    ParamsT,
)

__all__ = ["Adagrad", "adagrad"]


class Adagrad(Optimizer):
    def __init__(
        self,
        params: ParamsT,
        lr: Union[float, Tensor] = 1e-2,
        lr_decay: float = 0,
        weight_decay: float = 0,
        initial_accumulator_value: float = 0,
        eps: float = 1e-10,
        foreach: Optional[bool] = None,
        *,
        maximize: bool = False,
        differentiable: bool = False,
        fused: Optional[bool] = None,
    ):
        if isinstance(lr, Tensor) and lr.numel() != 1:
            raise ValueError("Tensor lr must be 1-element")
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= lr_decay:
            raise ValueError(f"Invalid lr_decay value: {lr_decay}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")
        if not 0.0 <= initial_accumulator_value:
            raise ValueError(
                f"Invalid initial_accumulator_value value: {initial_accumulator_value}"
            )
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")

        defaults = dict(
            lr=lr,
            lr_decay=lr_decay,
            eps=eps,
            weight_decay=weight_decay,
            initial_accumulator_value=initial_accumulator_value,
            foreach=foreach,
            maximize=maximize,
            differentiable=differentiable,
            fused=fused,
        )
        super().__init__(params, defaults)

        if fused:
            if differentiable:
                raise RuntimeError("`fused` does not support `differentiable`")
            self._need_device_dtype_check_for_fused = True
            if foreach:
                raise RuntimeError("`fused` and `foreach` cannot be `True` together.")

        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["step"] = (
                    torch.zeros(
                        (),
                        dtype=_get_scalar_dtype(is_fused=group["fused"]),
                        device=p.device,
                    )
                    if group["fused"]
                    else torch.tensor(0.0, dtype=_get_scalar_dtype())
                )
                init_value = (
                    complex(initial_accumulator_value, initial_accumulator_value)
                    if torch.is_complex(p)
                    else initial_accumulator_value
                )
                state["sum"] = torch.full_like(
                    p, init_value, memory_format=torch.preserve_format
                )

    def __setstate__(self, state):
        super().__setstate__(state)
        fused = None
        for group in self.param_groups:
            group.setdefault("foreach", None)
            group.setdefault("maximize", False)
            group.setdefault("differentiable", False)
            fused = group.setdefault("fused", None)

        state_values = list(self.state.values())
        step_is_tensor = (len(state_values) != 0) and torch.is_tensor(
            state_values[0]["step"]
        )
        if not step_is_tensor:
            for s in state_values:
                s["step"] = torch.tensor(
                    float(s["step"]), dtype=_get_scalar_dtype(is_fused=fused)
                )

    def share_memory(self):
        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["sum"].share_memory_()

    def _init_group(self, group, params_with_grad, grads, state_sums, state_steps):
        has_sparse_grad, has_complex = False, False
        for p in group["params"]:
            if p.grad is not None:
                if group["fused"] and getattr(
                    self, "_need_device_dtype_check_for_fused", False
                ):
                    _device_dtype_check_for_fused(p, cuda_unsupported=True)
                    self._need_device_dtype_check_for_fused = False
                has_sparse_grad |= p.grad.is_sparse
                has_complex |= torch.is_complex(p)
                params_with_grad.append(p)
                grads.append(p.grad)
                state = self.state[p]
                state_sums.append(state["sum"])
                state_steps.append(state["step"])
        return has_sparse_grad, has_complex

    @_use_grad_for_differentiable
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None

        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad: list[Tensor] = []
            grads: list[Tensor] = []
            state_sums: list[Tensor] = []
            state_steps: list[Tensor] = []

            has_sparse_grad, has_complex = self._init_group(
                group, params_with_grad, grads, state_sums, state_steps
            )

            adagrad(
                params_with_grad,
                grads,
                state_sums,
                state_steps,
                lr=group["lr"],
                weight_decay=group["weight_decay"],
                lr_decay=group["lr_decay"],
                eps=group["eps"],
                has_sparse_grad=has_sparse_grad,
                foreach=group["foreach"],
                maximize=group["maximize"],
                differentiable=group["differentiable"],
                has_complex=has_complex,
                fused=group["fused"],
                grad_scale=getattr(self, "grad_scale", None),
                found_inf=getattr(self, "found_inf", None),
            )

        return loss


Adagrad.__doc__ = (
    r"""Implements Adagrad algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)}, \: f(\theta)
                \text{ (objective)}, \: \lambda \text{ (weight decay)},                          \\
            &\hspace{12mm}    \tau \text{ (initial accumulator value)}, \: \eta\text{ (lr decay)}\\
            &\textbf{initialize} :  state\_sum_0 \leftarrow \tau                          \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \tilde{\gamma}    \leftarrow \gamma / (1 +(t-1) \eta)                  \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm}state\_sum_t  \leftarrow  state\_sum_{t-1} + g^2_t                      \\
            &\hspace{5mm}\theta_t \leftarrow
                \theta_{t-1}- \tilde{\gamma} \frac{g_t}{\sqrt{state\_sum_t}+\epsilon}            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adaptive Subgradient Methods for Online Learning
    and Stochastic Optimization`_.
    """
    + rf"""
    Args:
        {_params_doc}
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        initial_accumulator_value (float, optional): initial value of the
            sum of squares of gradients (default: 0)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        {_foreach_doc}
        {_maximize_doc}
        {_differentiable_doc}
        fused (bool, optional): whether the fused implementation (CPU only) is used.
            Currently, `torch.float64`, `torch.float32`, `torch.float16`, and `torch.bfloat16`
            are supported. (default: None). Please note that the fused implementation does not
            support sparse or complex gradients.
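
    Example (an illustrative sketch; ``model``, ``input``, ``target``, and
    ``loss_fn`` are placeholders assumed to be defined by the caller, not by
    this module):

        >>> # xdoctest: +SKIP
        >>> optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()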
    .. _Adaptive Subgradient Methods for Online Learning and Stochastic
        Optimization: http://jmlr.org/papers/v12/duchi11a.html

    """
)


def adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    fused: Optional[bool] = None,
    grad_scale: Optional[Tensor] = None,
    found_inf: Optional[Tensor] = None,
    # kwonly args with defaults are not supported by functions compiled with
    # torchscript, so these stay positional-with-defaults for the compiled
    # functional API
    has_sparse_grad: bool = False,
    foreach: Optional[bool] = None,
    differentiable: bool = False,
    has_complex: bool = False,
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    maximize: bool,
):
    r"""Functional API that performs Adagrad algorithm computation.

    See :class:`~torch.optim.Adagrad` for details.
    """
    if not all(isinstance(t, torch.Tensor) for t in state_steps):
        raise RuntimeError(
            "API has changed, `state_steps` argument must contain a list of singleton tensors"
        )

    # Respect the user's explicit False/True for foreach or fused; only pick a
    # default when neither has been specified.
    if fused is None and foreach is None:
        _, foreach = _default_to_fused_or_foreach(
            params, differentiable, use_fused=False
        )
    if fused is None:
        fused = False
    if foreach is None:
        foreach = False

    if foreach and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with foreach optimizers")
    if fused and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with fused optimizers")

    if fused and not torch.jit.is_scripting():
        func = _fused_adagrad
    elif foreach and not torch.jit.is_scripting():
        func = _multi_tensor_adagrad
    else:
        func = _single_tensor_adagrad

    func(
        params,
        grads,
        state_sums,
        state_steps,
        lr=lr,
        weight_decay=weight_decay,
        lr_decay=lr_decay,
        eps=eps,
        has_sparse_grad=has_sparse_grad,
        maximize=maximize,
        differentiable=differentiable,
        has_complex=has_complex,
        grad_scale=grad_scale,
        found_inf=found_inf,
    )


def _make_sparse(grad, grad_indices, values):
    size = grad.size()
    return torch.sparse_coo_tensor(grad_indices, values, size)


def _single_tensor_adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    grad_scale: Optional[Tensor],
    found_inf: Optional[Tensor],
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    has_sparse_grad: bool,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
):
    assert grad_scale is None and found_inf is None

    if not torch.jit.is_scripting():
        lr = _to_scalar(lr)

    for param, grad, state_sum, step_t in zip(params, grads, state_sums, state_steps):
        # update step
        step_t += 1
        step = _get_value(step_t)
        grad = grad if not maximize else -grad

        if weight_decay != 0:
            if grad.is_sparse:
                raise RuntimeError(
                    "weight_decay option is not compatible with sparse gradients"
                )
            grad = grad.add(param, alpha=weight_decay)

        clr = lr / (1 + (step - 1) * lr_decay)

        if grad.is_sparse:
            # the update is non-linear so indices must be unique
            grad = grad.coalesce()
            grad_indices = grad._indices()
            grad_values = grad._values()

            state_sum.add_(_make_sparse(grad, grad_indices, grad_values.pow(2)))
            std = state_sum.sparse_mask(grad)
            std_values = std._values().sqrt_().add_(eps)
            param.add_(
                _make_sparse(grad, grad_indices, grad_values / std_values), alpha=-clr
            )
        else:
            is_complex = torch.is_complex(param)
            if is_complex:
                grad = torch.view_as_real(grad)
                state_sum = torch.view_as_real(state_sum)
                param = torch.view_as_real(param)
            state_sum.addcmul_(grad, grad, value=1)
            if differentiable:
                std = state_sum.sqrt() + eps
            else:
                std = state_sum.sqrt().add_(eps)
            param.addcdiv_(grad, std, value=-clr)
            if is_complex:
                param = torch.view_as_complex(param)
                state_sum = torch.view_as_complex(state_sum)


def _multi_tensor_adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    grad_scale: Optional[Tensor],
    found_inf: Optional[Tensor],
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    has_sparse_grad: bool,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
):
    assert not differentiable, "_foreach ops don't support autograd"
    assert grad_scale is None and found_inf is None

    if len(params) == 0:
        return

    lr = _to_scalar(lr)

    grouped_tensorlists = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, state_sums, state_steps]
    )
    for (
        device_params_,
        device_grads_,
        device_state_sums_,
        device_state_steps_,
    ), _ in grouped_tensorlists.values():
        device_params = cast(list[Tensor], device_params_)
        device_grads = cast(list[Tensor], device_grads_)
        device_state_sums = cast(list[Tensor], device_state_sums_)
        device_state_steps = cast(list[Tensor], device_state_steps_)

        device_has_sparse_grad = has_sparse_grad and any(
            grad.is_sparse for grad in device_grads
        )
        if device_has_sparse_grad:
            # Sparse gradients fall back to the single-tensor path.
            _single_tensor_adagrad(
                device_params,
                device_grads,
                device_state_sums,
                device_state_steps,
                lr=lr,
                weight_decay=weight_decay,
                lr_decay=lr_decay,
                eps=eps,
                has_sparse_grad=True,
                maximize=maximize,
                differentiable=differentiable,
                has_complex=has_complex,
                grad_scale=grad_scale,
                found_inf=found_inf,
            )
            continue

        # Handle complex parameters
        if has_complex:
            _view_as_real(device_params, device_grads, device_state_sums)

        if maximize:
            device_grads = torch._foreach_neg(device_grads)

        # Update steps. If steps are on CPU, foreach falls back to a slow
        # per-tensor loop that would wrap 1 into a Tensor over and over, so
        # wrap it once here; the alpha kwarg selects the right overload.
        if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu:
            torch._foreach_add_(
                device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0
            )
        else:
            torch._foreach_add_(device_state_steps, 1)

        if weight_decay != 0:
            # Re-use the intermediate memory (device_grads) already allocated for maximize
            if maximize:
                torch._foreach_add_(device_grads, device_params, alpha=weight_decay)
            else:
                device_grads = torch._foreach_add(
                    device_grads, device_params, alpha=weight_decay
                )

        minus_clr = [
            -lr / (1 + (_get_value(step) - 1) * lr_decay) for step in device_state_steps
        ]

        torch._foreach_addcmul_(device_state_sums, device_grads, device_grads, value=1)

        std = torch._foreach_sqrt(device_state_sums)
        torch._foreach_add_(std, eps)

        if weight_decay != 0 or maximize:
            # Again, re-use the intermediate memory (device_grads) already allocated
            torch._foreach_mul_(device_grads, minus_clr)
            numerator = device_grads
        else:
            numerator = torch._foreach_mul(device_grads, minus_clr)

        torch._foreach_addcdiv_(device_params, numerator, std)


def _fused_adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    grad_scale: Optional[Tensor],
    found_inf: Optional[Tensor],
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    has_sparse_grad: bool,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
) -> None:
    if not params:
        return
    if has_sparse_grad or has_complex:
        raise RuntimeError("`fused` does not support sparse grad or complex param")
    if differentiable:
        raise RuntimeError(
            "adagrad with fused=True does not support differentiable=True"
        )

    lr = _to_scalar(lr)

    grad_scale_dict = (
        {grad_scale.device: grad_scale} if grad_scale is not None else None
    )
    found_inf_dict = {found_inf.device: found_inf} if found_inf is not None else None

    grouped_tensors = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, state_sums, state_steps]
    )
    for (device, _), (
        (device_params_, device_grads_, device_state_sums_, device_state_steps_),
        _,
    ) in grouped_tensors.items():
        device_params = cast(list[Tensor], device_params_)
        device_grads = cast(list[Tensor], device_grads_)
        device_state_sums = cast(list[Tensor], device_state_sums_)
        device_state_steps = cast(list[Tensor], device_state_steps_)

        device_grad_scale, device_found_inf = None, None
        if grad_scale is not None and grad_scale_dict is not None:
            if device not in grad_scale_dict:
                grad_scale_dict[device] = grad_scale.to(device, non_blocking=True)
            device_grad_scale = grad_scale_dict[device]
        if found_inf is not None and found_inf_dict is not None:
            if device not in found_inf_dict:
                found_inf_dict[device] = found_inf.to(device, non_blocking=True)
            device_found_inf = found_inf_dict[device]

        torch._foreach_add_(device_state_steps, 1)
        torch._fused_adagrad_(
            device_params,
            device_grads,
            device_state_sums,
            device_state_steps,
            lr=lr,
            lr_decay=lr_decay,
            weight_decay=weight_decay,
            eps=eps,
            maximize=maximize,
            grad_scale=device_grad_scale,
            found_inf=device_found_inf,
        )
        if device_found_inf is not None:
            torch._foreach_sub_(
                device_state_steps, [device_found_inf] * len(device_state_steps)
            )