
    gR                     |    d Z ddlZddlZddlZddlmZ ddlZddlm	Z	  G d d      Z
 G d d      Z G d	 d
      Zy)a  
If you use the VADER sentiment analysis tools, please cite:

Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for
Sentiment Analysis of Social Media Text. Eighth International Conference on
Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
    N)product)pairwisec                   "   e Zd ZdZdZdZdZdZh dZi deded	ed
edededededededededededededei dedededededededed ed!ed"ed#ed$ed%ed&ed'ed(ei d)ed*ed+ed,ed-ed.ed/ed0ed1ed2ed3ed4ed5ed6ed7ed8ed9eeeeeeeeeeeeeeeed:Z	d;d;d<d=d>d?d=d@Z
 ej                  dA ej                  ej                         dB      Zg dCZdD ZdIdEZdJdFZdG ZyH)KVaderConstantsz8
    A class to keep the Vader lists and constants.
    gn?gnҿg~jt?gGz>;   ain'tcan'tdon'tisn'tuh-uhwon'taren'tdidn'thadn'thasn'tshan'twasn'tdaren'tdoesn'thaven'tmustn'tneedn'tweren'tcouldn'tmightn'toughtn'twouldn't	shouldn'tnornotaintcantdontisntnonenopeuhuhwontarentdidnthadnthasntnevershantwasntcannotdarentdoesnthaventmustntneedntrarelyseldomwerentcouldntdespitemightntneithernothingnowhereoughtntwithoutwouldntshouldnt
absolutely	amazinglyawfully
completelyconsiderably	decidedlydeeplyeffing
enormouslyentirely
especiallyexceptionally	extremely
fabulouslyflippingflippinfrickingfrickinfriggingfrigginfullyfuckinggreatlyhellahighlyhugely
incredibly	intenselymajorlymoremostparticularlypurelyquitereally
remarkablysosubstantially
thoroughlytotallytremendouslyuberunbelievably	unusuallyutterlyveryalmostbarelyhardlyzjust enoughzkind of)kindakindofzkind-oflesslittle
marginallyoccasionallypartlyscarcelyslightlysomewhatzsort ofsortasortofzsort-of         ?   g      )zthe shitzthe bombzbad assz
yeah rightzcut the mustardzkiss of deathzhand to mouth[]).!?,;:-'"z!!z!!!z??z???z?!?z!?!z?!?!z!?!?c                      y N )selfs    I/var/www/openai/venv/lib/python3.12/site-packages/nltk/sentiment/vader.py__init__zVaderConstants.__init__   s        c                     | j                   t        fd|D              ry|rt        d |D              ryt        |      D ].  \  }}|j                         dk(  s|j                         dk7  s. y y)z<
        Determine if input contains negation words
        c              3   B   K   | ]  }|j                         v   y wr   lower).0word	neg_wordss     r   	<genexpr>z)VaderConstants.negated.<locals>.<genexpr>   s     A[Ttzz|y([s   Tc              3   @   K   | ]  }d |j                         v   yw)zn'tNr   )r   r   s     r   r   z)VaderConstants.negated.<locals>.<genexpr>   s     A[T5DJJL([s   leastatF)NEGATEanyr   r   )r   input_words
include_ntfirstsecondr   s        @r   negatedzVaderConstants.negated   si     KK	A[AAA[AA%k2ME6||~(U[[]d-B 3 r   c                 B    |t        j                  ||z  |z         z  }|S )z|
        Normalize the score to be between -1 and 1 using an alpha that
        approximates the max expected value
        )mathsqrt)r   scorealpha
norm_scores       r   	normalizezVaderConstants.normalize   s&    
 TYY'>??
r   c                     d}|j                         }|| j                  v rP| j                  |   }|dk  r|dz  }|j                         r'|r%|dkD  r|| j                  z  }|S || j                  z  }|S )zh
        Check if the preceding words increase, decrease, or negate/nullify the
        valence
                r   )r   BOOSTER_DICTisupperC_INCR)r   r   valenceis_cap_diffscalar
word_lowers         r   scalar_inc_deczVaderConstants.scalar_inc_dec   s~    
 ZZ\
***&&z2F{"||~+Q;dkk)F  dkk)Fr   N)T)   )__name__
__module____qualname____doc__B_INCRB_DECRr   N_SCALARr   r   SPECIAL_CASE_IDIOMSrecompileescapestringpunctuationREGEX_REMOVE_PUNCTUATION	PUNC_LISTr   r   r   r   r   r   r   r   r   !   s    FF FH<FBCfCVC 	6C 	f	C
 	C 	VC 	&C 	&C 	fC 	FC 	fC 	C 	VC 	fC 	FC  	6!C" 	F#C$ 	6%C& 	F'C( 	6)C* 	+C, 	6-C. 	6/C0 	1C2 	&3C4 	&5C6 	f7C8 	V9C: 	6;C< 	=C> 	?C@ 	ACB 	&CCD 	ECF 	&GCH 	fICJ 	fKCL 	MCN 	fOCP 	6QCR 	SCT 	UCV 	WCX 	VYCZ 	6[C\ 	]C^ 	&_C` 	&aCb 	&cCd 	veCf 	6gCh ECLN   *rzzAibii8J8J.K-LA*NOI(r   r   c                   (    e Zd ZdZd Zd Zd Zd Zy)	SentiTextzL
    Identify sentiment-relevant string-level properties of input text.
    c                     t        |t              st        |j                  d            }|| _        || _        || _        | j                         | _        | j                  | j                        | _	        y )Nzutf-8)

isinstancestrencodetextr   r   _words_and_emoticonswords_and_emoticonsallcap_differentialr   )r   r   	punc_listregex_remove_punctuations       r   r   zSentiText.__init__  sa    $$t{{7+,D	"(@%#'#<#<#>   33D4L4LMr   c                    | j                   j                  d| j                        }|j                         }|D ch c]  }t	        |      dkD  s| }}t        | j                  |      D ci c]  }dj                  |      |d    }}t        || j                        D ci c]  }dj                  |      |d    }}|}|j                  |       |S c c}w c c}w c c}w )zt
        Returns mapping of form:
        {
            'cat,': 'cat',
            ',cat': 'cat',
        }
            r   )	r   subr   splitlenr   r   joinupdate)r   no_punc_text
words_onlywppunc_before
punc_afterwords_punc_dicts           r   _words_plus_punczSentiText._words_plus_punc  s     4488TYYG!'')
!+:As1vza
:181TU1TArwwqz1Q4'1TU07
DNN0ST0S1bggaj!A$&0S
T%z* ;UTs   CC-C#C c                     | j                   j                         }| j                         }|D cg c]  }t        |      dkD  s| }}t	        |      D ]  \  }}||v s||   ||<    |S c c}w )z
        Removes leading and trailing puncutation
        Leaves contractions and most emoticons
            Does not preserve punc-plus-letter emoticons (e.g. :D)
        r   )r   r   r   r   	enumerate)r   wesr   weis        r   r   zSentiText._words_and_emoticons+  st     iioo//1/Cb3r7Q;rC/s^EAr_$(,A $ 
	 0s   A+A+c                     d}d}|D ]  }|j                         s|dz  } t        |      |z
  }d|cxk  rt        |      k  rn |S d}|S )z
        Check whether just some words in the input are ALL CAPS

        :param list words: The words to inspect
        :returns: `True` if some but not all items in `words` are ALL CAPS
        Fr   r   T)r   r   )r   wordsis_differentallcap_wordsr   cap_differentials         r   r   zSentiText.allcap_differential9  sa     D||~!  u:4,#e*,  Lr   N)r   r   r   r   r   r   r   r   r   r   r   r   r     s    	N(r   r   c                   b    e Zd ZdZ	 ddZd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zy)SentimentIntensityAnalyzerz8
    Give a sentiment intensity score to sentences.
    c                     t         j                  j                  |      | _        | j	                         | _        t               | _        y r   )nltkdataloadlexicon_filemake_lex_dictlexiconr   	constants)r   r   s     r   r   z#SentimentIntensityAnalyzer.__init__P  s3     !IINN<8))+')r   c                     i }| j                   j                  d      D ]5  }|j                         j                  d      dd \  }}t        |      ||<   7 |S )z6
        Convert lexicon file to a dictionary
        
	r   r   )r   r   stripfloat)r   lex_dictliner   measures        r   r   z(SentimentIntensityAnalyzer.make_lex_dictX  sY     %%++D1D"jjl006q;OT7"7^HTN 2 r   c                    t        || j                  j                  | j                  j                        }g }|j                  }|D ]  }d}|j                  |      }|t        |      dz
  k  r,|j                         dk(  r||dz      j                         dk(  s&|j                         | j                  j                  v r|j                  |       | j                  |||||      } | j                  ||      }| j                  ||      S )a  
        Return a float for sentiment strength based on the input text.
        Positive values are positive valence, negative value are negative
        valence.

        :note: Hashtags are not taken into consideration (e.g. #BAD is neutral). If you
            are interested in processing the text in the hashtags too, then we recommend
            preprocessing your data to remove the #, after which the hashtag text may be
            matched as if it was a normal word in the sentence.
        r   r   kindof)r   r   r   r   r   indexr   r   r   appendsentiment_valence
_but_checkscore_valence)r   r   	sentitext
sentimentsr   itemr   r   s           r   polarity_scoresz*SentimentIntensityAnalyzer.polarity_scoresb  s     $..**DNN,S,S
	 
';;'DG#))$/AC+,q00JJLF*'A.446$>!<!<<!!'*//D!ZXJ ( __%8*E
!!*d33r   c                    |j                   }|j                  }|j                         }|| j                  v r| j                  |   }|j	                         r:|r8|dkD  r|| j
                  j                  z  }n|| j
                  j                  z  }t        dd      D ]  }	||	kD  s	|||	dz   z
     j                         | j                  vs/| j
                  j                  |||	dz   z
     ||      }
|	dk(  r
|
dk7  r|
dz  }
|	dk(  r
|
dk7  r|
dz  }
||
z   }| j                  |||	|      }|	dk(  s| j                  |||      } | j                  |||      }|j                  |       |S )Nr   r   r   gffffff?r   g?)r   r   r   r   r   r   r   ranger   _never_check_idioms_check_least_checkr  )r   r   r	  r  r   r
  r   r   item_lowercasestart_iss              r   r  z,SentimentIntensityAnalyzer.sentiment_valence  s}   ++';;T\\)ll>2G ||~+Q;t~~444Gt~~444G A;K+A1,=>DDF<<( 55+A1,=>A !|QH!|QG%kG"//!4gqG !|"&"4"4W>QST"U+ '< ''1DaHG'"r   c                    |dkD  r||dz
     j                         | j                  vrf||dz
     j                         dk(  rM||dz
     j                         dk7  r2||dz
     j                         dk7  r|| j                  j                  z  }|S |dkD  rT||dz
     j                         | j                  vr2||dz
     j                         dk(  r|| j                  j                  z  }|S )Nr   r   r   r   ro   r   )r   r   r   r   )r   r   r   r   s       r   r  z'SentimentIntensityAnalyzer._least_check  s     E#AE*002$,,F#AE*002g= $AE*002d:'A.446&@!DNN$;$;;  E#AE*002$,,F#AE*002g= 7 77Gr   c                    |D cg c]  }|j                          }}dht        |      z  }|rR|j                  t        t	        |                  }t        |      D ]!  \  }}||k  r	|dz  ||<   ||kD  s|dz  ||<   # |S c c}w )Nbutg      ?r   )r   setr  nextiterr   )r   r   r
  w_er  bisidx	sentiments           r   r  z%SentimentIntensityAnalyzer._but_check  s    6IJ6Issyy{6IJg/00$**4S	?;B#,Z#8i"9'03Jt$BY'03Jt$	 $9
  Ks   Bc                    ||dz
      d||    }dj                  ||dz
     ||dz
     ||         }||dz
      d||dz
      }dj                  ||dz
     ||dz
     ||dz
           }dj                  ||dz
     ||dz
           }|||||g}	|	D ]5  }
|
| j                  j                  v s| j                  j                  |
   } n t        |      dz
  |kD  rA||    d||dz       }|| j                  j                  v r| j                  j                  |   }t        |      dz
  |dz   kD  rSdj                  ||   ||dz      ||dz            }|| j                  j                  v r| j                  j                  |   }|| j                  j                  v s|| j                  j                  v r|| j                  j
                  z   }|S )Nr    z{} {} {}r   r   z{} {})formatr   r   r   r   r   )r   r   r   r   onezero
twoonezerotwoonethreetwoonethreetwo	sequencesseqzeroone
zeroonetwos                r   r  z(SentimentIntensityAnalyzer._idioms_check  s%   (Q/02Ea2H1IJ&&A&A&"

 (A./q1DQU1K0LM ''A&A&A&
 >>A&(;AE(B
 j&+xH	Cdnn888..<<SA 
 "#a'!+,Q/02Ea!e2L1MNG$..<<<..<<WE"#a'!a%/#**#A&#AE*#AE*J
 T^^???..<<ZH 333444 5 55Gr   c                 \   |dk(  r;| j                   j                  ||dz
     g      r|| j                   j                  z  }|dk(  re||dz
     dk(  r||dz
     dk(  s||dz
     dk(  r|dz  }n>| j                   j                  |||dz   z
     g      r|| j                   j                  z  }|dk(  r|||dz
     dk(  r||dz
     dk(  s!||dz
     dk(  s||dz
     dk(  s||dz
     dk(  r|d	z  }|S | j                   j                  |||dz   z
     g      r|| j                   j                  z  }|S )
Nr   r   r   r,   rf   thisr   r   g      ?)r   r   r   )r   r   r   r  r   s        r   r  z'SentimentIntensityAnalyzer._never_check  sf   a<~~%%':1q5'A&BC!DNN$;$;;a<"1q5)W4#AE*d2&q1u-7!C-'')<Q'A+=N)O(PQ!DNN$;$;;a<#AE*g5'A.$6*1q51V; (A.$6*1q51V; "D.  '')<Q'A+=N)O(PQ!DNN$;$;;r   c                 T    | j                  |      }| j                  |      }||z   }|S r   )_amplify_ep_amplify_qm)r   sum_sr   ep_amplifierqm_amplifierpunct_emph_amplifiers         r   _punctuation_emphasisz0SentimentIntensityAnalyzer._punctuation_emphasis$  s3    ''-''-+l:##r   c                 @    |j                  d      }|dkD  rd}|dz  }|S )Nr      g㥛 ?count)r   r   ep_countr1  s       r   r.  z&SentimentIntensityAnalyzer._amplify_ep+  s-    ::c?a<H  %'r   c                 R    |j                  d      }d}|dkD  r|dk  r|dz  }|S d}|S )Nr   r   r   r   g
ףp=
?gQ?r7  )r   r   qm_countr2  s       r   r/  z&SentimentIntensityAnalyzer._amplify_qm5  sB    ::c?a<1}  ($   $r   c                     d}d}d}|D ]9  }|dkD  r|t        |      dz   z  }|dk  r|t        |      dz
  z  }|dk(  s5|dz  }; |||fS )Nr   r   r   )r   )r   r
  pos_sumneg_sum	neu_countsentiment_scores         r   _sift_sentiment_scoresz1SentimentIntensityAnalyzer._sift_sentiment_scoresB  s}    	)O"/*Q. "/*Q. !#Q	  * **r   c                    |r
t        t        |            }| j                  ||      }|dkD  r||z  }n
|dk  r||z  }| j                  j	                  |      }| j                  |      \  }}}|t        j                  |      kD  r||z  }n|t        j                  |      k  r||z  }|t        j                  |      z   |z   }	t        j                  ||	z        }
t        j                  ||	z        }t        j                  ||	z        }nd}d}
d}d}t        |d      t        |d      t        |
d      t        |d      d}|S )Nr   r   r   r6  )negneuposcompound)	r   sumr4  r   r   rA  r   fabsround)r   r
  r   r0  r3  rF  r=  r>  r?  totalrE  rC  rD  sentiment_dicts                 r   r  z(SentimentIntensityAnalyzer.score_valenceT  sH   #j/*E#'#=#=eT#J qy----~~//6H*.*E*Ej*Q'GWi7++//499W--//dii009<E))GeO,C))GeO,C))I-.C HCCC a=a=a=h*	
 r   N)z;sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt)r   r   r   r   r   r   r  r  r  r  r  r  r4  r.  r/  rA  r  r   r   r   r   r   K  sQ     S* 4D0d(
/b:$+$%r   r   )r   r   r   r   	itertoolsr   	nltk.datar   	nltk.utilr   r   r   r   r   r   r   <module>rO     sD      	    c cLA AHn nr   