
    g$                       d dl mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dlm
Z
 d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ er0d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ  G d d      Z y)    )annotations)TYPE_CHECKING)Any)Iterable)Literal)Sequence)native_to_narwhals_dtype)parse_exprs_and_named_exprs)Implementation)flatten)parse_columns_to_drop)parse_version)validate_backend_version)	DataFrame)Self)SparkLikeExprSparkLikeLazyGroupBySparkLikeNamespace)IntoSparkLikeExpr)DType)Versionc                  @   e Zd Z	 	 	 	 	 	 	 	 ddZddZddZddZddZddZe	dd       Z
ddZ	 	 	 	 	 	 	 	 d d	Zd!d
Ze	d"d       Zd"dZ	 	 	 	 	 	 	 	 d dZd#dZd$dZd%dZddd	 	 	 	 	 	 	 	 	 	 	 d&dZd'dZd(dZ	 d)	 	 	 	 	 	 	 d*dZ	 	 	 	 	 	 	 	 	 	 	 	 d+dZy),SparkLikeLazyFramec                   || _         || _        t        j                  | _        || _        t        | j                  | j                         y N)_native_frame_backend_versionr   PYSPARK_implementation_versionr   )selfnative_dataframebackend_versionversions       S/var/www/openai/venv/lib/python3.12/site-packages/narwhals/_spark_like/dataframe.py__init__zSparkLikeLazyFrame.__init__   s?     . /-55 !5!5t7L7LM    c                    | j                   t        j                  u r| j                   j                         S dt	        | j                          }t        |      )NzExpected pyspark, got: )r!   r   r    to_native_namespacetypeAssertionError)r#   msgs     r'   __native_namespace__z'SparkLikeLazyFrame.__native_namespace__+   sN    >#9#99'';;=='T-A-A(B'CDS!!r)   c                J    ddl m}  || j                  | j                        S )Nr   r   r%   r&   )narwhals._spark_like.namespacer   r   r"   )r#   r   s     r'   __narwhals_namespace__z)SparkLikeLazyFrame.__narwhals_namespace__2   s!    E! 114==
 	
r)   c                    | S r    r#   s    r'   __narwhals_lazyframe__z)SparkLikeLazyFrame.__narwhals_lazyframe__9   s    r)   c                R    | j                  | j                  | j                  |      S Nr1   )	__class__r   r   )r#   r&   s     r'   _change_versionz"SparkLikeLazyFrame._change_version<   s+    ~~0E0Ew  
 	
r)   c                R    | j                  || j                  | j                        S r9   )r:   r   r"   )r#   dfs     r'   _from_native_framez%SparkLikeLazyFrame._from_native_frameA   s)    ~~ 5 5t}}  
 	
r)   c                .    | j                   j                  S r   )r   columnsr6   s    r'   r@   zSparkLikeLazyFrame.columnsF   s    !!)))r)   c                    dd l }ddlm}  || j                  j	                         t
        j                  t        |j                        | j                        S )Nr   )PandasLikeDataFrame)r$   implementationr%   r&   )
pandasnarwhals._pandas_like.dataframerB   r   toPandasr   PANDASr   __version__r"   )r#   pdrB   s      r'   collectzSparkLikeLazyFrame.collectJ   sC    G"!//88:)00)"..9MM	
 	
r)   c                   |r<t        d |D              r*|s(| j                   | j                  j                  |       S t	        | g|i |}|sEddlm} | j                  j                  }|j                  g  |g             }| j                  |      S |j                         D cg c]  \  }}|j                  |       }	}}| j                   | j                  j                  |	       S c c}}w )Nc              3  <   K   | ]  }t        |t                y wr   )
isinstancestr).0xs     r'   	<genexpr>z,SparkLikeLazyFrame.select.<locals>.<genexpr>[   s     ;UAs+Us   r   )
StructType)allr>   r   selectr
   pyspark.sql.typesrR   sparkSessioncreateDataFrameitemsalias)
r#   exprsnamed_exprsnew_columnsrR   spark_sessionspark_dfcol_namecolnew_columns_lists
             r'   rT   zSparkLikeLazyFrame.selectV   s    
 S;U;;K**+D4+=+=+D+De+LMM1$NN+N4 ..;;M$44RBHH**844EPEVEVEXYEXMHcCIIh/EXY&&'@t'9'9'@'@BR'STT Zs   (C.c                    | j                         } |j                  | }|j                  |       d   }| j                  j	                  |      }| j                  |      S Nr   )r3   all_horizontal_callr   wherer>   )r#   
predicatesplxexpr	conditionr^   s         r'   filterzSparkLikeLazyFrame.filterm   s\    ))+!s!!:.JJt$Q'	%%++I6&&x00r)   c                    | j                   j                  D ci c].  }|j                  t        |j                  | j
                        0 c}S c c}w )N)dtyper&   )r   schemanamer	   dataTyper"   )r#   fields     r'   rn   zSparkLikeLazyFrame.schemau   sW     ++22	
 3 JJ0nndmm  3	
 	
 
s   3Ac                    | j                   S r   )rn   r6   s    r'   collect_schemaz!SparkLikeLazyFrame.collect_schema~   s    {{r)   c                t    t        | g|i |}| j                  | j                  j                  |            S r   )r
   r>   r   withColumns)r#   rZ   r[   new_columns_maps       r'   with_columnszSparkLikeLazyFrame.with_columns   s:    
 6dRURkR&&t'9'9'E'Eo'VWWr)   c                n    t        | ||      }| j                   | j                  j                  |       S )N)compliant_framer@   strict)r   r>   r   drop)r#   r@   rz   columns_to_drops       r'   r{   zSparkLikeLazyFrame.drop   s:    / '&
 &&'>t'9'9'>'>'PQQr)   c                    | j                   j                  }| j                  |j                  | j                   j	                  |                  S )N)num)r   rV   r>   rW   take)r#   nr]   s      r'   headzSparkLikeLazyFrame.head   sH    **77&&))$*<*<*A*Aa*A*HI
 	
r)   c               6    ddl m}  || t        |      |      S )Nr   r   )r=   keysdrop_null_keys)narwhals._spark_like.group_byr   list)r#   r   r   r   s       r'   group_byzSparkLikeLazyFrame.group_by   s    F#$t*^
 	
r)   F)
descending
nulls_lastc               \  
 dd l mc m
 t        g t        |g      |      }t	        |t
              r|gt        |      z  }|r
fd|D        }n
fd|D        }t        ||      D cg c]  \  }} ||       }	}}| j                   | j                  j                  |	       S c c}}w )Nr   c              3  R   K   | ]  }|rj                   nj                     y wr   )desc_nulls_lastasc_nulls_lastrO   dFs     r'   rQ   z*SparkLikeLazyFrame.sort.<locals>.<genexpr>   s(      FPQ!!A,<,<<j   $'c              3  R   K   | ]  }|rj                   nj                     y wr   )desc_nulls_firstasc_nulls_firstr   s     r'   rQ   z*SparkLikeLazyFrame.sort.<locals>.<genexpr>   s(      HR1a""Q->->>
r   )pyspark.sql.functionssql	functionsr   rM   boollenzipr>   r   sort)r#   byr   r   more_byflat_by
sort_funcsr`   sort_f	sort_colsr   s             @r'   r   zSparkLikeLazyFrame.sort   s     	*)4GRDM4G45j$'$G4JFPJHRJ 584LM4L[S&VC[4L	M&&'>t'9'9'>'>	'JKK Ns   +B(c                X    | j                  | j                  j                  |            S )Nsubset)r>   r   dropna)r#   r   s     r'   
drop_nullszSparkLikeLazyFrame.drop_nulls   s'    &&t'9'9'@'@'@'OPPr)   c                P   dd l mc m} | j                  D ci c]  }||j	                  ||       }}| j                  | j                  j                  |j                         D cg c]%  \  }}|j                  |      j                  |      ' c}}            S c c}w c c}}w rc   )r   r   r   r@   getr>   r   rT   rX   r`   rY   )r#   mappingr   colnamerename_mappingoldnews          r'   renamezSparkLikeLazyFrame.rename   s    )) CG,,
BNwGW[['22, 	 
 &&%%7E7K7K7MN7M83s!!#&7MN
 	


 Os   B'*B"Nc                   |dk7  rd}t        |      t        |t              r|gn|}| j                  | j                  j                  |            S )NanyzC`LazyFrame.unique` with PySpark backend only supports `keep='any'`.r   )
ValueErrorrM   rN   r>   r   dropDuplicates)r#   r   keepr.   s       r'   uniquezSparkLikeLazyFrame.unique   sR     5=WCS/!'4&&&&t'9'9'H'HPV'H'WXXr)   c           	        dd l mc m} | j                  }|j                  }| j                  }	|j                  }
t        |t              r|g}t        |t              r|g}i t        t        |xs g |xs g             t        t        |
      j                  t        |xs g                   D ci c]  }|||	v r| | n| c}}|j                  |j                         D cg c]%  \  }}|j                  |      j                  |      ' c}}      }|	}|dv r*|j!                  |
D cg c]  }||xs g vr||    c}       | j#                  |j%                  |||      j                  |            S c c}w c c}}w c c}w )Nr   >   leftcrossinner)otheronhow)r   r   r   r   r@   rM   rN   dictr   r   set
differencerT   rX   r`   rY   extendr>   join)r#   r   r   left_onright_onsuffixr   self_nativeother_nativeleft_columnsright_columnsr   r   r   r   	col_orders                   r'   r   zSparkLikeLazyFrame.join   s    	*)((**||gs#iGh$ zH

3x~2w}"56
  $C$6$A$A#hnRTBU$VWWG L1HG9VH-gUW
 ##3A3G3G3IJ3IxsCQUU3Zc"3IJ
 !	,, $1#0x~26 #7+#0 &&5W#>EEiP
 	
/ Ks   0E:&*E?
-F)r$   r   r%   ztuple[int, ...]r&   r   returnNone)r   r   )r   r   )r   r   )r&   r   r   r   )r=   r   r   r   )r   	list[str])r#   r   rZ   r   r[   r   r   r   )rg   r   r   r   )r   zdict[str, DType])r#   r   r@   r   rz   r   r   r   )r#   r   r   intr   r   )r#   r   r   rN   r   r   r   r   )r#   r   r   zstr | Iterable[str]r   rN   r   zbool | Sequence[bool]r   r   r   r   )r#   r   r   str | list[str] | Noner   r   )r#   r   r   zdict[str, str]r   r   r   )r#   r   r   r   r   zLiteral['any', 'none']r   r   )r   r   r   z1Literal['inner', 'left', 'cross', 'semi', 'anti']r   r   r   r   r   rN   r   r   )__name__
__module____qualname__r(   r/   r3   r7   r;   r>   propertyr@   rJ   rT   rk   rn   rs   rw   r{   r   r   r   r   r   r   r   r5   r)   r'   r   r      s   N#N )	N
 N 
N"




 * *

UU!U )U 
	U.1 
 
XX!X )X 
	XR

 -2 LLL L *	L
 L 
L2Q

 *.
Y
Y&
Y %	
Y
 

Y3
3
 ?3
 (	3

 )3
 3
 
3
r)   r   N)!
__future__r   typingr   r   r   r   r   narwhals._spark_like.utilsr	   r
   narwhals.utilsr   r   r   r   r   pyspark.sqlr   typing_extensionsr   narwhals._spark_like.exprr   r   r   r2   r   narwhals._spark_like.typingr   narwhals.dtypesr   r   r   r5   r)   r'   <module>r      sS    "       ? B ) " 0 ( 3%&7BA=%&g
 g
r)   