1818import decimal
1919import datetime
2020
21+ import numpy as np
2122import pandas as pd
23+ from distutils .version import LooseVersion
2224
2325from pyspark import pandas as ps
2426from pyspark .testing .pandasutils import PandasOnSparkTestCase
@@ -94,13 +96,11 @@ def complex_pdf(self):
9496
9597 @property
9698 def complex_psdf (self ):
97- pssers = {
98- "this_array" : self .psser ,
99- "that_array" : ps .Series ([[2 , 3 , 4 ]]),
100- "this_struct" : ps .Index ([("x" , 1 )]).to_series ().reset_index (drop = True ),
101- "that_struct" : ps .Index ([("a" , 2 )]).to_series ().reset_index (drop = True ),
102- }
103- return ps .concat (pssers , axis = 1 )
99+ s1 = self .psser .rename ("this_array" )
100+ s2 = ps .Series ([[2 , 3 , 4 ]], name = "that_array" )
101+ s3 = ps .Index ([("x" , 1 )]).to_series (name = "this_struct" ).reset_index (drop = True )
102+ s4 = ps .Index ([("a" , 2 )]).to_series (name = "that_struct" ).reset_index (drop = True )
103+ return ps .concat ([s1 , s2 , s3 , s4 ], axis = 1 )
104104
105105 def test_add (self ):
106106 pdf , psdf = self .array_pdf , self .array_psdf
@@ -247,6 +247,31 @@ def test_from_to_pandas(self):
247247 self .assert_eq (pser , psser ._to_pandas (), check_exact = False )
248248 self .assert_eq (ps .from_pandas (pser ), psser )
249249
250+ def test_from_pandas_with_np_array_elements (self ):
251+ # SPARK-55242: pyspark.pandas should handle list-valued columns whose elements
252+ # are stored as np.ndarray by pandas 3 (e.g. [[e] for e in ...]).
253+ # Previously this raised "ValueError: The truth value of an array is ambiguous"
254+ # inside DataTypeOps.prepare() when it called col.replace({np.nan: None}).
255+ import warnings
256+ from pyspark .pandas .utils import PandasAPIOnSparkAdviceWarning
257+
258+ pdf = pd .DataFrame (
259+ {
260+ "a" : [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 ],
261+ "b" : [[e ] for e in [4 , 5 , 6 , 3 , 2 , 1 , 0 , 0 , 0 ]],
262+ },
263+ index = np .random .rand (9 ),
264+ )
265+ with warnings .catch_warnings ():
266+ warnings .simplefilter ("ignore" , PandasAPIOnSparkAdviceWarning )
267+ # from_pandas must not raise; the resulting DataFrame must match the original.
268+ psdf = ps .from_pandas (pdf )
269+ # Sort both by "a" to ensure deterministic order for comparison.
270+ pdf = pdf .sort_values (by = "a" ).reset_index (drop = True )
271+ psdf = psdf .sort_values (by = "a" ).reset_index (drop = True )
272+ self .assert_eq (pdf ["a" ], psdf ["a" ])
273+ self .assert_eq (pdf ["b" ], psdf ["b" ])
274+
250275 def test_isnull (self ):
251276 pdf , psdf = self .array_pdf , self .array_psdf
252277 for col in self .array_df_cols :
@@ -266,13 +291,20 @@ def test_invert(self):
266291 self .assertRaises (TypeError , lambda : ~ self .psser )
267292
268293 def test_eq (self ):
269- pdf , psdf = self .complex_pdf , self .complex_pdf
270- self .assert_eq (
271- pdf ["this_array" ] == pdf ["that_array" ], psdf ["this_array" ] == psdf ["that_array" ]
272- )
273- self .assert_eq (
274- pdf ["this_struct" ] == pdf ["that_struct" ], psdf ["this_struct" ] == psdf ["that_struct" ]
275- )
294+ pdf , psdf = self .complex_pdf , self .complex_psdf
295+ if LooseVersion (pd .__version__ ) >= LooseVersion ("3.0.0" ):
296+ with self .assertRaises ((ValueError , TypeError )):
297+ psdf ["this_array" ] == psdf ["that_array" ]
298+ with self .assertRaises ((ValueError , TypeError )):
299+ psdf ["this_struct" ] == psdf ["that_struct" ]
300+ else :
301+ self .assert_eq (
302+ pdf ["this_array" ] == pdf ["that_array" ], psdf ["this_array" ] == psdf ["that_array" ]
303+ )
304+ self .assert_eq (
305+ pdf ["this_struct" ] == pdf ["that_struct" ], psdf ["this_struct" ] == psdf ["that_struct" ]
306+ )
307+
276308 self .assert_eq (
277309 pdf ["this_array" ] == pdf ["this_array" ], psdf ["this_array" ] == psdf ["this_array" ]
278310 )
@@ -281,13 +313,20 @@ def test_eq(self):
281313 )
282314
283315 def test_ne (self ):
284- pdf , psdf = self .complex_pdf , self .complex_pdf
285- self .assert_eq (
286- pdf ["this_array" ] != pdf ["that_array" ], psdf ["this_array" ] != psdf ["that_array" ]
287- )
288- self .assert_eq (
289- pdf ["this_struct" ] != pdf ["that_struct" ], psdf ["this_struct" ] != psdf ["that_struct" ]
290- )
316+ pdf , psdf = self .complex_pdf , self .complex_psdf
317+ if LooseVersion (pd .__version__ ) >= LooseVersion ("3.0.0" ):
318+ with self .assertRaises ((ValueError , TypeError )):
319+ psdf ["this_array" ] != psdf ["that_array" ]
320+ with self .assertRaises ((ValueError , TypeError )):
321+ psdf ["this_struct" ] != psdf ["that_struct" ]
322+ else :
323+ self .assert_eq (
324+ pdf ["this_array" ] != pdf ["that_array" ], psdf ["this_array" ] != psdf ["that_array" ]
325+ )
326+ self .assert_eq (
327+ pdf ["this_struct" ] != pdf ["that_struct" ], psdf ["this_struct" ] != psdf ["that_struct" ]
328+ )
329+
291330 self .assert_eq (
292331 pdf ["this_array" ] != pdf ["this_array" ], psdf ["this_array" ] != psdf ["this_array" ]
293332 )
@@ -296,13 +335,20 @@ def test_ne(self):
296335 )
297336
298337 def test_lt (self ):
299- pdf , psdf = self .complex_pdf , self .complex_pdf
300- self .assert_eq (
301- pdf ["this_array" ] < pdf ["that_array" ], psdf ["this_array" ] < psdf ["that_array" ]
302- )
303- self .assert_eq (
304- pdf ["this_struct" ] < pdf ["that_struct" ], psdf ["this_struct" ] < psdf ["that_struct" ]
305- )
338+ pdf , psdf = self .complex_pdf , self .complex_psdf
339+ if LooseVersion (pd .__version__ ) >= LooseVersion ("3.0.0" ):
340+ with self .assertRaises ((ValueError , TypeError )):
341+ psdf ["this_array" ] < psdf ["that_array" ]
342+ with self .assertRaises ((ValueError , TypeError )):
343+ psdf ["this_struct" ] < psdf ["that_struct" ]
344+ else :
345+ self .assert_eq (
346+ pdf ["this_array" ] < pdf ["that_array" ], psdf ["this_array" ] < psdf ["that_array" ]
347+ )
348+ self .assert_eq (
349+ pdf ["this_struct" ] < pdf ["that_struct" ], psdf ["this_struct" ] < psdf ["that_struct" ]
350+ )
351+
306352 self .assert_eq (
307353 pdf ["this_array" ] < pdf ["this_array" ], psdf ["this_array" ] < psdf ["this_array" ]
308354 )
@@ -311,13 +357,20 @@ def test_lt(self):
311357 )
312358
313359 def test_le (self ):
314- pdf , psdf = self .complex_pdf , self .complex_pdf
315- self .assert_eq (
316- pdf ["this_array" ] <= pdf ["that_array" ], psdf ["this_array" ] <= psdf ["that_array" ]
317- )
318- self .assert_eq (
319- pdf ["this_struct" ] <= pdf ["that_struct" ], psdf ["this_struct" ] <= psdf ["that_struct" ]
320- )
360+ pdf , psdf = self .complex_pdf , self .complex_psdf
361+ if LooseVersion (pd .__version__ ) >= LooseVersion ("3.0.0" ):
362+ with self .assertRaises ((ValueError , TypeError )):
363+ psdf ["this_array" ] <= psdf ["that_array" ]
364+ with self .assertRaises ((ValueError , TypeError )):
365+ psdf ["this_struct" ] <= psdf ["that_struct" ]
366+ else :
367+ self .assert_eq (
368+ pdf ["this_array" ] <= pdf ["that_array" ], psdf ["this_array" ] <= psdf ["that_array" ]
369+ )
370+ self .assert_eq (
371+ pdf ["this_struct" ] <= pdf ["that_struct" ], psdf ["this_struct" ] <= psdf ["that_struct" ]
372+ )
373+
321374 self .assert_eq (
322375 pdf ["this_array" ] <= pdf ["this_array" ], psdf ["this_array" ] <= psdf ["this_array" ]
323376 )
@@ -326,13 +379,20 @@ def test_le(self):
326379 )
327380
328381 def test_gt (self ):
329- pdf , psdf = self .complex_pdf , self .complex_pdf
330- self .assert_eq (
331- pdf ["this_array" ] > pdf ["that_array" ], psdf ["this_array" ] > psdf ["that_array" ]
332- )
333- self .assert_eq (
334- pdf ["this_struct" ] > pdf ["that_struct" ], psdf ["this_struct" ] > psdf ["that_struct" ]
335- )
382+ pdf , psdf = self .complex_pdf , self .complex_psdf
383+ if LooseVersion (pd .__version__ ) >= LooseVersion ("3.0.0" ):
384+ with self .assertRaises ((ValueError , TypeError )):
385+ psdf ["this_array" ] > psdf ["that_array" ]
386+ with self .assertRaises ((ValueError , TypeError )):
387+ psdf ["this_struct" ] > psdf ["that_struct" ]
388+ else :
389+ self .assert_eq (
390+ pdf ["this_array" ] > pdf ["that_array" ], psdf ["this_array" ] > psdf ["that_array" ]
391+ )
392+ self .assert_eq (
393+ pdf ["this_struct" ] > pdf ["that_struct" ], psdf ["this_struct" ] > psdf ["that_struct" ]
394+ )
395+
336396 self .assert_eq (
337397 pdf ["this_array" ] > pdf ["this_array" ], psdf ["this_array" ] > psdf ["this_array" ]
338398 )
@@ -341,13 +401,20 @@ def test_gt(self):
341401 )
342402
343403 def test_ge (self ):
344- pdf , psdf = self .complex_pdf , self .complex_pdf
345- self .assert_eq (
346- pdf ["this_array" ] >= pdf ["that_array" ], psdf ["this_array" ] >= psdf ["that_array" ]
347- )
348- self .assert_eq (
349- pdf ["this_struct" ] >= pdf ["that_struct" ], psdf ["this_struct" ] >= psdf ["that_struct" ]
350- )
404+ pdf , psdf = self .complex_pdf , self .complex_psdf
405+ if LooseVersion (pd .__version__ ) >= LooseVersion ("3.0.0" ):
406+ with self .assertRaises ((ValueError , TypeError )):
407+ psdf ["this_array" ] >= psdf ["that_array" ]
408+ with self .assertRaises ((ValueError , TypeError )):
409+ psdf ["this_struct" ] >= psdf ["that_struct" ]
410+ else :
411+ self .assert_eq (
412+ pdf ["this_array" ] >= pdf ["that_array" ], psdf ["this_array" ] >= psdf ["that_array" ]
413+ )
414+ self .assert_eq (
415+ pdf ["this_struct" ] >= pdf ["that_struct" ], psdf ["this_struct" ] >= psdf ["that_struct" ]
416+ )
417+
351418 self .assert_eq (
352419 pdf ["this_array" ] >= pdf ["this_array" ], psdf ["this_array" ] >= psdf ["this_array" ]
353420 )
0 commit comments