@@ -29,6 +29,7 @@ import datetime
2929import os
3030import warnings
3131import sys
32+ import re
3233
3334import narwhals.stable.v2 as nw
3435import numpy as np
@@ -42,27 +43,27 @@ import_datetime()
4243
# Unix epoch as a naive datetime (the tz argument is None).
cdef object unix_origin = datetime_new(1970, 1, 1, 0, 0, 0, 0, None)

# Splits a format string into its base name (group 1), an optional integer
# width (group 2) and, only when a width is present, an optional ".decimals"
# suffix. Examples: "DATE9" -> "DATE", "TIME20.3" -> "TIME".
# The base name must be at least three characters long and both start and end
# with an upper-case letter, so names with inner digits such as "E8601DA" are
# kept whole while a trailing width is split off.
cdef object format_regex = re.compile(r"^([A-Z][A-Z0-9]+[A-Z])(\d+)?(?(2)(?:\.\d+)?$|$)")

# Base names (without width/decimals) of the SAS formats treated as dates.
# Deliberately not listed because they do not print as full dates in SAS:
# "MONNAME", "MONTH", "WEEKDAY", "QTR", "QTRR", "YEAR", "DAY", "DOWNAME"
cdef list sas_date_formats = [
    "WEEKDATE", "MMDDYY", "DDMMYY", "YYMMDD", "DATE",
    "DDMMYYB", "DDMMYYC", "DDMMYYD",
    "DDMMYYN", "DDMMYYP", "DDMMYYS",
    "MMDDYYB", "MMDDYYC", "MMDDYYD",
    "MMDDYYN", "MMDDYYP", "MMDDYYS",
    "WEEKDATX", "DTDATE",
    "IS8601DA", "E8601DA", "B8601DA",
    "YYMMDDB", "YYMMDDD", "YYMMDDN", "YYMMDDP", "YYMMDDS",
]

# Base names of the SAS formats treated as datetimes.
cdef list sas_datetime_formats = [
    "DATETIME", "E8601DT", "DATEAMPM", "MDYAMPM",
    "IS8601DT", "B8601DT", "B8601DN",
]

# Base names of the SAS formats treated as times.
# "HOUR" is deliberately not listed: it does not print as a full time format in SAS.
cdef list sas_time_formats = [
    "TIME", "HHMM", "TOD", "TIMEAMPM",
    "IS8601TM", "E8601TM", "B8601TM",
]

# Only declared here, not assigned.
# NOTE(review): presumably set elsewhere to
# sas_date_formats + sas_datetime_formats + sas_time_formats -- confirm.
cdef list sas_all_formats

# Origin used for SAS date/time values, and its distance in seconds to the
# Unix epoch (unix_origin - sas_origin).
cdef object sas_origin = datetime_new(1960, 1, 1, 0, 0, 0, 0, None)
cdef object sas_secs_from_unix = total_seconds(unix_origin - sas_origin)
6263
# Base names (without width/decimals) of the SPSS formats treated as
# datetimes, dates and times respectively. A format such as "DATETIME23.2"
# or "ADATE10" is expected to be reduced to its base name before lookup.
cdef list spss_datetime_formats = ["DATETIME", "YMDHMS"]
cdef list spss_date_formats = ["DATE", "ADATE", "EDATE", "JDATE", "SDATE"]
cdef list spss_time_formats = ["TIME", "DTIME"]

# Only declared here, not assigned.
# NOTE(review): presumably set elsewhere to
# spss_date_formats + spss_datetime_formats + spss_time_formats -- confirm.
cdef list spss_all_formats

# Origin used for SPSS date/time values, as a naive datetime (tz is None).
cdef object spss_origin = datetime_new(1582, 10, 14, 0, 0, 0, 0, None)
@@ -148,26 +149,32 @@ cdef py_datetime_format transform_variable_format(str var_format, py_file_format
148149 Transforms a readstat var_format to a date, datetime or time format label
149150 """
150151 if file_format == FILE_FORMAT_SAS:
151- if var_format in sas_all_formats:
152- if var_format in sas_date_formats:
153- return DATE_FORMAT_DATE
154- elif var_format in sas_datetime_formats:
155- return DATE_FORMAT_DATETIME
156- elif var_format in sas_time_formats:
157- return DATE_FORMAT_TIME
158- else :
159- return DATE_FORMAT_NOTADATE
152+ if var_format:
153+ format_match = format_regex.match(var_format)
154+ if format_match:
155+ var_format_name = format_match.group(1 )
156+ if var_format_name in sas_all_formats:
157+ if var_format_name in sas_date_formats:
158+ return DATE_FORMAT_DATE
159+ elif var_format_name in sas_datetime_formats:
160+ return DATE_FORMAT_DATETIME
161+ elif var_format_name in sas_time_formats:
162+ return DATE_FORMAT_TIME
163+ return DATE_FORMAT_NOTADATE
160164
161165 elif file_format == FILE_FORMAT_SPSS:
162- if var_format in spss_all_formats:
163- if var_format in spss_date_formats:
164- return DATE_FORMAT_DATE
165- elif var_format in spss_datetime_formats:
166- return DATE_FORMAT_DATETIME
167- elif var_format in spss_time_formats:
168- return DATE_FORMAT_TIME
169- else :
170- return DATE_FORMAT_NOTADATE
166+ if var_format:
167+ format_match = format_regex.match(var_format)
168+ if format_match:
169+ var_format_name = format_match.group(1 )
170+ if var_format_name in spss_all_formats:
171+ if var_format_name in spss_date_formats:
172+ return DATE_FORMAT_DATE
173+ elif var_format_name in spss_datetime_formats:
174+ return DATE_FORMAT_DATETIME
175+ elif var_format_name in spss_time_formats:
176+ return DATE_FORMAT_TIME
177+ return DATE_FORMAT_NOTADATE
171178
172179 elif file_format == FILE_FORMAT_STATA:
173180 if var_format in stata_all_formats:
@@ -1267,27 +1274,27 @@ cdef object run_conversion(object filename_path, py_file_format file_format, py_
12671274
12681275 if extra_date_formats is not None :
12691276 if file_format == FILE_FORMAT_SAS:
1270- sas_date_formats.extend(extra_date_formats)
1277+ sas_date_formats.extend([format_regex.match(edf).group( 1 ) for edf in extra_date_formats if format_regex.match(edf)] )
12711278 elif file_format == FILE_FORMAT_SPSS:
1272- spss_date_formats.extend(extra_date_formats)
1279+ spss_date_formats.extend([format_regex.match(edf).group( 1 ) for edf in extra_date_formats if format_regex.match(edf)] )
12731280 elif file_format == FILE_FORMAT_STATA:
12741281 stata_date_formats.extend(extra_date_formats)
12751282 else :
12761283 raise PyreadstatError(" Unknown file format" )
12771284 if extra_datetime_formats is not None :
12781285 if file_format == FILE_FORMAT_SAS:
1279- sas_datetime_formats.extend(extra_datetime_formats)
1286+ sas_datetime_formats.extend([format_regex.match(edtf).group( 1 ) for edtf in extra_datetime_formats if format_regex.match(edtf)] )
12801287 elif file_format == FILE_FORMAT_SPSS:
1281- spss_datetime_formats.extend(extra_datetime_formats)
1288+ spss_datetime_formats.extend([format_regex.match(edtf).group( 1 ) for edtf in extra_datetime_formats if format_regex.match(edtf)] )
12821289 elif file_format == FILE_FORMAT_STATA:
12831290 stata_datetime_formats.extend(extra_datetime_formats)
12841291 else :
12851292 raise PyreadstatError(" Unknown file format" )
12861293 if extra_time_formats is not None :
12871294 if file_format == FILE_FORMAT_SAS:
1288- sas_time_formats.extend(extra_time_formats)
1295+ sas_time_formats.extend([format_regex.match(etf).group( 1 ) for etf in extra_time_formats if format_regex.match(etf)] )
12891296 elif file_format == FILE_FORMAT_SPSS:
1290- spss_time_formats.extend(extra_time_formats)
1297+ spss_time_formats.extend([format_regex.match(etf).group( 1 ) for etf in extra_time_formats if format_regex.match(etf)] )
12911298 elif file_format == FILE_FORMAT_STATA:
12921299 stata_time_formats.extend(extra_time_formats)
12931300 else :
0 commit comments