![]() ![]() dtype = object : -> 108 return _encode_python ( values, uniques, encode ) 109 else : 110 return _encode_numpy ( values, uniques, encode ) ~/ sandbox / rf / automl - rf / venv / lib / python3. py in _encode ( values, uniques, encode ) 107 if values. 6 / site - packages / sklearn / preprocessing / label. classes_, encode = True ) 258 return y 259 ~/ sandbox / rf / automl - rf / venv / lib / python3. py in transform ( self, y ) 255 return np. transform ( X_test ) ~/ sandbox / rf / automl - rf / venv / lib / python3. fit_transform ( X_train ) -> 6 X_test = categorical_convert. array ( for v in values ]) 69 except KeyError as e : KeyError : 'United-States' During handling of the above exception, another exception occurred : ValueError Traceback ( most recent call last ) 4 categorical_convert = LabelEncoder () 5 X_train = categorical_convert. array ( for v in values ]) 69 except KeyError as e : ~/ sandbox / rf / automl - rf / venv / lib / python3. py in _encode_python ( values, uniques, encode ) 67 try : -> 68 encoded = np. ![]() KeyError Traceback ( most recent call last ) ~/ sandbox / rf / automl - rf / venv / lib / python3. Uhhh!!! Now we get an error because there are missing values in the column. dtype ) 65 if encode : TypeError : '<' not supported between instances of 'str' and 'float' py in _encode_python ( values, uniques, encode ) 61 # only used in _encode below, see docstring there for details 62 if uniques is None : -> 63 uniques = sorted ( set ( values )) 64 uniques = np. classes_, y = _encode ( y, encode = True ) 237 return y 238 ~/ sandbox / rf / automl - rf / venv / lib / python3. py in fit_transform ( self, y ) 235 y = column_or_1d ( y, warn = True ) -> 236 self. fit_transform ( X_train ) 6 X_test = categorical_convert. TypeError Traceback ( most recent call last ) 3 for column in : 4 categorical_convert = LabelEncoder () -> 5 X_train = categorical_convert. Let’s convert categorical values to integers. 
The common options here are one-hot encoding or converting into integers. We need to convert our categorical columns into numerical values. Hmmm… this is strange: the Random Forest is an ensemble of decision trees, and decision trees should work with categorical values… A quick search confirms that scikit-learn’s Random Forest doesn’t work with categorical values and that somebody is working on this in sklearn (stackoverflow link). We have a ValueError, because there is a ‘Private’ value in the workclass column. py in asarray ( a, dtype, order ) 536 -> 538 return array ( a, dtype, copy = False, order = order ) 539 540 ValueError : could not convert string to float : 'Private' 6 / site - packages / numpy / core / numeric. ![]() asarray ( array, dtype = dtype, order = order ) 528 except ComplexWarning : 529 raise ValueError ( "Complex data not supported \n " ~/ sandbox / rf / automl - rf / venv / lib / python3. simplefilter ( 'error', ComplexWarning ) -> 527 array = np. py in check_array ( array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator ) 525 try : 526 warnings. 6 / site - packages / sklearn / utils / validation. > 250 X = check_array ( X, accept_sparse = "csc", dtype = DTYPE ) 251 y = check_array ( y, accept_sparse = 'csc', ensure_2d = False, dtype = None ) 252 if sample_weight is not None : ~/ sandbox / rf / automl - rf / venv / lib / python3. py in fit ( self, X, y, sample_weight ) 248 249 # Validate or convert input data 6 / site - packages / sklearn / ensemble / forest. fit ( X_train, y_train ) ~/ sandbox / rf / automl - rf / venv / lib / python3. ![]() ValueError Traceback ( most recent call last ) 1 rf = RandomForestClassifier ( n_estimators = 1000 ) -> 2 rf = rf. ![]()
0 Comments
Leave a Reply. |
Author: Write something about yourself. No need to be fancy, just an overview. Archives. Categories. |