Python ML: Load dataset
Load data from default repo
> python |
Python 3.12.2 (tags/v3.12.2:6abddd9, Feb  6 2024, 21:26:36) [MSC v.1937 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
|
>>> import sklearn
>>> print(sklearn.__version__)
|
1.6.1
|
>>> from sklearn import datasets
>>> dir(datasets)
|
['__all__', '__builtins__',
'__cached__', '__doc__', '__file__', '__getattr__', '__loader__', '__name__',
'__package__', '__path__', '__spec__', '_arff_parser', '_base',
'_california_housing', '_covtype', '_kddcup99', '_lfw', '_olivetti_faces',
'_openml', '_rcv1', '_samples_generator', '_species_distributions',
'_svmlight_format_fast', '_svmlight_format_io', '_twenty_newsgroups',
'clear_data_home', 'dump_svmlight_file', 'fetch_20newsgroups',
'fetch_20newsgroups_vectorized', 'fetch_california_housing', 'fetch_covtype',
'fetch_file', 'fetch_kddcup99', 'fetch_lfw_pairs', 'fetch_lfw_people',
'fetch_olivetti_faces', 'fetch_openml', 'fetch_rcv1',
'fetch_species_distributions', 'get_data_home', 'load_breast_cancer',
'load_diabetes', 'load_digits', 'load_files', 'load_iris', 'load_linnerud',
'load_sample_image', 'load_sample_images', 'load_svmlight_file',
'load_svmlight_files', 'load_wine', 'make_biclusters', 'make_blobs',
'make_checkerboard', 'make_circles', 'make_classification', 'make_friedman1',
'make_friedman2', 'make_friedman3', 'make_gaussian_quantiles',
'make_hastie_10_2', 'make_low_rank_matrix', 'make_moons',
'make_multilabel_classification', 'make_regression', 'make_s_curve',
'make_sparse_coded_signal', 'make_sparse_spd_matrix',
'make_sparse_uncorrelated', 'make_spd_matrix', 'make_swiss_roll', 'textwrap']
|
>>> iris=datasets.load_iris()
>>> print(iris.feature_names)
|
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
|
>>> print(iris.data)
|
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 …… |
>>> print(iris.target)
|
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] |
>>> print(iris.DESCR)
|
.. _iris_dataset:
Iris plants dataset
--------------------
**Data Set Characteristics:**
:Number of Instances: 150 (50 in each of three classes)
:Number of Attributes: 4 numeric, predictive attributes and the class
:Attribute Information:
    - sepal length in cm
    - sepal width in cm
    - petal length in cm
    - petal width in cm
    - class:
        - Iris-Setosa
        - Iris-Versicolour
        - Iris-Virginica
:Summary Statistics:
============== ==== ==== ======= ===== ====================
                Min  Max   Mean    SD   Class Correlation
============== ==== ==== ======= ===== ====================
sepal length:   4.3  7.9   5.84   0.83    0.7826
sepal width:    2.0  4.4   3.05   0.43   -0.4194
petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)
============== ==== ==== ======= ===== ====================
……… |
Load dataset from OpenML
To load data from OpenML
C:\Windows\System32>python |
Python 3.12.2 (tags/v3.12.2:6abddd9, Feb  6 2024, 21:26:36) [MSC v.1937 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information. |
>>> import sklearn
>>> from sklearn import datasets
>>> from sklearn.datasets import fetch_openml
>>> mice=fetch_openml(name='miceprotein', version=4)
>>> mice.details |
{'id': '40966', 'name':
'MiceProtein', 'version': '4', 'description_version': '1', 'format': 'ARFF',
'upload_date': '2017-11-08T16:00:15', 'licence': 'Public', 'url':
'https://api.openml.org/data/v1/download/17928620/MiceProtein.arff',
'parquet_url': 'https://data.openml.org/datasets/0004/40966/dataset_40966.pq',
'file_id': '17928620', 'default_target_attribute': 'class',
'row_id_attribute': 'MouseID', 'ignore_attribute': ['Genotype', 'Treatment',
'Behavior'], 'tag': ['Biology', 'Data Science', 'Health', 'OpenML-CC18',
'Research', 'study_135', 'study_98', 'study_99'], 'visibility': 'public',
'minio_url': 'https://data.openml.org/datasets/0004/40966/dataset_40966.pq',
'status': 'active', 'processing_date': '2018-10-04 00:49:58', 'md5_checksum':
'3c479a6885bfa0438971388283a1ce32'} >>>
|
Ulasan