Saturday, March 16, 2019

Download Kaggle Datasets on Google Colab



In [0]:
! pip install -q kaggle
In [0]:
from google.colab import files
In [ ]:
# Bind the result instead of leaving it as the cell's last expression: the returned
# mapping of file name -> file contents would otherwise be echoed into the cell
# output, exposing the API key stored in kaggle.json.
uploaded = files.upload()
In [0]:
! mkdir ~/.kaggle
In [0]:
# Copy kaggle.json from the working directory into ~/.kaggle/.
! cp kaggle.json ~/.kaggle/
In [0]:
# Restrict the credentials file to owner read/write only (mode 600).
! chmod 600 ~/.kaggle/kaggle.json
In [7]:
# List datasets through the Kaggle CLI.
! kaggle datasets list
ref                                                          title                                                size  lastUpdated          downloadCount  
-----------------------------------------------------------  --------------------------------------------------  -----  -------------------  -------------  
russellyates88/suicide-rates-overview-1985-to-2016           Suicide Rates Overview 1985 to 2016                 396KB  2018-12-01 19:18:25          12009  
ronitf/heart-disease-uci                                     Heart Disease UCI                                     3KB  2018-06-25 11:33:56          16139  
karangadiya/fifa19                                           FIFA 19 complete player dataset                       2MB  2018-12-21 03:52:59          13725  
mohansacharya/graduate-admissions                            Graduate Admissions                                   9KB  2018-12-28 10:07:14          13531  
lava18/google-play-store-apps                                Google Play Store Apps                                2MB  2019-02-03 13:55:47          42229  
bigquery/crypto-ethereum-classic                             Ethereum Classic Blockchain                          69GB  2019-03-04 14:57:33              0  
iarunava/cell-images-for-detecting-malaria                   Malaria Cell Images Dataset                         337MB  2018-12-05 05:40:21           2806  
vjchoudhary7/customer-segmentation-tutorial-in-python        Mall Customer Segmentation Data                       2KB  2018-08-11 07:23:02           4391  
jessicali9530/celeba-dataset                                 CelebFaces Attributes (CelebA) Dataset                1GB  2018-06-01 20:08:48           5050  
noriuk/us-education-datasets-unification-project             U.S. Education Datasets: Unification Project         85MB  2019-03-02 18:41:52           2086  
jessicali9530/stanford-dogs-dataset                          Stanford Dogs Dataset                               735MB  2019-02-13 05:45:25           1230  
safegraph/census-block-group-american-community-survey-data  Census Block Group American Community Survey Data     2GB  2018-12-22 00:29:56            465  
safegraph/visit-patterns-by-census-block-group               Consumer & Visitor Insights For Neighborhoods        66MB  2018-12-19 21:31:50            798  
cityofLA/los-angeles-parking-citations                       Los Angeles Parking Citations                       253MB  2019-03-15 22:11:26           2286  
jutrera/stanford-car-dataset-by-classes-folder               Stanford Car Dataset by classes folder                2GB  2018-07-02 07:35:45           2172  
pavansanagapati/urban-sound-classification                   Urban Sound Classification                            6GB  2018-06-16 13:44:36           1765  
rmisra/news-headlines-dataset-for-sarcasm-detection          News Headlines Dataset For Sarcasm Detection          2MB  2018-06-09 22:14:56           1800  
fivethirtyeight/fivethirtyeight-comic-characters-dataset     FiveThirtyEight Comic Characters Dataset            577KB  2019-02-01 15:02:23           1578  
anokas/kuzushiji                                             Kuzushiji-MNIST                                     318MB  2018-12-17 01:19:31            596  
mdhrumil/top-5000-youtube-channels-data-from-socialblade     Top 5000 Youtube channels data from Socialblade.    128KB  2018-09-09 14:05:54           4998  
In [8]:
# Download the competition's zipped data files into the current directory.
! kaggle competitions download -c 'santander-customer-transaction-prediction'
Downloading train.csv.zip to /content
 89% 109M/122M [00:01<00:00, 62.5MB/s] 
100% 122M/122M [00:01<00:00, 76.7MB/s]
Downloading sample_submission.csv.zip to /content
  0% 0.00/463k [00:00<?, ?B/s]
100% 463k/463k [00:00<00:00, 148MB/s]
Downloading test.csv.zip to /content
 95% 116M/122M [00:01<00:00, 75.8MB/s]
100% 122M/122M [00:01<00:00, 82.0MB/s]
In [9]:
import os
# Show the working directory's contents to confirm the downloaded archives are present.
os.listdir()
Out[9]:
['.config',
 'kaggle.json',
 'train.csv.zip',
 'sample_submission.csv.zip',
 'test.csv.zip',
 'sample_data']
In [10]:
# Print the current working directory.
!pwd
/content
In [0]:
!mkdir train
!mkdir test
!mkdir sub
In [12]:
! unzip train.csv.zip -d train
Archive:  train.csv.zip
  inflating: train/train.csv         
In [13]:
! unzip test.csv.zip -d test
Archive:  test.csv.zip
  inflating: test/test.csv           
In [14]:
! unzip sample_submission.csv.zip -d sub
Archive:  sample_submission.csv.zip
  inflating: sub/sample_submission.csv  
In [15]:
# Check what was extracted into train/.
os.listdir("train")
Out[15]:
['train.csv']
In [0]:
import pandas as pd
In [0]:
# Load the extracted training CSV into a DataFrame.
train = pd.read_csv("train/train.csv")
In [22]:
# Preview the first rows of the training data.
train.head()
Out[22]:
ID_code target var_0 var_1 var_2 var_3 var_4 var_5 var_6 var_7 ... var_190 var_191 var_192 var_193 var_194 var_195 var_196 var_197 var_198 var_199
0 train_0 0 8.9255 -6.7863 11.9081 5.0930 11.4607 -9.2834 5.1187 18.6266 ... 4.4354 3.9642 3.1364 1.6910 18.5227 -2.3978 7.8784 8.5635 12.7803 -1.0914
1 train_1 0 11.5006 -4.1473 13.8588 5.3890 12.3622 7.0433 5.6208 16.5338 ... 7.6421 7.7214 2.5837 10.9516 15.4305 2.0339 8.1267 8.7889 18.3560 1.9518
2 train_2 0 8.6093 -2.7457 12.0805 7.8928 10.5825 -9.0837 6.9427 14.6155 ... 2.9057 9.7905 1.6704 1.6858 21.6042 3.1417 -6.5213 8.2675 14.7222 0.3965
3 train_3 0 11.0604 -2.1518 8.9522 7.1957 12.5846 -1.8361 5.8428 14.9250 ... 4.4666 4.7433 0.7178 1.4214 23.0347 -1.2706 -2.9275 10.2922 17.9697 -8.9996
4 train_4 0 9.8369 -1.4834 12.8746 6.6375 12.2772 2.4486 5.9405 19.2514 ... -1.4905 9.5214 -0.1508 9.1942 13.2876 -1.5121 3.9267 9.5031 17.9974 -8.8104
5 rows × 202 columns
In [23]:
# (rows, columns) of the training frame.
train.shape
Out[23]:
(200000, 202)

No comments :

Post a Comment