Tuesday, April 25, 2017

Prepare a Python machine learning environment on CentOS 6.6 for training models

# Major points: 1. Python 2.7 is required, not 2.6.  CentOS 6.6 relies on Python 2.6 for the OS itself, so upgrading the system Python to 2.7 is not an option; Python 2.7 has to be installed alongside 2.6.  2. Use setuptools to install pip, then use pip to install the rest.

# Install build dependencies
yum groupinstall "Development tools"
yum -y install gcc gcc-c++ numpy python-devel scipy
yum install zlib-devel bzip2-devel openssl-devel ncurses-devel sqlite-devel

# Compile and install Python 2.7.13
wget https://www.python.org/ftp/python/2.7.13/Python-2.7.13.tgz
tar xzf Python-2.7.13.tgz
cd Python-2.7.13
./configure
# make altinstall is used to prevent replacing the default python binary file /usr/bin/python.
make altinstall

# Download setuptools using wget:
wget --no-check-certificate https://pypi.python.org/packages/source/s/setuptools/setuptools-1.4.2.tar.gz
# Extract the files from the archive:
tar -xvf setuptools-1.4.2.tar.gz
# Enter the extracted directory:
cd setuptools-1.4.2

# Install setuptools using the Python we've installed (2.7.13)
# python2.7 setup.py install
/opt/python-2.7.13/Python-2.7.13/python ./setup.py install

# install pip
curl https://raw.githubusercontent.com/pypa/pip/master/contrib/get-pip.py | python2.7 -

or (the following worked for me):

[root@centos python-2.7.13]# /opt/python-2.7.13/Python-2.7.13/python ./setuptools/setuptools-1.4.2/easy_install.py pip

# install numpy
[root@centos python-2.7.13]# /opt/python-2.7.13/Python-2.7.13/python -m pip install numpy

# Install SciPy
[root@centos python-2.7.13]# /opt/python-2.7.13/Python-2.7.13/python -m pip install scipy

# Install scikit-learn
[root@centos python-2.7.13]# /opt/python-2.7.13/Python-2.7.13/python -m pip install scikit-learn

# Install nltk
[root@centos python-2.7.13]# /opt/python-2.7.13/Python-2.7.13/python -m pip install nltk

# Download nltk data (will be stored under /root/nltk_data)
[root@centos SVM]# /opt/python-2.7.13/Python-2.7.13/python -m nltk.downloader all