# need to change the following files
#/etc/host.conf
#/etc/sysconfig/network-scripts/ifcfg-em1
# add the following to the ifcfg-xxx file
DNS1=10.1.0.100
DNS2=10.1.0.14
# prefix for privnet should be 24
PREFIX=24
# or, equivalently
NETMASK=255.255.255.0
#
#/etc/resolv.conf (written by NetworkManager)
#etc/hostname
#/etc/sysconfig/network
# keep firewalld off on internal-IP-only nodes
# add default route for two network interface computers
route add default gw 128.8.216.1 eno2
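# To make the default route persistent across reboots, a sketch assuming the interface is managed by a NetworkManager connection named eno2:
nmcli connection modify eno2 ipv4.gateway 128.8.216.1
nmcli connection up eno2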
echo "export HISTSIZE=100000" >> .bashrc
echo "export HISTFILESIZE=100000" >> .bashrc
#give jabeen and bhatti sudo privileges.
usermod -aG wheel jabeen
usermod -aG wheel bhatti
hostname
systemctl status NetworkManager
ip address show
ip link
nmcli d
#route -n
#iptables -F
nmcli device status
When an address is rejected, it appears in iptables as an -A rule:
root@siab-1 ~# iptables -S
-A f2b-sshd -s 10.104.122.175/32 -j REJECT --reject-with icmp-port-unreachable
To remove this IP from iptables so it is no longer rejected, use the -D option to delete the rule:
root@siab-1 ~# iptables -D f2b-sshd -s 10.104.122.175/32 -j REJECT --reject-with icmp-port-unreachable
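# Since the f2b-sshd chain comes from fail2ban (sshd jail name assumed here), unbanning through fail2ban-client may be cleaner, so fail2ban also forgets the ban:
fail2ban-client status sshd
fail2ban-client set sshd unbanip 10.104.122.175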
https://www.tecmint.com/create-nic-channel-bonding-in-redhat-centos-fedora/
https://www.tecmint.com/configure-network-interface-in-rhel-centos-7-0/
yum update
#
getenforce
#
# in principle one should reboot computer here.
#
#dnf config-manager --set-enabled powertools
#yum -y install epel-release
yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
yum -y install epel-release
#
#####yum -y install yum-plugin-priorities
#Ensure that the extras repo in /etc/yum.repos.d/almalinux.repo is enabled.
#Ensure that the powertools repo in /etc/yum.repos.d/almalinux-powertools.repo is enabled.
#
yum install -y libmacaroons
#
yum -y install xorg-x11-xauth
#
yum -y install nfs-utils
#
yum -y install yum-utils
#
yum -y install traceroute
#
#yum -y install ypbind rpcbind yp-tools ypserv
dnf install -y sssd realmd oddjob oddjob-mkhomedir adcli nss-pam-ldapd openldap-clients authselect
#
yum -y install tcsh
#
yum -y install hostname
#
#yum -y install yum-plugin-priorities
#
yum -y install libXpm
#
yum -y install git
#
yum -y install vim
#
yum -y install atlas atlas-devel
#
yum -y install blas
#
yum -y install lapack64
#
yum -y install unzip
#
yum -y install clustershell
#
yum -y install nano wget curl net-tools lsof
#
#yum -y install singularity-runtime
#
#yum -y install singularity-ce
#
yum -y install bind-utils
#
yum -y install krb5-workstation
#
yum -y install gparted
#
yum -y install gcc
#
yum -y install gcc-c++
#
yum -y install mesa-libGLU
#
yum -y install gedit
#
yum -y install PackageKit-gtk3-module
#
yum -y install libcanberra-gtk3
#
yum -y install nano
#
yum -y install sysstat
#
yum -y install tmux
#
yum -y install screen
#
yum -y install tk
#
yum -y install lzma
#
yum -y install cmake3
#
yum -y install emacs
#
yum install -y motif motif-devel motif-static
#
ln -s /lib64/libEGL.so.1 /lib64/libEGL.so
#
yum -y install zsh
#
yum -y install libcryptui
#
yum -y install libcryptui-devel
#
yum -y install perl-Digest-MD5
#
#yum -y install mesa-dri-drivers xerces-c
#
yum -y install xerces-c
#
yum -y install libnsl
#
yum -y install java-1.8.0-openjdk-headless
#yum -y install java-latest-openjdk-headless
#
yum -y install compat-openssl10
#
dnf install -y xxhash xxhash-devel xxhash-libs
dnf install sendmail sendmail-cf
dnf install postfix
systemctl enable sendmail
systemctl start sendmail
systemctl status sendmail
echo " this is mail $HOSTNAME " | mail -s "test " bhatti@umd.edu
#
cd
git clone https://github.com/oneapi-src/oneTBB.git
cd oneTBB/
mkdir build && cd build
cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/local/tbb
make -j$(nproc)
make install
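# A sketch for pointing later builds at this TBB install (prefix taken from the cmake line above; the library dir may be lib or lib64 depending on the build):
export CMAKE_PREFIX_PATH=$HOME/local/tbb:$CMAKE_PREFIX_PATH
export LD_LIBRARY_PATH=$HOME/local/tbb/lib64:$LD_LIBRARY_PATH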
#
dnf groupinstall "Development Tools"
dnf install glibc-devel
dnf install xpdf
yum -y install nfs-utils
#/etc/exports - to allow the NFS disk to be shared over the internal IP address space
#/etc/fstab
#
#edit fstab and add following lines
10.1.0.81:/data /data nfs rw,async,intr,nolock,nfsvers=3 0 0
10.1.0.100:/data2 /data2 nfs rw,async,intr,nolock,nfsvers=3 0 0
10.1.0.1:/export/home /home nfs rw,async,intr,nolock,nfsvers=3 0 0
#change /home ===> /scratch
#
lsblk
df -h
#
umount /home
mkdir -p /data
mkdir -p /data2
mkdir -p /scratch
#
umount /home
#
mount /scratch
mount /home
mount /data
mount /data2
#
mount -a
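# Quick checks that the NFS mounts came up (server IPs taken from the fstab lines above):
df -h -t nfs
showmount -e 10.1.0.81
showmount -e 10.1.0.1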
#Commands to be run:
dnf install -y sssd realmd oddjob oddjob-mkhomedir adcli nss-pam-ldapd openldap-clients authselect
ldapsearch -x -H ldap://hepcms-henrietta.privnet -b "dc=privnet,dc=local"
authselect select sssd with-mkhomedir --force
systemctl enable --now oddjobd.service
cd /etc/sssd/conf.d/
cp /data/users/root/Alma9/sssd.conf /etc/sssd/conf.d/
chmod 600 /etc/sssd/conf.d/sssd.conf
systemctl enable sssd
systemctl start sssd
getent passwd bhatti
yum -y install ypbind rpcbind yp-tools
yum -y install ypserv
https://www.cyberciti.biz/faq/howto-move-migrate-user-accounts-old-to-new-server/
https://www.server-world.info/en/note?os=Rocky_Linux_8&p=nis&f=2
#edit /etc/sysconfig/network and /etc/yp.conf
systemctl enable --now sssd
authselect select nis
systemctl start ypbind
systemctl enable ypbind
systemctl status ypbind
ypwhich # Displays the NIS network the server is connected to
systemctl status ypbind # Displays status information about the service
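# A couple of extra NIS sanity checks (assuming the passwd map is served over NIS):
ypwhich -m | head     # list the maps served by the NIS server
ypcat passwd | head   # confirm account records come through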
#
yum -y install ganglia ganglia-gmond
cd /etc/ganglia
mv gmond.conf gmond.conf.bak
cd /etc/ganglia
cp /data/users/root/Alma8/etc/ganglia/gmond.conf /etc/ganglia/
#It works! Using host = 10.1.0.1 for internal network
#Make it start on reboot: and start the service
systemctl enable gmond.service
systemctl start gmond.service
systemctl status gmond.service
yum clean all
# Following gives error
#wget -q -O - http://linux.dell.com/repo/hardware/latest/bootstrap.cgi | bash
#Downloading GPG key: https://linux.dell.com/repo/hardware/latest/public.key
# Importing key into RPM.
#Downloading GPG key: https://linux.dell.com/repo/hardware/latest/public_gpg3.key
# Importing key into RPM.
#Write repository configuration
#Done!
# install missing packages
yum install -y net-snmp-utils libcmpiCppImpl libcmpiCppImpl0 openwsman-server sblim-sfcb sblim-sfcc pciutils libwsman libwsman_client libwsman_curl_client_transport openwsman-client libxslt
# from http://linux.dell.com/repo/hardware/dsu/
curl -O https://linux.dell.com/repo/hardware/dsu/bootstrap.cgi
bash bootstrap.cgi
yum -y install srvadmin-all
exit # logout and login again
#
ssh root@[nodeName]
/opt/dell/srvadmin/sbin/srvadmin-services.sh restart
srvadmin-services.sh status
#
# test if it is working
omreport storage controller
#
omreport storage pdisk controller=0
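# A few more omreport checks that may be useful once OMSA is running:
omreport storage vdisk controller=0   # virtual disk / RAID state
omreport chassis temps                # chassis temperatures
omreport system alertlog | head       # recent hardware alerts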
#
#NOTE: After installing Dell OpenManage Server Administrator, ensure that you log out and log in to reset the path to access Dell OpenManage CLI utilities.
/opt/dell/srvadmin/sbin/srvadmin-services.sh restart
srvadmin-services.sh status
Issue
OMSA: No controllers found
Please go to /opt/dell/srvadmin/etc and run:
# ./autoconf_cim_component.sh
Then restart srvadmin services:
# srvadmin-services.sh restart
Then try again:
# omreport storage controller
# OSG-23/24
dnf -y clean all --enablerepo=*
dnf -y install https://repo.opensciencegrid.org/osg/23-main/osg-23-main-el8-release-latest.rpm
dnf -y install https://repo.opensciencegrid.org/osg/24-main/osg-24-main-el8-release-latest.rpm
dnf -y install https://repo.opensciencegrid.org/osg/24-main/osg-24-main-el9-release-latest.rpm
dnf -y install osg-oasis
dnf -y install osg-wn-client osg-configure
dnf -y install lcmaps vo-client-lcmaps-voms osg-configure-misc voms-clients
dnf -y install xrootd xrootd-client xrootd-scitokens
dnf -y install osg-ce osg-wn-client osg-xrootd
dnf -y install condor
dnf -y install osg-configure-cluster
dnf -y install osg-configure-condor
dnf -y install osg-configure-gateway
osg-configure -c -d
more /var/log/osg/osg-configure.log
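# osg-configure can also verify the configuration without applying it, which is a useful first pass:
osg-configure -v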
systemctl stop firewalld
systemctl disable firewalld
systemctl status firewalld
yum -y install hostname
setsebool -P use_nfs_home_dirs true
cp /data/users/root/Alma8/etc/cvmfs/default.local /etc/cvmfs/
cp /data/users/root/Alma8/etc/auto.master.d/* /etc/auto.master.d/
cp /data/users/root/Alma8/etc/cvmfs/config.d/cms.cern.ch.local /etc/cvmfs/config.d/
#
#echo "export CMS_LOCAL_SITE=T3_US_UMD" > /etc/cvmfs/config.d/cms.cern.ch.local
#
mkdir -p /scratch/cvmfs
chcon -R -t cvmfs_cache_t /scratch/cvmfs
#
#
systemctl enable autofs
systemctl start autofs
ls /cvmfs/cms.cern.ch
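# Additional CVMFS sanity checks:
cvmfs_config chksetup            # check the local cvmfs configuration
cvmfs_config probe cms.cern.ch   # mount and probe the repository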
#
https://gitlab.cern.ch/SITECONF/T3_US_UMD
Get xrootd-hdfs from
http://mirror.hep.wisc.edu/stable/MIT_bigtop-3.3.0-20240417-hadoop-3.3.6-isal/
Set up /etc/grid-security (install certificates)
chown xrootd:xrootd /etc/grid-security/xrd/xrdcert.pem
chmod 644 /etc/grid-security/xrd/xrdcert.pem
chown xrootd:xrootd /etc/grid-security/xrd/xrdkey.pem
chmod 600 /etc/grid-security/xrd/xrdkey.pem
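# A quick check that the host certificate is readable and not expired (paths as above):
openssl x509 -in /etc/grid-security/xrd/xrdcert.pem -noout -subject -dates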
yum clean all --enablerepo=*
yum-config-manager --disable osg-upcoming
rpm -e osg-release
#RHEL 9 yum install https://repo.opensciencegrid.org/osg/23-main/osg-23-main-el9-release-latest.rpm
yum -y install https://repo.opensciencegrid.org/osg/23-main/osg-23-main-el8-release-latest.rpm
yum -y install osg-xrootd
yum -y install xrootd
yum -y install xrootd-server
yum -y install xrootd-libs
yum -y install xrootd-server-libs
yum -y install xrootd-client
yum -y install xrootd-client-libs
yum -y install xrootd-scitokens
yum -y install python3-scitokens
yum -y install gfal2-plugin-xrootd
yum -y install xrootd-multiuser
yum -y install xrootd-voms
yum -y install scitokens-cpp
yum -y install xrootd-cmstfc --enablerepo=osg-contrib
yum -y install rsyslog
ln -s /usr/lib64/libXrdSec-5.so /usr/lib64/libXrdSec.so
ln -s /data/osg/scripts/grid-mapfile /etc/grid-security/voms-mapfile
ln -s /usr/lib64/libXrdHttpTPC-5.so /usr/lib64/libXrdHttpTPC.so
ln -s /usr/lib64/libXrdVoms-5.so /usr/lib64/libXrdVoms.so
systemctl enable xrootd-privileged@clustered
systemctl enable cmsd@clustered
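# Then start the services and check them; the xrdfs probe below assumes the default xrootd port 1094:
systemctl start xrootd-privileged@clustered cmsd@clustered
systemctl status xrootd-privileged@clustered cmsd@clustered
xrdfs localhost:1094 query config sitename   # simple liveness probe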
http://mirror.hep.wisc.edu/stable/MIT_bigtop-3.3.0-20240417-hadoop-3.3.6-isal/
You need the xrootd-hdfs plugin. You could download it from the URL above, but it is probably better to build your own version. Download these two files; you should then be able to run BuildXrootdHdfsPlugin.
#
## debugging: umount previously mounted directories if the transport endpoint is not connected.
#
# /etc/condor/condor_config, /etc/condor/config.d/local.conf, /etc/condor/config.d/cluster.conf - Config files
# It actually does not matter which variable is set where. It reads condor_config first and then moves on to the files in config.d in alphabetical order.
# The password can be copied with the following command. <node> must be replaced with a CENTOS7 node with an accurate password, such as r720-0-1.
# scp -r root@<node>:/etc/condor/passwords.d/POOL /etc/condor/passwords.d/POOL
# You will need the root password for <node> to do this.
# copy all config files from what is used on r540-0-20. This does copy the password file, but it may be old and may need updating using the command above.
#
# turn off firewall
#Firewall should not be active
systemctl stop firewalld
systemctl disable firewalld
systemctl status firewalld
mkdir -p /scratch/condor/execute/
mkdir -p /scratch/condor/log/
mkdir -p /scratch/condor/spool/
chown condor:condor /scratch/condor/execute
chown condor:condor /scratch/condor/log
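# The spool directory created above presumably needs the same ownership (a guess, mirroring execute and log):
chown condor:condor /scratch/condor/spool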
EXECUTE = /scratch/condor/execute
LOG = /scratch/condor/log
SPOOL = /scratch/condor/spool/
# To support singularity, the following lines should be in /etc/condor/local.conf
#SINGULARITY_JOB = !isUndefined(TARGET.SingularityImage)
#SINGULARITY_IMAGE_EXPR = TARGET.SingularityImage
#cp -rf /data/users/root/r540-0-20.privnet/etc/condor/* /etc/condor/
cp -f /data/users/root/Alma8/etc/condor/config.d/local.conf /etc/condor/config.d/
cp -f /data/users/root/Alma8/etc/condor/config.d/50-cluster.conf /etc/condor/config.d/
cp /data/users/root/Alma9/etc/condor/config.d/50-cluster.conf /etc/condor/config.d/
cp /data/users/root/Alma9/etc/condor/config.d/local.conf /etc/condor/config.d/
# edit local.conf to change NETWORK_INTERFACE and HOSTNAME
# Be careful: do not rename cluster.conf and/or local.conf and leave the old copies in the same directory; condor may read both files during initialization.
# If you want to save old configuration files, move them to the /root area.
Option B. Run two commands (as root) on every machine in your pool to enable the recommended security configuration appropriate for v8.9.13. When prompted, type the same password for every machine. (Note: if typing a password is problematic, see the condor_store_cred manual page for other options such as reading the password from a file or command-line).
# condor_store_cred -c add
# umask 0077; condor_token_create -identity condor@privnet > /etc/condor/tokens.d/condor@privnet
### umask 0077; condor_token_create -identity condor@mypool > /etc/condor/tokens.d/condor@mypool
cp /data/users/root/Alma8/passwords.d/* /etc/condor/passwords.d
chown -R root:root /etc/condor/tokens.d
systemctl enable condor
systemctl start condor
systemctl status condor
condor_status
ausearch -c 'condor_procd' --raw | audit2allow -M my-condorprocd
semodule -X 300 -i my-condorprocd.pp
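# Confirm the module was loaded and that no further AVC denials show up:
semodule -l | grep my-condorprocd
ausearch -m avc -ts recent    # should show nothing new for condor_procd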
# debugging of condor if it does not work
condor_status r540-0-21
condor_status -schedd
condor --version
cd /etc/condor/config.d/
condor_config_val start
watch condor_status r540-0-21
condor_config_val start
condor_config_val -dump | grep 129.2.116.129
grep IP /etc/condor -r
env _CONDOR_TOOL_DEBUG=D_HOSTNAME condor_config_val -debug FULL_HOSTNAME
/etc/condor/config.d/local.conf
condor_config_val IP_ADDRESS
condor_config_val IPV4_ADDRESS
vim /etc/condor/config.d/local.conf
condor_config_val IPV4_ADDRESS
condor_config_val IP_ADDRESS
hostid
cd condor
less StarterLog.slot1
condor_q -name siab-1.umd.edu
condor_config_val condor_q -name siab-1.umd.edu
condor_config_val allow-read
condor_config_val allow_read
condor_config_val SEC_READ_AUTHENTICATION_METHODS
condor_config_val SEC_*_AUTHENTICATION_METHODS
condor_config_val SEC_DAEMON_AUTHENTICATION_METHODS
getenforce
setenforce 0
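# setenforce 0 only lasts until reboot; to make permissive mode persistent (if that is really what is wanted), edit /etc/selinux/config, e.g.:
sed -i 's/^SELINUX=enforcing/SELINUX=permissive/' /etc/selinux/config
grep ^SELINUX= /etc/selinux/config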
Hadoop
# The following is needed on all nodes (clients read/write). All data/worker/interactive nodes must mount /mnt/hadoop and the associated software.
#Firewall should not be active
systemctl stop firewalld
systemctl disable firewalld
systemctl status firewalld
ln -s /data/osg/scripts/grid-mapfile /etc/grid-security/grid-mapfile
cp /data/users/root/Alma8/bigtop.repo /etc/yum.repos.d/
yum -y install hadoop-client hadoop-hdfs-fuse
edit /etc/hadoop/conf/hadoop-env.sh
export HADOOP_LOG_DIR=/scratch/hadoop/hadoop-hdfs
mkdir -p /scratch/hadoop/hadoop-hdfs
chown hdfs:hadoop /scratch/hadoop/hadoop-hdfs # Replace with correct user/group
chmod 755 /scratch/hadoop/hadoop-hdfs
yum -y install hadoop-hdfs-datanode hadoop-hdfs-fuse
#cp -f /data/site_conf/centos7-r540-0-20/hadoop-working/configs-withoutlinks/conf/hdfs-site.xml /etc/hadoop/conf/
#cp -f /data/site_conf/centos7-r540-0-20/hadoop-working/configs-withoutlinks/conf/core-site.xml /etc/hadoop/conf/
cp -f /data/osg/namenode/AlmaLinux8/hdfs-site.xml /etc/hadoop/conf/
cp -f /data/osg/namenode/AlmaLinux8/core-site.xml /etc/hadoop/conf/
# add following lines to hdfs-site.xml file
#
# make the main directory to mount the hdfs system
# Edit /etc/fstab and add the following line
hadoop-fuse-dfs /mnt/hadoop fuse server=hepcms-namenode.privnet,port=9000,rdbuffer=131072,allow_other 0 0
#
mkdir -p /mnt/hadoop
chown hdfs:hadoop /mnt/hadoop
mount /mnt/hadoop
# we also need a soft link
ln -s /mnt/hadoop/cms/store /store
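# Quick checks that the fuse mount and the HDFS client both work (assumes /etc/hadoop/conf points at the namenode as configured above):
ls /mnt/hadoop | head
hdfs dfs -ls / | head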
# data nodes ==============
#
<property>
<name>dfs.disk.balancer.enabled</name>
<value>true</value>
</property>
#
# make sure all the data disks are listed. For r510-0-xx copy from /data/users/root/r510-0-1/etc/ area.
#
mkdir -p /scratch/hadoop
mkdir -p /scratch/hadoop/hadoop-hdfs
chown hdfs:hadoop /scratch/hadoop/hadoop-hdfs
chown hdfs:hadoop /scratch/hadoop/
##
# Hadoop datanode
#
# format the disks, mount and start hadoop-hdfs-datanode
#
# mkfs is run right after the install to create filesystems on the individual disk partitions.
# make sure you mkfs.ext4 the partition, which may be /dev/sda or /dev/sda1 depending on how the disk was partitioned.
ls -1 /dev/[sv]d[a-z]
blkid
lsblk /dev/sda
lsblk -o name,mountpoint,size,uuid
# Use parted to change the partition table (you may want to use a gpt partition table)
# if you have partition /dev/sdc1 on disk /dev/sdc, you should mkfs.ext4 the partition (/dev/sdc1) and not the disk (/dev/sdc)
# Use parted to make the partition table (mklabel) and the partition (mkpart; /dev/sdb1 is the partition name), as below,
# or as a one-liner:
# parted /dev/sdb unit s mkpart primary ext4 0% 100%
parted /dev/sdb
mklabel gpt
unit s
mkpart primary ext4 0% 100%
quit
# do it for all data disks
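# A loop version of the above; a sketch that assumes the data disks are /dev/sdb through /dev/sdl and will destroy whatever is on them:
for d in /dev/sd{b..l}; do
  parted -s "$d" mklabel gpt mkpart primary ext4 0% 100%
done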
# format all the partitions (mkfs.ext4 the partition, e.g. /dev/sdc1, not the whole disk /dev/sdc)
mkfs.ext4 /dev/sdc1
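# or loop over all the data partitions (same /dev/sdb..sdl assumption as above):
for p in /dev/sd{b..l}1; do
  mkfs.ext4 "$p"
done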
#make hadoop directories to mount to the physical disks 1-8 or 12 depending upon the node.
mkdir -p /hadoop1/
mkdir -p /hadoop2/
mkdir -p /hadoop3/
mkdir -p /hadoop4/
mkdir -p /hadoop5/
mkdir -p /hadoop6/
mkdir -p /hadoop7/
mkdir -p /hadoop8/
mkdir -p /hadoop9/
mkdir -p /hadoop10/
mkdir -p /hadoop11/
mkdir -p /hadoop12
### mount the disks
mount /dev/sdb1 /hadoop1
mount /dev/sdc1 /hadoop2
mount /dev/sdd1 /hadoop3
mount /dev/sde1 /hadoop4
mount /dev/sdf1 /hadoop5
mount /dev/sdg1 /hadoop6
mount /dev/sdh1 /hadoop7
mount /dev/sdi1 /hadoop8
mount /dev/sdj1 /hadoop9
mount /dev/sdk1 /hadoop10
mount /dev/sdl1 /hadoop11
#
#mount /dev/sdc1 /hadoop3
#make hadoop/data directories
mkdir -p /hadoop1/data
mkdir -p /hadoop2/data
mkdir -p /hadoop3/data
mkdir -p /hadoop4/data
mkdir -p /hadoop5/data
mkdir -p /hadoop6/data
mkdir -p /hadoop7/data
mkdir -p /hadoop8/data
mkdir -p /hadoop9/data
mkdir -p /hadoop10/data
mkdir -p /hadoop11/data
mkdir -p /hadoop12/data
.....
# /hadoop is owned by root:root and hadoop/data by hdfs:hadoop
#
chown hdfs:hadoop /hadoop1/data
chown hdfs:hadoop /hadoop2/data
chown hdfs:hadoop /hadoop3/data
chown hdfs:hadoop /hadoop4/data
chown hdfs:hadoop /hadoop5/data
chown hdfs:hadoop /hadoop6/data
chown hdfs:hadoop /hadoop7/data
chown hdfs:hadoop /hadoop8/data
chown hdfs:hadoop /hadoop9/data
chown hdfs:hadoop /hadoop10/data
chown hdfs:hadoop /hadoop11/data
chown hdfs:hadoop /hadoop12/data
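# The per-disk mkdir and chown steps above can be collapsed into a loop (adjust 12 to the number of disks in the node):
for i in $(seq 1 12); do
  mkdir -p /hadoop$i/data
  chown hdfs:hadoop /hadoop$i/data
done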
#check everything is correctly mounted
df -h
# Add something like the following to fstab (/dev/sdb1 is the partition on disk /dev/sdb)
/dev/sdb1 /hadoop1 ext4 defaults 0 0
/dev/sdc1 /hadoop2 ext4 defaults 0 0
/dev/sdd1 /hadoop3 ext4 defaults 0 0
/dev/sde1 /hadoop4 ext4 defaults 0 0
/dev/sdf1 /hadoop5 ext4 defaults 0 0
/dev/sdg1 /hadoop6 ext4 defaults 0 0
/dev/sdh1 /hadoop7 ext4 defaults 0 0
/dev/sdi1 /hadoop8 ext4 defaults 0 0
/dev/sdj1 /hadoop9 ext4 defaults 0 0
/dev/sdk1 /hadoop10 ext4 defaults 0 0
/dev/sdl1 /hadoop11 ext4 defaults 0 0
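# After editing fstab, remount and confirm everything is where it should be:
mount -a
df -h | grep hadoop
lsblk -o name,mountpoint,size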
Finally, start the service and check its status to see that it works:
systemctl start hadoop-hdfs-datanode
systemctl status hadoop-hdfs-datanode
balancing the hadoop storage on all nodes
hdfs balancer
# run rebalancer in background.
date=$(date +"%Y_%m_%d")
nohup hdfs balancer > rebalancer_${date}.out 2> rebalancer_${date}.log &
#
Checking everything is fine
nmcli device status
id hdfs
df -ah
You can open firefox on one of the interactive nodes and check the hadoop status:
NOTE: make sure to properly close the browser, not just click the X on the window; otherwise the process will stay alive in the background and firefox will not start next time because it is already running.
http://hepcms-namenode.privnet:50070/dfshealth.jsp
Here you can look at the live nodes.
To install:
yum -y install clustershell
Copy over the configuration files (everything in hepcms-in1:/etc/clustershell) onto the node. [Does not currently work, as hepcms-in1 has been updated but not configured yet; need to edit by hand.]
Edit /etc/clustershell/groups.d/local.cfg to include the nodes.
# generate local ssh key
ssh-keygen
# on hepcms-in1.umd.edu run ssh-copy-id to copy the hepcms-in1 key to the other machines (e.g. r510-0-1)
#
ssh-copy-id -f r510-0-1
ssh-copy-id -f r510-0-11
ssh-copy-id -f r510-0-6
#give jabeen and bhatti sudo privileges.
usermod -aG wheel jabeen
usermod -aG wheel bhatti
#
Trying to run
clush -w <node name here> df -ah
to verify that clush is working will result in a "host key verification failed" error. To solve this issue, copy the .ssh/authorized_keys files from a working node (like r540-0-20) to the new node.
Then, while you're in the ssh-agent $SHELL mode, ssh into that node and add its identity, then exit.
Then exit out of the ssh-agent $SHELL mode, re-enter it, and run
clush -a
-> date
to check that you're able to connect (the date from each node should come back as a message).
If you get a "Could not open a connection to your authentication agent" error when trying to run the previous command, use
eval $(ssh-agent -s)
ssh-add
Install crontab/nightly monitoring
#
mkdir -p /root/cronscripts
cd /root/cronscripts
ls /data/site_conf/cronscripts/
cp /data/site_conf/cronscripts/crontab_new .
cp /data/site_conf/cronscripts/tar_script.sh .
cp /data/site_conf/cronscripts/run_omreport.sh .
cp /data/site_conf/cronscripts/Hardware_Checks.py .
cp /data/site_conf/cronscripts/Monitoring-Checks.py .
#
crontab crontab_new
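# Confirm the crontab was installed:
crontab -l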
After installation is complete
yum -y install nano wget curl net-tools lsof
# ifconfig
# netstat -tulpn
# lsof -i
systemctl list-unit-files
service network status
# ifconfig
# ip addr show
yum install NetworkManager-tui
nmtui edit eno16777736
# nmtui connect eno16777736
script for making partitions
# to create the partitions programatically (rather than manually)
# we're going to simulate the manual input to fdisk
# The sed script strips off all the comments so that we can
# document what we're doing in-line with the actual commands
# Note that a blank line (commented as "default") will send an empty
# line terminated with a newline to take the fdisk default.
sed -e 's/\s*\([\+0-9a-zA-Z]*\).*/\1/' << EOF | fdisk ${TGTDEV}
  o # clear the in memory partition table
  n # new partition
  p # primary partition
  1 # partition number 1
    # default - start at beginning of disk
  +100M # 100 MB boot partition
  n # new partition
  p # primary partition
  2 # partition number 2
    # default, start immediately after preceding partition
    # default, extend partition to end of disk
  a # make a partition bootable
  1 # bootable partition is partition 1 -- /dev/sda1
  p # print the in-memory partition table
  w # write the partition table
  q # and we're done
EOF
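# Note: ${TGTDEV} is not set anywhere above; set it to the target disk before running the snippet, e.g. (hypothetical disk):
TGTDEV=/dev/sdb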
[root@hepcms-gridftp ~]# more /etc/sysconfig/network-scripts/ifcfg-eno2
TYPE=Ethernet
NAME=eno2
UUID=bfd8a52f-9440-4c07-8104-a70ea536879c
GATEWAY=128.8.216.1
#IPADDR=128.8.216.195
DEVICE=eno2
ONBOOT=yes
NM_CONTROLLED="yes"
HWADDR="00:1D:09:68:09:99"
BOOTPROTO="static"
#HOSTNAME="hepcms-gridftp.umd.edu"
#IPV6INIT="yes"
NETMASK="255.255.255.0"
[root@hepcms-gridftp ~]#
[root@hepcms-gridftp ~]# more /etc/sysconfig/network-scripts/ifcfg-eno1
BOOTPROTO="none"
IPADDR="10.1.0.19"
NETMASK="255.255.0.0"
GATEWAY="10.1.0.1"
DEVICE="eno0"
HWADDR="00:1d:09:68:09:97"
ONBOOT=yes
PEERDNS=no
#PEERROUTES=no
[root@hepcms-gridftp ~]# hostname -i
128.8.216.195
[root@hepcms-gridftp ~]# hostname -I
10.1.0.19 128.8.216.195
[root@hepcms-gridftp ~]# hostname -d
umd.edu
[root@hepcms-gridftp ~]# hostname -f
hepcms-gridftp.umd.edu
[root@hepcms-gridftp ~]# hostname -A
compute-0-5.privnet hepcms-gridftp.umd.edu
[root@hepcms-gridftp ~]# hostname -a
[root@hepcms-gridftp ~]# rpm -q centos-release
centos-release-7-5.1804.el7.centos.x86_64
From https://opensciencegrid.org/docs/common/yum/
[root@hepcms-gridftp ~]# yum install yum-plugin-priorities
Ensure that /etc/yum.conf has the following line in the [main] section:
plugins=1
On CentOS, ensure that the extras repo in /etc/yum.repos.d/CentOS-Base.repo is enabled.
A repository is enabled if it has enabled=1 in its definition, or if the enabled line is missing (i.e. it is enabled unless specified otherwise.)
It was 0 by default; I replaced it with 1.
[root@hepcms-gridftp ~]# yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
[root@hepcms-gridftp ~]# more /etc/yum.repos.d/epel.repo
[epel]
name=Extra Packages for Enterprise Linux 7 - $basearch
#baseurl=http://download.fedoraproject.org/pub/epel/7/$basearch
metalink=https://mirrors.fedoraproject.org/metalink?repo=epel-7&arch=$basearch&infra=$infra&content=$contentdir
failovermethod=priority
enabled=1
Check!
root@hepcms-gridftp ~]# yum install https://repo.opensciencegrid.org/osg/3.5/osg-3.5-el7-release-latest.rpm
To disable automatic updates entirely:
service yum-cron stop or systemctl stop yum-cron.service
[root@hepcms-gridftp ~]# systemctl stop firewalld
[root@hepcms-gridftp ~]# systemctl status firewalld
[root@hepcms-gridftp ~]#
-P INPUT ACCEPT
-P FORWARD ACCEPT
-P OUTPUT ACCEPT
[root@hepcms-gridftp ~]#
[root@hepcms-gridftp ~]# yum clean all --enablerepo=*
-bash: yum: command not found
[root@hepcms-gridftp ~]# yum clean all --enablerepo=*
[root@hepcms-gridftp ~]# yum update
[root@hepcms-gridftp ~]# yum install osg-wn-client
[root@hepcms-gridftp ~]# yum install xorg-x11-xauth
[root@hepcms-gridftp ~]# yum -y install ypbind rpcbind
[root@hepcms-gridftp ~]# yum install ypbind yptools
[root@hepcms-gridftp ~]# service ypbind status
[root@hepcms-gridftp ~]# yum install tcsh
[root@hepcms-gridftp ~]# yum install nfs nfs-utils
[root@hepcms-gridftp ~]# yum install ganglia ganglia-gmond
List excluded packages and check if omsa is one of them
[root@hepcms-gridftp ~]# yum list updates -d3
[root@hepcms-gridftp ~]# curl -s http://linux.dell.com/repo/hardware/dsu/bootstrap.cgi | bash
[root@hepcms-gridftp ~]# yum install srvadmin-all
Log out and log back in to make omsa commands work.
[root@hepcms-gridftp ~]# srvadmin-services.sh status
[root@hepcms-gridftp ~]# yum install condor
[root@hepcms-gridftp ~]# systemctl start condor
[root@hepcms-gridftp ~]# systemctl status condor
[root@hepcms-gridftp ~]# condor_status
[root@hepcms-gridftp ~]# yum install osg-oasis
[root@hepcms-gridftp ~]# systemctl enable autofs
[root@hepcms-gridftp ~]# systemctl start autofs
[root@hepcms-gridftp ~]# vi /etc/auto.master.d/cvmfs.autof
[root@hepcms-gridftp ~]# vi /etc/auto.master.d/cvmfs.autofs
[root@hepcms-gridftp ~]# systemctl restart autofs
[root@hepcms-gridftp ~]# more /etc/cvmfs/default.local
/etc/cvmfs/default.local: No such file or directory
[root@hepcms-gridftp ~]# vi /etc/cvmfs/default.local
[root@hepcms-gridftp ~]# more /etc/cvmfs/default.local
CVMFS_REPOSITORIES="`echo $((echo oasis.opensciencegrid.org;echo cms.cern.ch;ls /cvmfs)|sort -u)|tr ' ' ,`"
CVMFS_STRICT_MOUNT=no
CVMFS_CACHE_BASE=/tmp/cvmfs
CVMFS_QUOTA_LIMIT=20000
CVMFS_HTTP_PROXY="http://hepcms-squid.privnet:3128"
GLITE_VERSION=
[root@hepcms-gridftp ~]# ls /cvmfs
[root@hepcms-gridftp ~]# ls -l /cvmfs/atlas.cern.ch
[root@hepcms-gridftp ~]# ls -l /cvmfs/oasis.opensciencegrid.org/cmssoft
[root@hepcms-gridftp ~]# yum install osg-se-hadoop-datanode
[root@hepcms-gridftp ~]# yum install nfs nfs-utils
[root@hepcms-gridftp ~]# more /etc/fstab
#
# /etc/fstab
# Created by anaconda on Tue Jul 2 16:36:42 2019
#
# Accessible filesystems, by reference, are maintained under '/dev/disk'
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info
#
/dev/mapper/centos-root / xfs defaults 0 0
UUID=c9228540-db46-4ed9-944f-f0a8dc23b1f5 /boot xfs defaults 0 0
/dev/mapper/centos-home /home xfs defaults 0 0
/dev/mapper/centos-swap swap swap defaults 0 0
[root@hepcms-gridftp ~]#
mount -a /home
mkdir data
mount -a /data
[root@compute-0-5 etc]# more /etc/fstab
#
# /etc/fstab
# Created by anaconda on Tue Jul 2 16:36:42 2019
#
# Accessible filesystems, by reference, are maintained under '/dev/disk'
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info
#
/dev/mapper/centos-root / xfs defaults 0 0
UUID=c9228540-db46-4ed9-944f-f0a8dc23b1f5 /boot xfs defaults 0 0
#/dev/mapper/centos-home /home xfs defaults 0 0
/dev/mapper/centos-swap swap swap defaults 0 0
10.1.0.1:/export/home /home nfs rw,async,intr,nolock,nfsvers=3 0 0
10.1.0.7:/data /data nfs rw,async,intr,nolock,nfsvers=3 0 0
++++++++++++++++++++++++++++
[root@r540-0-20 ~]#
NAME UUID TYPE DEVICE
em1 ec53ccd8-1cb5-4e58-b074-42f7c0c2477d ethernet em1
em2 d846966e-ce5e-4c75-a6cf-59c8ed87e3d3 ethernet --
p1p1 c6ca2690-76f4-4635-a31d-0c892d5c6c1d ethernet --
p1p2 9706cd02-c18e-42dc-8e18-918eec2fb895 ethernet --
[root@r540-0-20 ~]# systemctl status firewalld
● firewalld.service - firewalld - dynamic firewall daemon
Loaded: loaded (/usr/lib/systemd/system/firewalld.service; enabled; vendor preset: enabled)
Active: inactive (dead) since Sat 2019-09-28 03:07:55 EDT; 2 months 13 days ago
Docs: man:firewalld(1)
Main PID: 125570 (code=exited, status=0/SUCCESS)
[root@r540-0-20 ~]# /etc/resolv.conf
-bash: /etc/resolv.conf: Permission denied
[root@r540-0-20 ~]# more /etc/resolv.conf
nameserver 10.1.0.2
search privnet umd.edu
options rotate timeout:1
[root@r540-0-20 ~]# more /etc/hostname
r540-0-20.privnet
[root@r540-0-20 ~]# more /etc/sysconfig/network
NETWORKING=yes
NISDOMAIN=nishepcms.privnet
GATEWAY=129.2.116.1
[root@r540-0-20 ~]# more /etc/host.conf
multi on
[root@r540-0-20 ~]# more /etc/sysconfig/network-scripts/ifcfg-em1
TYPE=Ethernet
HOSTNAME=r540-0-20.privnet
PROXY_METHOD=none
BROWSER_ONLY=no
BOOTPROTO=none
DEFROUTE=yes
IPV4_FAILURE_FATAL=no
IPV6INIT=yes
IPV6_AUTOCONF=yes
IPV6_DEFROUTE=yes
IPV6_FAILURE_FATAL=no
IPV6_ADDR_GEN_MODE=stable-privacy
NAME=em1
UUID=ec53ccd8-1cb5-4e58-b074-42f7c0c2477d
DEVICE=em1
ONBOOT=yes
IPADDR=10.1.0.101
NETMASK=255.255.0.0
GATEWAY=10.1.0.1
IPV6_PRIVACY=no
PREFIX=16
[root@r540-0-21 ~]# ping google.com
PING google.com (172.217.13.238) 56(84) bytes of data.
64 bytes from iad23s61-in-f14.1e100.net (172.217.13.238): icmp_seq=1 ttl=54 time=3.40 ms
64 bytes from iad23s61-in-f14.1e100.net (172.217.13.238): icmp_seq=2 ttl=54 time=3.18 ms
64 bytes from iad23s61-in-f14.1e100.net (172.217.13.238): icmp_seq=3 ttl=54 time=3.66 ms
^C
###
New machine was not accessible from outside campus because of the routing order. Did the following and it worked.
root@hepcms-hn2 ~]# cd /etc/sysconfig/network-scripts/
[root@hepcms-hn2 network-scripts]#
[root@hepcms-hn2 network-scripts]# ./ifdown eno1
Device 'eno1' successfully disconnected.
[root@hepcms-hn2 network-scripts]#
[root@hepcms-hn2 network-scripts]# ./ifup eno1
Connection successfully activated (D-Bus active path: /org/freedesktop/NetworkManager/ActiveConnection/3)
[root@hepcms-hn2 network-scripts]#
[root@hepcms-hn2 network-scripts]# ip route
default via 128.8.216.1 dev eno2 proto static metric 101
default via 10.1.0.1 dev eno1 proto static metric 102
10.1.0.0/16 dev eno1 proto kernel scope link src 10.1.0.36 metric 102
128.8.216.0/23 dev eno2 proto kernel scope link src 128.8.216.200 metric 101
# rebooted the machine. Had to repeat above steps.