Skip to content

Code for Elog scraping

I am using the web site of the elogbook to convert its data into a pandas DataFrame.

# To get the stack you are using
!which python
/cvmfs/sft.cern.ch/lcg/views/LCG_95apython3/x86_64-centos7-gcc7-opt/bin/python
# To get the path of your notebook on EOS
pwd
'/eos/user/s/sterbini/MD_ANALYSIS/bblumi/docs/how-tos/ElogScraping'
import re
import requests
import pandas as pd
from bs4 import BeautifulSoup

def getSoup(date='20180612', shift=1, elog=60):
    ''' Download one shift page of the elogbook and parse it with BeautifulSoup
    - date is in yyyymmdd format
    - shift is an integer (1= 'morning', 2='afternoon', 3='night')
    - elog is the elog code (e.g., 60='LHC_OP')

    Returns a dictionary with the parsed page under 'soup' and the three
    request arguments echoed back under 'elog', 'date' and 'shift'.

    Raises requests.RequestException (or a subclass) if the server cannot be
    reached, does not answer within the timeout, or answers with an HTTP
    error status.
    '''
    # Let requests build (and URL-encode) the query string instead of
    # formatting it by hand.
    response = requests.get(
        'http://elogbook.cern.ch/eLogbook/eLogbook.jsp',
        params={'lgbk': elog, 'date': date, 'shift': shift},
        # Without a timeout a stalled server would block the caller forever.
        timeout=60,
    )
    # Fail here with a clear HTTP error rather than later while parsing an
    # error page that does not contain the events table.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    return {'soup': soup, 'elog': elog, 'date': date, 'shift': shift}

def getDictionary(soup):
    ''' Convert the events table of a parsed elogbook shift page to a pandas DF
    - soup is the dictionary coming from the getSoup method
      (the keys 'soup', 'date' and 'shift' are used here)

    Returns a DF with one row per event. The index is the event time,
    localized to the 'CET' time zone and converted to UTC. The columns are
    the header fields of the events table (except 'Time') plus 'link', the
    URL of the event.

    For the night shift (shift == 3) only events between 21:00 and 07:00 are
    kept, and events from 00:00 to 07:00 are assigned to the day after 'date'.
    '''
    events_table = soup['soup'].find('table', {'id': 'events_table'})

    # Column names: the bold labels found in the table header
    header = events_table.find('thead')
    fields = [label.getText() for label in header.find_all('b')]

    body = events_table.find('tbody', {'id': 'body_events'})

    # Only every second child is used (rows of the body, cells of a row);
    # presumably the others are whitespace text nodes - TODO confirm.
    records = []
    for row in list(body.children)[1::2]:
        cells = list(row.children)[1::2]
        record = {
            field: list(cell.children)[1].getText()
            for cell, field in zip(cells, fields)
        }
        # The link to the event is the first anchor of the second cell
        anchors = cells[1].find_all('a', href=True)
        record['link'] = 'http://elogbook.cern.ch/eLogbook/' + anchors[0]['href']
        records.append(record)

    frame = pd.DataFrame(records)[fields + ['link']]

    # Strip everything except the characters allowed in a time string, then
    # prepend the date to obtain a full timestamp. The pattern is a raw
    # string so that '\-' is not an invalid string escape sequence.
    unwanted = re.compile(r'[^A-Za-z0-9:./;\-,_]+')
    frame['Time'] = frame['Time'].apply(
        lambda text: pd.Timestamp(soup['date'] + ' ' + unwanted.sub('', text))
    )
    frame = frame.set_index('Time')

    # this is useful for the change of the day
    if soup['shift'] == 3:
        time_of_day = frame.index - frame.index.normalize()
        is_evening = time_of_day >= pd.Timedelta(hours=21)
        is_morning = time_of_day <= pd.Timedelta(hours=7)
        # The two masks are disjoint: an event logged at exactly 00:00
        # belongs to the next day only and is not duplicated.
        evening = frame.loc[is_evening]
        morning = frame.loc[is_morning].copy()
        morning.index = morning.index + pd.Timedelta('1d')
        frame = pd.concat([evening, morning])

    # NOTE(review): tz_localize raises by default on ambiguous or
    # non-existent wall-clock times (DST changes) - confirm this is acceptable.
    frame.index = frame.index.tz_localize('CET').tz_convert('UTC')
    frame.index.name = None
    return frame

An example

# First get the soup (this is a bit slow, but comparable to the loading time of an elog page)
soup=getSoup(date='20180702', shift=3, elog=60)
# Then convert it to a pandas DF
aux=getDictionary(soup)
aux
# PROTONPHY Comment link
2018-07-02 21:00:00+00:00 1 \n\n\n\nNB\n\n\n\n \n\n\n\n\nGuy and Michaela \n\n\n\ncreated by ... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 21:52:00+00:00 2 \n\n\n\nNB\n\n\n\n \n\n\n\n\nStart precycle of EIS and RD1.LR5 \n... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 21:52:00+00:00 3 \n\n\n\n 1 \n\n\n\n \n\n\n\n\nRestore RSS.A56B1, which had tripped... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 22:08:00+00:00 4 \n\n\n\nSUP\n\n\n\n \n\n\n\n\n\n\n\nBEAM MODE > SETUP \n\n\n\n\n\... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 22:15:00+00:00 5 \n\n\n\nSUP\n\n\n\n \n\n\n\n\nPrecycle of RD1.LR5 complete \n\n\n\... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 22:19:00+00:00 6 \n\n\n\nSUP\n\n\n\n \n\n\n\n\nLHC SEQ: QPS configuration cross che... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 22:30:00+00:00 7 \n\n\n\nBI\n\n\n\n \n\n\n\n\n\n\n\nBEAM MODE > INJECTION PROBE BE... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 22:31:00+00:00 8 \n\n\n\nBI\n\n\n\n \n\n\n\n\nDry dump, XPOC interlock B2, MKD ris... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 22:38:00+00:00 9 \n\n\n\nBI\n\n\n\n \n\n\n\n\nRD1 replaced. SIS needs to be update... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 22:39:00+00:00 10 \n\n\n\nBI\n\n\n\n \n\n\n\n\nAnother dry dump to test the XPOC is... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 22:39:00+00:00 11 \n\n\n\nBI\n\n\n\n \n\n\n\n\n|*** XPOC error has been reset by us... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 22:49:00+00:00 12 \n\n\n\nBI\n\n\n\n \n\n\n\n\nRamping the RD1.LR5 to nominal curre... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 23:16:00+00:00 13 \n\n\n\nBI\n\n\n\n \n\n\n\n\nSent FGC.FAULTS -> FGC_STATE to RD1 ... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 23:20:00+00:00 14 \n\n\n\nBI\n\n\n\n \n\n\n\n\nRD1 tripped when restetting and goin... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 23:25:00+00:00 15 \n\n\n\nBI\n\n\n\n \n\n\n\n\nSending a few times the off command ... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 23:27:00+00:00 16 \n\n\n\nBI\n\n\n\n \n\n\n\n\nLHC SEQ: injection handshake closed;... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 23:27:00+00:00 17 \n\n\n\nNB\n\n\n\n \n\n\n\n\n\n\n\nBEAM MODE > NO BEAM \n\n\n\n\... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 23:45:00+00:00 18 \n\n\n\nNB\n\n\n\n \n\n\n\n\nFGCM test analysis by Markus Zerlaut... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 23:54:00+00:00 19 \n\n\n\nNB\n\n\n\n \n\n\n\n\nPrecycle of RD1.LR5 complete, and se... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 23:55:00+00:00 20 \n\n\n\nSUP\n\n\n\n \n\n\n\n\n\n\n\nBEAM MODE > SETUP \n\n\n\n\n\... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-02 23:57:00+00:00 21 \n\n\n\nBI\n\n\n\n \n\n\n\n\n\n\n\nBEAM MODE > INJECTION PROBE BE... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 00:03:00+00:00 22 \n\n\n\nBI\n\n\n\n \n\n\n\n\nBad tune signal at injection, many l... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 00:07:00+00:00 23 \n\n\n\nBI\n\n\n\n \n\n\n\n\nChroma far off ~20 in V and ~10 in H... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 00:12:00+00:00 24 \n\n\n\nBI\n\n\n\n \n\n\n\n\n\n\n\nBEAM MODE > INJECTION PHYSICS ... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 00:23:00+00:00 25 \n\n\n\nBI\n\n\n\n \n\n\n\n\nTL with 12b \n\n\n\n\n\n\n\n\n\n\n\n... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 00:25:00+00:00 26 \n\n\n\nBI\n\n\n\n \n\n\n\n\nreset BSRT intensifier on both beams... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 00:29:00+00:00 27 \n\n\n\nBI\n\n\n\n \n\n\n\n\nAverage emittance from Wirescanner ... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 00:29:00+00:00 28 \n\n\n\nBI\n\n\n\n \n\n\n\n\nIQC complains about bad scraping, bu... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 00:35:00+00:00 29 \n\n\n\nBI\n\n\n\n \n\n\n\n\nCalc all optics task failed. \n\n\n\... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 00:40:00+00:00 30 \n\n\n\n 2 \n\n\n\n \n\n\n\n\nGlobal Post Mortem Event Event Time... http://elogbook.cern.ch/eLogbook/event_viewer....
... ... ... ... ...
2018-07-03 02:14:00+00:00 55 \n\n\n\nBI\n\n\n\n \n\n\n\n\nLHC SEQ: LOAD SPS FREQUENCY FOR PROT... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 02:14:00+00:00 56 \n\n\n\nBI\n\n\n\n \n\n\n\n\nARNAUD ANDRE BESSONNAT(ABESSONN) ass... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 02:32:00+00:00 57 \n\n\n\nBI\n\n\n\n \n\n\n\n\nEPC piquet called back. The PC is fi... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 02:35:00+00:00 58 \n\n\n\nBI\n\n\n\n \n\n\n\n\nprecycle RD1.LR5 \n\n\n\ncreated by ... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 02:37:00+00:00 59 \n\n\n\nBI\n\n\n\n \n\n\n\n\nLHC SEQ: injection handshake closed;... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 02:37:00+00:00 60 \n\n\n\nSUP\n\n\n\n \n\n\n\n\n\n\n\nBEAM MODE > SETUP \n\n\n\n\n\... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 02:41:00+00:00 61 \n\n\n\nSUP\n\n\n\n \n\n\n\n\nLHC SEQ: RF LBDS frequency checks do... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 02:43:00+00:00 62 \n\n\n\nSUP\n\n\n\n \n\n\n\n\nLHC SEQ: resynchronize RF beam contr... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 02:49:00+00:00 63 \n\n\n\nSUP\n\n\n\n \n\n\n\n\nnew part of RF preparation sequence ... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 03:06:00+00:00 64 \n\n\n\nBI\n\n\n\n \n\n\n\n\n\n\n\nBEAM MODE > INJECTION PROBE BE... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 03:11:00+00:00 65 \n\n\n\n 10 \n\n\n\n \n\n\n\n\nQPS not OK on RQ9.L1 RQ4.R1 RQ9.R5 ... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 03:25:00+00:00 66 \n\n\n\nBI\n\n\n\n \n\n\n\n\n\n\n\nBEAM MODE > INJECTION PHYSICS ... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 03:32:00+00:00 67 \n\n\n\nBI\n\n\n\n \n\n\n\n\nTL with 12b \n\n\n\n\n\n\n\n\n\n\n\n... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 03:47:00+00:00 68 \n\n\n\nBI\n\n\n\n \n\n\n\n\nLosses during injection 05:33:52 ... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 03:48:00+00:00 69 \n\n\n\nBI\n\n\n\n \n\n\n\n\n\n\n\nBEAM MODE > PREPARE RAMP \n\n... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 03:48:00+00:00 70 \n\n\n\nBI\n\n\n\n \n\n\n\n\nLHC BEAM Process Time :The minimum t... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 03:48:00+00:00 71 \n\n\n\nBI\n\n\n\n \n\n\n\n\nBSRT Emittance scan \n\n\n\n\n\n\n\n... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 03:49:00+00:00 72 \n\n\n\nBI\n\n\n\n \n\n\n\n\n'LHC Fast BCT v1.0.5' application ha... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 03:49:00+00:00 73 \n\n\n\nBI\n\n\n\n \n\n\n\n\nLHC SEQ: INJ PROT COLLIMATORS ARE OU... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 03:51:00+00:00 74 \n\n\n\nBI\n\n\n\n \n\n\n\n\n\n\n\nBEAM MODE > RAMP \n\n\n\n\n\n... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 03:51:00+00:00 75 \n\n\n\nBI\n\n\n\n \n\n\n\n\nLHC SEQ: ramp started \n\n\n\ncreate... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 04:12:00+00:00 76 \n\n\n\nBI\n\n\n\n \n\n\n\n\n\n\n\nBEAM MODE > FLAT TOP \n\n\n\n... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 04:13:00+00:00 77 \n\n\n\nBI\n\n\n\n \n\n\n\n\nLHC SEQ: END OF QCHANGE \n\n\n\ncrea... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 04:14:00+00:00 78 \n\n\n\nBI\n\n\n\n \n\n\n\n\n\n\n\nBEAM MODE > SQUEEZE \n\n\n\n\... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 04:15:00+00:00 79 \n\n\n\nBI\n\n\n\n \n\n\n\n\n'LHC Beam Quality Monitor' applicati... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 04:16:00+00:00 80 \n\n\n\nBI\n\n\n\n \n\n\n\n\nBSRT Emittance scan B1H blew up dur... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 04:18:00+00:00 81 \n\n\n\nBI\n\n\n\n \n\n\n\n\nstrong snapback \n\n\n\n\n\n\n\n\n\n... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 04:45:00+00:00 82 \n\n\n\nBI\n\n\n\n \n\n\n\n\nEND OF SQUEEZE SEGMENT: Beta* = 75 m... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 04:56:00+00:00 83 \n\n\n\nBI\n\n\n\n \n\n\n\n\nBSRT Emittance scan \n\n\n\n\n\n\n\n... http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 04:56:00+00:00 84 \n\n\n\nBI\n\n\n\n \n\n\n\n\n\n\n\nINFO > Shift Summary \n\n\n\n... http://elogbook.cern.ch/eLogbook/event_viewer....

84 rows × 4 columns

# Memory usage of the DF in MB
aux.memory_usage().sum()/1024/1024
0.003204345703125
# Some naive filtering: the links of all the events logged at a given time
aux.loc['2018-07-03 04:56:00+00:00']['link']
2018-07-03 04:56:00+00:00    http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 04:56:00+00:00    http://elogbook.cern.ch/eLogbook/event_viewer....
Name: link, dtype: object