Code for Elog scraping
I scrape the elogbook web site and convert its data into a pandas DataFrame.
# To get the stack you are using
!which python
/cvmfs/sft.cern.ch/lcg/views/LCG_95apython3/x86_64-centos7-gcc7-opt/bin/python
# To get the EOS path of your notebook
pwd
'/eos/user/s/sterbini/MD_ANALYSIS/bblumi/docs/how-tos/ElogScraping'
import re
import requests
import pandas as pd
from bs4 import BeautifulSoup
def getSoup(date='20180612', shift=1, elog=60, timeout=30):
    ''' This method downloads a specific shift of the elogbook and parses it
    into a BeautifulSoup object
    - date is in yyyymmdd format
    - shift is an integer (1= 'morning', 2='afternoon', 3='night')
    - elog is the elog code (e.g., 60='LHC_OP')
    - timeout is the number of seconds to wait for the server before giving up
      (passed to requests.get; None waits forever)

    Returns a dictionary with the keys 'soup' (the parsed page), 'elog',
    'date' and 'shift' (the arguments, echoed back unchanged).

    Raises a requests exception if the server does not answer within
    `timeout` or answers with an HTTP error status.
    '''
    address = f'http://elogbook.cern.ch/eLogbook/eLogbook.jsp?lgbk={elog}&date={date}&shift={shift}'
    # Without a timeout requests can block indefinitely on an unresponsive server.
    response = requests.get(address, timeout=timeout)
    # Fail here, with a clear HTTP error, rather than parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    return {'soup': soup, 'elog': elog, 'date': date, 'shift': shift}
def getDictionary(soup):
    ''' This method will convert a BeautifulSoup object from elogbook to a pandas DF
    - soup is the dictionary coming from the getSoup method
      (keys used here: 'soup', 'date', 'shift')

    Returns a DataFrame with one row per elogbook entry. The columns are the
    header fields of the events table (except 'Time') plus 'link', the URL of
    the entry. The index is the entry time: the page times are read as CET
    wall-clock times on soup['date'] and converted to UTC. For the night
    shift (shift 3) entries between 00:00 and 07:00 are moved to the next day.
    A shift without entries gives an empty DataFrame.
    '''
    # Table
    events_table = soup['soup'].find('table', {'id': 'events_table'})

    # Table header and column description: the column titles are the bold labels
    table_head = events_table.find('thead')
    myFields = [label.getText() for label in table_head.find_all('b')]
    columns = myFields + ['link']

    # Table body
    table_body = events_table.find('tbody', {'id': 'body_events'})

    # From the table body to the events: only every second child (odd
    # positions) is used as an event row.
    rows = list(table_body.children)[1::2]

    records = []
    for row in rows:
        # Same alternation inside a row: odd positions are the cells.
        cells = list(row.children)[1::2]
        record = {}
        for cell, field in zip(cells, myFields):
            record[field] = list(cell.children)[1].getText()
        # The second cell carries the hyperlink to the full entry.
        anchors = cells[1].find_all('a', href=True)
        record['link'] = 'http://elogbook.cern.ch/eLogbook/' + anchors[0]['href']
        records.append(record)

    if not records:
        # No entries in this shift: return an empty frame with the usual layout
        # instead of failing on the column selection below.
        return pd.DataFrame(
            columns=[field for field in columns if field != 'Time'],
            index=pd.DatetimeIndex([], tz='UTC'),
        )

    frame = pd.DataFrame(records)[columns]

    # Strip the whitespace/markup residue around the time, then prepend the date.
    time_cleaner = re.compile(r'[^A-Za-z0-9:./;\-,_]+')
    stamps = [
        pd.Timestamp(soup['date'] + ' ' + time_cleaner.sub('', raw))
        for raw in frame['Time']
    ]
    frame = frame.drop(columns='Time')
    frame.index = pd.DatetimeIndex(stamps)

    # this is useful for the change of the day
    if soup['shift'] == 3:
        # Evening part stays on the requested date. Midnight itself is left to
        # the morning part, otherwise a 00:00 entry would be listed twice.
        evening = frame.loc[frame.index.hour >= 21]
        # Morning part (00:00-07:00, both ends included) belongs to the next day.
        morning = frame.between_time('00:00', '07:00').copy()
        morning.index = morning.index + pd.Timedelta('1d')
        frame = pd.concat([evening, morning])

    # NOTE(review): tz_localize raises by default on ambiguous or non-existent
    # local times (nights of a DST change) - confirm whether that needs handling.
    frame.index = frame.index.tz_localize('CET').tz_convert('UTC')
    frame.index.name = None
    return frame
An example
# First I get the soup for the night shift (shift=3) of 2018-07-02 in elog 60.
# This is a bit slow, but comparable to the loading time of an elog page.
soup=getSoup(date='20180702', shift=3, elog=60)
# Then I convert it into a pandas DataFrame
aux=getDictionary(soup)
# Display the resulting DataFrame
aux
# | PROTONPHY | Comment | link | |
---|---|---|---|---|
2018-07-02 21:00:00+00:00 | 1 | \n\n\n\nNB\n\n\n\n | \n\n\n\n\nGuy and Michaela \n\n\n\ncreated by ... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 21:52:00+00:00 | 2 | \n\n\n\nNB\n\n\n\n | \n\n\n\n\nStart precycle of EIS and RD1.LR5 \n... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 21:52:00+00:00 | 3 | \n\n\n\n 1 \n\n\n\n | \n\n\n\n\nRestore RSS.A56B1, which had tripped... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 22:08:00+00:00 | 4 | \n\n\n\nSUP\n\n\n\n | \n\n\n\n\n\n\n\nBEAM MODE > SETUP \n\n\n\n\n\... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 22:15:00+00:00 | 5 | \n\n\n\nSUP\n\n\n\n | \n\n\n\n\nPrecycle of RD1.LR5 complete \n\n\n\... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 22:19:00+00:00 | 6 | \n\n\n\nSUP\n\n\n\n | \n\n\n\n\nLHC SEQ: QPS configuration cross che... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 22:30:00+00:00 | 7 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\n\n\n\nBEAM MODE > INJECTION PROBE BE... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 22:31:00+00:00 | 8 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nDry dump, XPOC interlock B2, MKD ris... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 22:38:00+00:00 | 9 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nRD1 replaced. SIS needs to be update... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 22:39:00+00:00 | 10 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nAnother dry dump to test the XPOC is... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 22:39:00+00:00 | 11 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\n|*** XPOC error has been reset by us... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 22:49:00+00:00 | 12 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nRamping the RD1.LR5 to nominal curre... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 23:16:00+00:00 | 13 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nSent FGC.FAULTS -> FGC_STATE to RD1 ... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 23:20:00+00:00 | 14 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nRD1 tripped when restetting and goin... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 23:25:00+00:00 | 15 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nSending a few times the off command ... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 23:27:00+00:00 | 16 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nLHC SEQ: injection handshake closed;... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 23:27:00+00:00 | 17 | \n\n\n\nNB\n\n\n\n | \n\n\n\n\n\n\n\nBEAM MODE > NO BEAM \n\n\n\n\... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 23:45:00+00:00 | 18 | \n\n\n\nNB\n\n\n\n | \n\n\n\n\nFGCM test analysis by Markus Zerlaut... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 23:54:00+00:00 | 19 | \n\n\n\nNB\n\n\n\n | \n\n\n\n\nPrecycle of RD1.LR5 complete, and se... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 23:55:00+00:00 | 20 | \n\n\n\nSUP\n\n\n\n | \n\n\n\n\n\n\n\nBEAM MODE > SETUP \n\n\n\n\n\... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-02 23:57:00+00:00 | 21 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\n\n\n\nBEAM MODE > INJECTION PROBE BE... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 00:03:00+00:00 | 22 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nBad tune signal at injection, many l... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 00:07:00+00:00 | 23 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nChroma far off ~20 in V and ~10 in H... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 00:12:00+00:00 | 24 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\n\n\n\nBEAM MODE > INJECTION PHYSICS ... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 00:23:00+00:00 | 25 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nTL with 12b \n\n\n\n\n\n\n\n\n\n\n\n... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 00:25:00+00:00 | 26 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nreset BSRT intensifier on both beams... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 00:29:00+00:00 | 27 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nAverage emittance from Wirescanner ... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 00:29:00+00:00 | 28 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nIQC complains about bad scraping, bu... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 00:35:00+00:00 | 29 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nCalc all optics task failed. \n\n\n\... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 00:40:00+00:00 | 30 | \n\n\n\n 2 \n\n\n\n | \n\n\n\n\nGlobal Post Mortem Event Event Time... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
... | ... | ... | ... | ... |
2018-07-03 02:14:00+00:00 | 55 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nLHC SEQ: LOAD SPS FREQUENCY FOR PROT... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 02:14:00+00:00 | 56 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nARNAUD ANDRE BESSONNAT(ABESSONN) ass... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 02:32:00+00:00 | 57 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nEPC piquet called back. The PC is fi... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 02:35:00+00:00 | 58 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nprecycle RD1.LR5 \n\n\n\ncreated by ... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 02:37:00+00:00 | 59 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nLHC SEQ: injection handshake closed;... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 02:37:00+00:00 | 60 | \n\n\n\nSUP\n\n\n\n | \n\n\n\n\n\n\n\nBEAM MODE > SETUP \n\n\n\n\n\... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 02:41:00+00:00 | 61 | \n\n\n\nSUP\n\n\n\n | \n\n\n\n\nLHC SEQ: RF LBDS frequency checks do... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 02:43:00+00:00 | 62 | \n\n\n\nSUP\n\n\n\n | \n\n\n\n\nLHC SEQ: resynchronize RF beam contr... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 02:49:00+00:00 | 63 | \n\n\n\nSUP\n\n\n\n | \n\n\n\n\nnew part of RF preparation sequence ... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 03:06:00+00:00 | 64 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\n\n\n\nBEAM MODE > INJECTION PROBE BE... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 03:11:00+00:00 | 65 | \n\n\n\n 10 \n\n\n\n | \n\n\n\n\nQPS not OK on RQ9.L1 RQ4.R1 RQ9.R5 ... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 03:25:00+00:00 | 66 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\n\n\n\nBEAM MODE > INJECTION PHYSICS ... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 03:32:00+00:00 | 67 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nTL with 12b \n\n\n\n\n\n\n\n\n\n\n\n... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 03:47:00+00:00 | 68 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nLosses during injection 05:33:52 ... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 03:48:00+00:00 | 69 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\n\n\n\nBEAM MODE > PREPARE RAMP \n\n... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 03:48:00+00:00 | 70 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nLHC BEAM Process Time :The minimum t... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 03:48:00+00:00 | 71 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nBSRT Emittance scan \n\n\n\n\n\n\n\n... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 03:49:00+00:00 | 72 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\n'LHC Fast BCT v1.0.5' application ha... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 03:49:00+00:00 | 73 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nLHC SEQ: INJ PROT COLLIMATORS ARE OU... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 03:51:00+00:00 | 74 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\n\n\n\nBEAM MODE > RAMP \n\n\n\n\n\n... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 03:51:00+00:00 | 75 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nLHC SEQ: ramp started \n\n\n\ncreate... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 04:12:00+00:00 | 76 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\n\n\n\nBEAM MODE > FLAT TOP \n\n\n\n... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 04:13:00+00:00 | 77 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nLHC SEQ: END OF QCHANGE \n\n\n\ncrea... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 04:14:00+00:00 | 78 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\n\n\n\nBEAM MODE > SQUEEZE \n\n\n\n\... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 04:15:00+00:00 | 79 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\n'LHC Beam Quality Monitor' applicati... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 04:16:00+00:00 | 80 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nBSRT Emittance scan B1H blew up dur... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 04:18:00+00:00 | 81 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nstrong snapback \n\n\n\n\n\n\n\n\n\n... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 04:45:00+00:00 | 82 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nEND OF SQUEEZE SEGMENT: Beta* = 75 m... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 04:56:00+00:00 | 83 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\nBSRT Emittance scan \n\n\n\n\n\n\n\n... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
2018-07-03 04:56:00+00:00 | 84 | \n\n\n\nBI\n\n\n\n | \n\n\n\n\n\n\n\nINFO > Shift Summary \n\n\n\n... | http://elogbook.cern.ch/eLogbook/event_viewer.... |
84 rows × 4 columns
# Memory usage of the DataFrame in MB (memory_usage() reports bytes; divided twice by 1024)
aux.memory_usage().sum()/1024/1024
0.003204345703125
# Some naive filtering: select the 'link' of every entry logged at a given UTC timestamp
aux.loc['2018-07-03 04:56:00+00:00']['link']
2018-07-03 04:56:00+00:00 http://elogbook.cern.ch/eLogbook/event_viewer....
2018-07-03 04:56:00+00:00 http://elogbook.cern.ch/eLogbook/event_viewer....
Name: link, dtype: object