{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Code for Elog scraping\n", "I am using the web site of the elogook to convert the data in a pandas dataframe." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/cvmfs/sft.cern.ch/lcg/views/LCG_95apython3/x86_64-centos7-gcc7-opt/bin/python\r\n" ] } ], "source": [ "# To get the stack you are using\n", "!which python" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'/eos/user/s/sterbini/MD_ANALYSIS/bblumi/docs/how-tos/ElogScraping'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# To get your path on EOS of your notebook\n", "pwd" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import re\n", "import requests\n", "import pandas as pd\n", "from bs4 import BeautifulSoup\n", "\n", "def getSoup(date='20180612', shift=1, elog=60):\n", " ''' This method will convert a specific shift of the elogbook in BeautifulSoup object\n", " - date is in yyyymmdd format\n", " - shift is an integer (1= 'morning', 2='afternoon', 3='night')\n", " - elog is the elog code (e.g., 60='LHC_OP')\n", " '''\n", " address=f'http://elogbook.cern.ch/eLogbook/eLogbook.jsp?lgbk={elog}&date={date}&shift={shift}'\n", " website_url = requests.get(address).text\n", " soup = BeautifulSoup(website_url,'lxml') \n", " return {'soup':soup,'elog':elog,'date':date,'shift':shift}\n", "\n", "def getDictionary(soup):\n", " ''' This method will convert a BeautifulSoup object from elogbook to a pandas DF\n", " - soup is the dictionary coming from the getSoup method\n", " '''\n", " # Table\n", " myTable = soup['soup'].find('table',{'id':'events_table'})\n", "\n", " # Table header and column description\n", " tableHead = myTable.find('thead')\n", " tableDescription=tableHead.find_all('b')\n", " fieldNumber=len(tableDescription)\n", " myFields=[]\n", " for i in tableDescription:\n", " myFields.append(i.getText())\n", "\n", " # Table body \n", " tableBody = myTable.find('tbody',{'id':'body_events'})\n", "\n", " # From the table body to the events\n", " myEvents=list(tableBody.children)\n", " myEvents=myEvents[1::2]\n", " myEventsShort=[]\n", " dictionaryList=[]\n", " for i in myEvents:\n", " myEvent= list(i.children)\n", " myEvent=myEvent[1::2]\n", " myDictionary={}\n", " for j,z in zip(myEvent,myFields):\n", " myDictionary[z]=list(j.children)[1].getText()\n", " myLink=myEvent[1].find_all('a', href=True)\n", " myDictionary['link']='http://elogbook.cern.ch/eLogbook/'+myLink[0]['href']\n", " dictionaryList.append(myDictionary)\n", "\n", " myDF=pd.DataFrame(dictionaryList)[myFields+['link']]\n", " myDF['Time']=myDF['Time'].apply(lambda x:re.sub('[^A-Za-z0-9:./;\\-,_]+', '', x))\n", " myDF['Time']=soup['date'] + ' ' + myDF['Time']\n", " myDF['Time']=myDF['Time'].apply(lambda x : pd.Timestamp(x))\n", " myDF=myDF.set_index('Time')\n", " # this is useful for the change of the day\n", " if soup['shift']==3:\n", " aux1= myDF.between_time('21:00','00:00')\n", " aux2=myDF.between_time('00:00','07:00')\n", " aux2.index=aux2.index+pd.Timedelta('1d')\n", " myDF=pd.concat([aux1,aux2])\n", " myDF.index=myDF.index.tz_localize('CET').tz_convert('UTC')\n", " myDF.index.name=None\n", " return myDF" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# An example" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# first I get the soup (this is a bit slow, but is comparable to the loading time of a elog page)\n", "soup=getSoup(date='20180702', shift=3, elog=60)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | # | \n", "PROTONPHY | \n", "Comment | \n", "link | \n", "
---|---|---|---|---|
2018-07-02 21:00:00+00:00 | \n", "1 | \n", "\\n\\n\\n\\nNB\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nGuy and Michaela \\n\\n\\n\\ncreated by ... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 21:52:00+00:00 | \n", "2 | \n", "\\n\\n\\n\\nNB\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nStart precycle of EIS and RD1.LR5 \\n... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 21:52:00+00:00 | \n", "3 | \n", "\\n\\n\\n\\n 1 \\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nRestore RSS.A56B1, which had tripped... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 22:08:00+00:00 | \n", "4 | \n", "\\n\\n\\n\\nSUP\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n\\n\\n\\nBEAM MODE > SETUP \\n\\n\\n\\n\\n\\... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 22:15:00+00:00 | \n", "5 | \n", "\\n\\n\\n\\nSUP\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nPrecycle of RD1.LR5 complete \\n\\n\\n\\... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 22:19:00+00:00 | \n", "6 | \n", "\\n\\n\\n\\nSUP\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nLHC SEQ: QPS configuration cross che... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 22:30:00+00:00 | \n", "7 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n\\n\\n\\nBEAM MODE > INJECTION PROBE BE... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 22:31:00+00:00 | \n", "8 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nDry dump, XPOC interlock B2, MKD ris... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 22:38:00+00:00 | \n", "9 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nRD1 replaced. SIS needs to be update... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 22:39:00+00:00 | \n", "10 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nAnother dry dump to test the XPOC is... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 22:39:00+00:00 | \n", "11 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n|*** XPOC error has been reset by us... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 22:49:00+00:00 | \n", "12 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nRamping the RD1.LR5 to nominal curre... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 23:16:00+00:00 | \n", "13 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nSent FGC.FAULTS -> FGC_STATE to RD1 ... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 23:20:00+00:00 | \n", "14 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nRD1 tripped when restetting and goin... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 23:25:00+00:00 | \n", "15 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nSending a few times the off command ... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 23:27:00+00:00 | \n", "16 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nLHC SEQ: injection handshake closed;... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 23:27:00+00:00 | \n", "17 | \n", "\\n\\n\\n\\nNB\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n\\n\\n\\nBEAM MODE > NO BEAM \\n\\n\\n\\n\\... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 23:45:00+00:00 | \n", "18 | \n", "\\n\\n\\n\\nNB\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nFGCM test analysis by Markus Zerlaut... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 23:54:00+00:00 | \n", "19 | \n", "\\n\\n\\n\\nNB\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nPrecycle of RD1.LR5 complete, and se... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 23:55:00+00:00 | \n", "20 | \n", "\\n\\n\\n\\nSUP\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n\\n\\n\\nBEAM MODE > SETUP \\n\\n\\n\\n\\n\\... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-02 23:57:00+00:00 | \n", "21 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n\\n\\n\\nBEAM MODE > INJECTION PROBE BE... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 00:03:00+00:00 | \n", "22 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nBad tune signal at injection, many l... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 00:07:00+00:00 | \n", "23 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nChroma far off ~20 in V and ~10 in H... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 00:12:00+00:00 | \n", "24 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n\\n\\n\\nBEAM MODE > INJECTION PHYSICS ... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 00:23:00+00:00 | \n", "25 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nTL with 12b \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 00:25:00+00:00 | \n", "26 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nreset BSRT intensifier on both beams... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 00:29:00+00:00 | \n", "27 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nAverage emittance from Wirescanner ... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 00:29:00+00:00 | \n", "28 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nIQC complains about bad scraping, bu... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 00:35:00+00:00 | \n", "29 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nCalc all optics task failed. \\n\\n\\n\\... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 00:40:00+00:00 | \n", "30 | \n", "\\n\\n\\n\\n 2 \\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nGlobal Post Mortem Event Event Time... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2018-07-03 02:14:00+00:00 | \n", "55 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nLHC SEQ: LOAD SPS FREQUENCY FOR PROT... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 02:14:00+00:00 | \n", "56 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nARNAUD ANDRE BESSONNAT(ABESSONN) ass... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 02:32:00+00:00 | \n", "57 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nEPC piquet called back. The PC is fi... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 02:35:00+00:00 | \n", "58 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nprecycle RD1.LR5 \\n\\n\\n\\ncreated by ... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 02:37:00+00:00 | \n", "59 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nLHC SEQ: injection handshake closed;... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 02:37:00+00:00 | \n", "60 | \n", "\\n\\n\\n\\nSUP\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n\\n\\n\\nBEAM MODE > SETUP \\n\\n\\n\\n\\n\\... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 02:41:00+00:00 | \n", "61 | \n", "\\n\\n\\n\\nSUP\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nLHC SEQ: RF LBDS frequency checks do... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 02:43:00+00:00 | \n", "62 | \n", "\\n\\n\\n\\nSUP\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nLHC SEQ: resynchronize RF beam contr... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 02:49:00+00:00 | \n", "63 | \n", "\\n\\n\\n\\nSUP\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nnew part of RF preparation sequence ... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 03:06:00+00:00 | \n", "64 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n\\n\\n\\nBEAM MODE > INJECTION PROBE BE... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 03:11:00+00:00 | \n", "65 | \n", "\\n\\n\\n\\n 10 \\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nQPS not OK on RQ9.L1 RQ4.R1 RQ9.R5 ... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 03:25:00+00:00 | \n", "66 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n\\n\\n\\nBEAM MODE > INJECTION PHYSICS ... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 03:32:00+00:00 | \n", "67 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nTL with 12b \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 03:47:00+00:00 | \n", "68 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nLosses during injection 05:33:52 ... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 03:48:00+00:00 | \n", "69 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n\\n\\n\\nBEAM MODE > PREPARE RAMP \\n\\n... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 03:48:00+00:00 | \n", "70 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nLHC BEAM Process Time :The minimum t... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 03:48:00+00:00 | \n", "71 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nBSRT Emittance scan \\n\\n\\n\\n\\n\\n\\n\\n... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 03:49:00+00:00 | \n", "72 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n'LHC Fast BCT v1.0.5' application ha... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 03:49:00+00:00 | \n", "73 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nLHC SEQ: INJ PROT COLLIMATORS ARE OU... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 03:51:00+00:00 | \n", "74 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n\\n\\n\\nBEAM MODE > RAMP \\n\\n\\n\\n\\n\\n... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 03:51:00+00:00 | \n", "75 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nLHC SEQ: ramp started \\n\\n\\n\\ncreate... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 04:12:00+00:00 | \n", "76 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n\\n\\n\\nBEAM MODE > FLAT TOP \\n\\n\\n\\n... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 04:13:00+00:00 | \n", "77 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nLHC SEQ: END OF QCHANGE \\n\\n\\n\\ncrea... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 04:14:00+00:00 | \n", "78 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n\\n\\n\\nBEAM MODE > SQUEEZE \\n\\n\\n\\n\\... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 04:15:00+00:00 | \n", "79 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n'LHC Beam Quality Monitor' applicati... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 04:16:00+00:00 | \n", "80 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nBSRT Emittance scan B1H blew up dur... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 04:18:00+00:00 | \n", "81 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nstrong snapback \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 04:45:00+00:00 | \n", "82 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nEND OF SQUEEZE SEGMENT: Beta* = 75 m... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 04:56:00+00:00 | \n", "83 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\nBSRT Emittance scan \\n\\n\\n\\n\\n\\n\\n\\n... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
2018-07-03 04:56:00+00:00 | \n", "84 | \n", "\\n\\n\\n\\nBI\\n\\n\\n\\n | \n", "\\n\\n\\n\\n\\n\\n\\n\\nINFO > Shift Summary \\n\\n\\n\\n... | \n", "http://elogbook.cern.ch/eLogbook/event_viewer.... | \n", "
84 rows × 4 columns
\n", "