Introduction¶

Singapore has one of the highest smartphone penetration rates worldwide, at 149.2% (Sept 2017). In 2017 alone, we consumed over 14 Petabytes of mobile data. On average, each person would have consumed 25 GB of data per year! Telecoms have since taken note of the demand and have begun aggressively pushing out mobile plans with more data, albeit at a higher price.

With this in mind, I set out to create a Telegram Bot to help consumers decide which plan is best suited for their needs.

Preparing the Data¶

M1¶

from bs4 import BeautifulSoup
import pandas as pd
import re
import datetime
from urllib.request import Request, urlopen

# Before we begin scraping the site, let's define a function to clean the data.

# For parsing the data
def clean_text(s):
    """Convert the scraped text of a plan attribute or price into a number.

    Args:
        s: Raw text of an HTML element, e.g. "3 GB + 2 GB", "300MB",
            "$ 29.96" or "Unlimited".

    Returns:
        10000 if the text contains "Unlimited"; "" if the text is empty or
        contains no digits; otherwise a float. Amounts separated by "+" are
        added together, a bare 300 is returned as 0.3 and "300+1" as 1.3.

    Raises:
        ValueError: If what is left after stripping is not a valid number
            (for example "1.2.3").
    """
    # "Unlimited" is represented by the sentinel value 10000.
    if "Unlimited" in s:
        return 10000

    if s == "":
        return ""

    # Keep only digits, "+" and "." (drops units, currency signs, whitespace).
    new_s = re.sub(r"[^0-9+.]", "", s)

    # Special case "300MB + 1GB": mixed units, so it cannot simply be summed.
    if new_s == "300+1":
        return 1.3

    # Nothing numeric survived the clean-up.
    if new_s == "":
        return ""

    # A bare 300 is taken to be 300 MB, i.e. 0.3 GB.
    if new_s == "300":
        return 0.3

    # "X GB + Y GB": add the terms explicitly rather than eval()-ing scraped
    # text. The result is the same for well-formed input, and inputs such as
    # "3+" or "03+2" no longer raise SyntaxError.
    if "+" in new_s:
        terms = [term for term in new_s.split("+") if term]
        if not terms:
            return ""
        return float(sum(float(term) for term in terms))

    return float(new_s)

M1's web store is relatively simple to scrape data from. We begin by opening and reading the site.

# Download and parse the M1 phone listing page. The response is closed as
# soon as it has been read, so the HTTP connection is not left open.
with urlopen("https://www.m1.com.sg/personal/mobile/phones/filters/all-plans/all/all/0/1500/0/0/none") as site:
    soup = BeautifulSoup(site.read(), "html.parser")

# Will hold the link (href) of each individual phone page.
links = []

Next we extract all the links (each pointing to a single phone model) from the site and store them in a list.

# Collect the href of every phone link. Each run of whitespace inside the
# href is replaced by a single '%20' so the path can be used in a URL.
for anchor in soup.find_all("a", {"class": "light-blue hidetag"}):
    links.append('%20'.join(anchor['href'].split()))

We then parse each link to fill our 8 main columns. This format will also be used to store information from the other three telecoms.

# Column layout shared by the combined table and every per-model table.
contract_columns = ('Provider', 'Phone', 'Plan', 'TalkTime(Mins)', 'SMS/MMS',
                    'Data(GB)', 'PayNow($)', 'PerMonth($)')
df_contract = pd.DataFrame(columns=contract_columns)

# Not used in this cell. It used to be re-imported on every loop iteration;
# it is kept (once) so the name stays bound for any code that relies on it.
import csv

model_frames = []
# Go through each of the phone links and build one table per model.
for link in links:
    # Close the HTTP response as soon as the page has been read.
    with urlopen("https://www.m1.com.sg" + link) as site2:
        soup2 = BeautifulSoup(site2.read(), "html.parser")

    # Model name, with every run of whitespace collapsed to a single space.
    name = soup2.find("div", {"class": "title"})
    model = re.sub(r'\s+', ' ', name.get_text())

    # Plan names
    plans = [
        div.get_text()
        for div in soup2.find_all("div", {"class": "title color-orange font-size-14 font-weight-bold"})
    ]

    # TalkTime, SMS/MMS, Data: read below as three consecutive entries per plan
    details = [
        clean_text(div.get_text())
        for div in soup2.find_all("div", {"class": "desc font-size-14"})
    ]

    # Pay Now
    price1 = [
        clean_text(div.get_text())
        for div in soup2.find_all("div", {"class": "font-size-15 line-height-20 color-orange font-weight-bold"})
    ]

    # Per Month
    price2 = [
        clean_text(div.get_text())
        for div in soup2.find_all("div", {"class": "font-size-15 line-height-20 color-3" })
    ]

    # Store the data in a dataframe, one row per plan.
    df_model = pd.DataFrame(columns=contract_columns)
    for i, plan in enumerate(plans):
        # 'Equipment Only' has no monthly instalment, so price2 is not read
        # for it and the column is left blank.
        # NOTE(review): price2 is indexed by plan position; if a page lists
        # 'Equipment Only' before another plan and omits its monthly price
        # element, later plans would be misaligned. Confirm against the markup.
        per_month = '' if plan == 'Equipment Only' else price2[i]
        df_model.loc[i] = ['M1', model, plan,
                           details[3*i], details[3*i + 1], details[3*i + 2],
                           price1[i], per_month]

    # Pages without any plan contribute no rows.
    if not df_model.empty:
        model_frames.append(df_model)

# DataFrame.append was removed in pandas 2.0, so the per-model tables are
# concatenated in one go. Row labels restart at 0 for each model, as before.
if model_frames:
    df_contract = pd.concat(model_frames)

# Show ten random rows as a quick sanity check.
df_contract.sample(n=10)

Success! We got all the available phone models for M1 as well as their relevant information.

Telegram Bot¶

A Telegram bot wrapper was released recently, making it much easier to write a bot in Python.

The legacy code can still be viewed at

from telegram.ext import Updater, CommandHandler, MessageHandler, Filters
import logging

# Configure root logging at INFO level (timestamp, logger name, level and
# message on every line) to help with troubleshooting.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Logger used by the handlers in this module.
logger = logging.getLogger(__name__)

Next, we will define a few command handlers. These form the basis of almost every bot written in the telegram bot python wrapper.

# Placeholder for the bot's API token; replace it with your own bot's token
# before running. main() passes this value to Updater.
token = '---YOUR BOT TOKEN HERE---'
def start(bot, update):
    """Send a message when the command /start is issued.

    Replies with a welcome text, then with a second message carrying an
    inline keyboard that offers the three search modes (model, data, price).

    Args:
        bot: First callback argument; not used here.
        update: The incoming update; both replies go to ``update.message``.
    """
    # These two classes were never imported anywhere in the file, so /start
    # failed with NameError. Importing them here keeps the fix local.
    from telegram import InlineKeyboardButton, InlineKeyboardMarkup

    update.message.reply_text('Welcome to Telcobot! Choose one of the three options to begin '
                              'the search for your ideal mobile plan!')

    # Two buttons on the first row, one on the second. callback_data is the
    # value sent back when a button is pressed.
    keyboard = [[InlineKeyboardButton("Search by model", callback_data='1'),
                 InlineKeyboardButton("Search by data", callback_data='2')],
                [InlineKeyboardButton("Search by price", callback_data='3')]]

    reply_markup = InlineKeyboardMarkup(keyboard)

    update.message.reply_text('Please choose:', reply_markup=reply_markup)

def error(bot, update, error):
    """Write a warning-level log entry naming the update and the error it caused."""
    template = 'Update "%s" caused error "%s"'
    logger.warning(template, update, error)


def main():
    """Start the bot and keep it running until it is stopped."""
    # Build the updater from the bot token; handlers go on its dispatcher.
    updater = Updater(token)
    dispatcher = updater.dispatcher

    # Answer the /start command and log every error.
    dispatcher.add_handler(CommandHandler('start', start))
    dispatcher.add_error_handler(error)

    # start_polling() is non-blocking, so idle() is what keeps the bot
    # running until Ctrl-C is pressed or the process receives SIGINT,
    # SIGTERM or SIGABRT, at which point the bot is stopped gracefully.
    updater.start_polling()
    updater.idle()


if __name__ == '__main__':
    main()

	Provider	Phone	Plan	TalkTime(Mins)	SMS/MMS	Data(GB)	PayNow($)	PerMonth($)
0	M1	Alcatel OneTouch Pop S3	mySIM(e) 40	100	100	5	0.0	40
3	M1	iPhone 8 256GB	mySIM(e) 118	10000	10000	10000	308.0	118
7	M1	Samsung Galaxy Tab A 8.0	Reg+	400	1500	5	0.0	82
11	M1	Samsung Galaxy Tab A 7.0	BlackBerry MessageSurf+	50	10000	3	48.0	29.96
4	M1	Moto G5 Plus	Lite	100	500	0.3	198.0	28
8	M1	iPhone 8 Plus 64GB	i-Max	800	2000	7	285.0	102
1	M1	Nokia 2	mySIM(e) 70	100	100	15	0.0	70
8	M1	Nokia 8	Max	800	2000	7	0.0	102
3	M1	OPPO R11s Plus	mySIM(e) 118	10000	10000	10000	0.0	118
5	M1	LG G6+	Lite+	200	1000	3	398.0	42