-
Notifications
You must be signed in to change notification settings - Fork 1
/
scrabler.py
165 lines (131 loc) · 4.95 KB
/
scrabler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
import sqlite3, time
import datetime, pytz,tools
def connect_db():
    """Create the ``dota2lounge.db`` database and its ``DAY`` table if missing.

    Opens ``dota2lounge.db`` in the current working directory (SQLite creates
    the file when it does not exist) and makes sure the ``DAY`` table is
    present. Calling the function again when the table already exists changes
    nothing.

    Layout of the ``DAY`` table with an example row::

        | id | time                | team1 | percent1 | team2   | percent2 | organization | status    |
        | 1  | 2015-05-17 10:00:00 | xD    | 35       | GeekFam | 65       | starladder   | announced |

    NOTE(review): ``parser_start_day`` stores the value returned by
    ``tools.unix_timestamp`` in ``time``, so the date string in the example
    row above may be outdated -- confirm.

    :return: None
    """
    con = sqlite3.connect('dota2lounge.db')
    try:
        con.execute('CREATE TABLE IF NOT EXISTS DAY ('
                    '"id" INTEGER PRIMARY KEY AUTOINCREMENT,'
                    '"time" varchar(30),'
                    '"team1" varchar(25),'
                    '"percent1" int(5),'
                    '"team2" varchar(25),'
                    '"percent2" int(5),'
                    '"organization" varchar(30),'
                    '"status" varchar(15));')
        con.commit()
    finally:
        # Release the database handle even when the statement fails.
        con.close()
def get_html(url):
    """Download *url* and return the raw response body.

    The request is sent with a ``Mozilla/5.0`` ``User-Agent`` header
    (presumably because the site rejects the default urllib agent -- confirm).
    The function is meant to be called with the dota2lounge URL.

    :param url: address of the page to download
    :return: response body as ``bytes``
    """
    url_request = Request(url, headers={"User-Agent": "Mozilla/5.0"})
    # The context manager closes the response once the body has been read.
    with urlopen(url_request) as response:
        return response.read()
def parser_start_day(html):
    """Parse the dota2lounge page and store matches not yet in the database.

    Every ``div.matchmain`` inside ``article#bets`` is read as one match:
    its start time, the organizing event and two teams with their bet
    percentages. A team whose block contains the ``won.png`` image gets the
    suffix ``-ALREADYEND`` appended to its name. Each match is inserted into
    the ``DAY`` table with status ``readyToAnnounce`` unless a row with the
    same time and team names already exists.

    If the page has no ``article#bets`` element nothing is written. Match
    rows that do not contain exactly two team blocks are skipped.

    :param html: HTML of the dota2lounge page
    :return: None
    """
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('article', id='bets')
    if table is None:
        # Page layout changed or the download was not the bets page.
        return

    # Collect everything per match row so that teams can never be attributed
    # to the wrong match.
    matches = []
    for row in table.find_all('div', class_='matchmain'):
        match_time = row.find('span', class_='match-time').text
        organization = row.find('div', class_='eventm').text
        teams = []
        for team in row.find_all('div', class_='teamtext'):
            name = team.find('b').text
            if team.parent.find('img', src='//dota2lounge.com/img/won.png'):
                name += '-ALREADYEND'
            # The last character of the percent text is dropped before the
            # integer conversion (presumably a trailing '%' -- confirm).
            percent = int(team.find('i', class_='percent-coins').text[:-1])
            teams.append((name, percent))
        if len(teams) != 2:
            continue
        matches.append((match_time, organization, teams[0], teams[1]))

    con = sqlite3.connect('dota2lounge.db')
    try:
        cur = con.cursor()
        for match_time, organization, first, second in matches:
            team1, percent1 = first
            team2, percent2 = second
            # 'Etc/GMT-3' is the tz database name for UTC+3 (the sign is
            # inverted in these names). The exact return type of
            # tools.unix_timestamp is not visible here -- confirm.
            timestamp = tools.unix_timestamp(match_time, 'Etc/GMT-3')
            cur.execute('SELECT * FROM DAY WHERE (time = ? and team1 = ? and team2 = ?)',
                        (timestamp, team1, team2))
            existing = cur.fetchall()
            if tools.debug == 1:
                print(existing)
            if not existing:
                cur.execute(
                    'INSERT INTO DAY (time, team1, percent1, team2, percent2, organization, status) VALUES (?,?,?,?,?,?,?)',
                    (timestamp, team1, percent1, team2, percent2, organization, 'readyToAnnounce'))
                if tools.debug == 1:
                    print('записал')
            elif tools.debug == 1:
                print('одинаково')
        con.commit()
        cur.close()
    finally:
        # Always release the database handle; uncommitted work is discarded
        # when an error interrupts the loop.
        con.close()
def new_day_grab():
    """Refresh the ``DAY`` table; intended to run once per day.

    Steps, in order:

    1. make sure the database and table exist (``connect_db``);
    2. download https://dota2lounge.com/ and store its matches
       (``parser_start_day``);
    3. delete rows where either team name ends with ``-ALREADYEND``;
    4. delete rows whose ``time`` value, read as a float, is smaller than
       the UTC timestamp taken when this function started.

    :return: None
    """
    utc = pytz.timezone('UTC')
    # Taken before the download, so matches starting during the download
    # are still kept.
    now_timestamp = float(datetime.datetime.now(utc).timestamp())
    connect_db()
    parser_start_day(get_html('https://dota2lounge.com/'))

    con = sqlite3.connect('dota2lounge.db')
    try:
        cur = con.cursor()
        cur.execute('DELETE FROM DAY WHERE team1 LIKE ? or team2 LIKE ?',
                    ('%-ALREADYEND', '%-ALREADYEND'))
        # Committed separately so this cleanup persists even if a malformed
        # time value makes the next step fail.
        con.commit()

        cur.execute('SELECT * FROM DAY')
        # Column 0 is the row id, column 1 is the stored match time.
        started_ids = [(row[0],) for row in cur.fetchall()
                       if float(row[1]) < now_timestamp]
        cur.executemany('DELETE FROM DAY WHERE id=?', started_ids)
        con.commit()
        cur.close()
    finally:
        # Release the database handle even when a statement fails.
        con.close()
def main():
    """Run ``new_day_grab`` every day at 09:00:00 Moscow time, forever.

    Instead of polling the clock in a tight loop, the time remaining until
    the next 09:00 (Europe/Moscow) is computed and slept through, so the
    process stays idle between runs and the grab is triggered exactly once
    per day.

    :return: never returns
    """
    msk = pytz.timezone('Europe/Moscow')
    while True:
        now = datetime.datetime.now(msk)
        next_run = now.replace(hour=9, minute=0, second=0, microsecond=0)
        if next_run < now:
            # Today's 09:00 has already passed; schedule for tomorrow.
            next_run += datetime.timedelta(days=1)
        time.sleep((next_run - now).total_seconds())
        new_day_grab()
# Start the daily scheduler only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()