-
Notifications
You must be signed in to change notification settings - Fork 0
/
database.py
122 lines (105 loc) · 4.11 KB
/
database.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# -*- coding: utf-8 -*-
# Copyright (c) 2018-2019, Yaroslav Zotov, https://github.com/qiray/
# All rights reserved.
# This file is part of MarkovTextGenerator.
# MarkovTextGenerator is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# MarkovTextGenerator is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with MarkovTextGenerator. If not, see <https://www.gnu.org/licenses/>.
'''database module'''
import sqlite3
import text
# Path of the SQLite database file opened by the functions in this module.
DBFILE = 'data.db'
def init_db(dbfile=None):
    """Create the ``pairs`` and ``sources`` tables if they do not exist yet.

    dbfile -- path of the SQLite database to initialise; when omitted the
              module-level DBFILE is used.

    Existing tables are left untouched (CREATE TABLE IF NOT EXISTS), so
    calling this function repeatedly is safe.
    """
    conn = sqlite3.connect(DBFILE if dbfile is None else dbfile)
    try:
        # ``id`` must be declared exactly as INTEGER PRIMARY KEY to become an
        # alias of the rowid. The former "INTEGER AUTO INCREMENT PRIMARY KEY"
        # was read by SQLite as a column of type "INTEGER AUTO INCREMENT",
        # which left ``id`` NULL for every inserted source.
        conn.executescript("""
            CREATE TABLE IF NOT EXISTS pairs (
                begin TEXT,
                end TEXT,
                is_begin INTEGER,
                is_end INTEGER,
                count INTEGER DEFAULT 0,
                size INTEGER,
                source INTEGER,
                PRIMARY KEY (begin, end)
            );
            CREATE TABLE IF NOT EXISTS sources (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name text
            );
        """)
        conn.commit()
    finally:
        # Release the database handle even when schema creation fails.
        conn.close()
def start_connection(dbfile=None):
    """Open the database and return a ``(connection, cursor)`` tuple.

    dbfile -- path of the SQLite database to open; when omitted the
              module-level DBFILE is used.

    The caller is responsible for committing and closing the returned
    connection.
    """
    conn = sqlite3.connect(DBFILE if dbfile is None else dbfile)
    return conn, conn.cursor()
def end_connecion(conn):
    """Commit pending changes and close the connection to the database.

    conn -- open database connection.

    The connection is closed even when the commit raises; the exception is
    then propagated to the caller.
    """
    try:
        conn.commit()
    finally:
        conn.close()
def save_source(source_name):
    """Insert a source name into ``sources`` and return the new row id.

    source_name -- value stored in the ``name`` column.

    Returns the value reported by SQLite's last_insert_rowid() for the
    connection used for the insert.
    """
    conn, cursor = start_connection()
    try:
        cursor.execute('INSERT OR IGNORE INTO sources (name) VALUES(?);', (source_name,))
        # last_insert_rowid() is a per-connection scalar, so no FROM clause is
        # needed; selecting it FROM sources returned one copy per table row.
        cursor.execute('SELECT last_insert_rowid();')
        result = cursor.fetchone()
    except Exception:
        # Do not leak the handle on failure; closing without a commit
        # discards the unfinished insert.
        conn.close()
        raise
    end_connecion(conn)
    return result[0] if result else 1
def save_tokens(tokens, cursor, number=1):
    """Write tokens to the ``pairs`` table and bump their counters.

    The caller owns the connection: call start_connection() before this
    function and end_connecion() once all tokens have been saved.

    tokens -- iterable of objects exposing begin, end, is_begin, is_end
              and source attributes
    cursor -- cursor of the already opened database
    number -- value stored in the ``size`` column (default 1)
    """
    # Background on insert throughput:
    # https://stackoverflow.com/questions/1711631/improve-insert-per-second-performance-of-sqlite
    insert_sql = (
        '\n'
        'INSERT OR IGNORE INTO pairs(\n'
        'begin,\n'
        'end,\n'
        'is_begin,\n'
        'is_end,\n'
        'size,\n'
        'source\n'
        ') VALUES(?, ?, ?, ?, ?, ?);\n'
    )
    update_sql = 'UPDATE pairs SET count = count + 1 WHERE begin = ? AND end = ? AND size = ?;'
    for item in tokens:
        # Create the row on first sight (ignored when the pair already exists)...
        insert_args = (item.begin, item.end, item.is_begin, item.is_end, number, item.source)
        cursor.execute(insert_sql, insert_args)
        # ...then count this occurrence.
        update_args = (item.begin, item.end, number)
        cursor.execute(update_sql, update_args)
def get_start_token():
    """Pick one random row flagged ``is_begin`` and return its first column.

    Returns an empty string when the table holds no such row.
    """
    connection, cur = start_connection()
    row = cur.execute(
        'SELECT * from pairs WHERE is_begin = 1 ORDER BY RANDOM() LIMIT 1;'
    ).fetchone()
    end_connecion(connection)
    if row:
        return row[0]
    return ''
def get_pairs_for_list(tokens_list, number):
    """Return the pairs that begin with the chosen start token.

    tokens_list -- sequence of token strings; its last item is the start token
    number      -- how many trailing words form the fallback start key

    The last item of tokens_list is looked up first. When no row begins with
    it, the items are joined with spaces, split again with
    text.split_into_words() and the last ``number`` words (joined with
    spaces) are tried instead.

    Returns a ``(rows, total)`` tuple: the matching rows of ``pairs`` and the
    sum of their ``count`` column, or ``([], 0)`` when nothing matches or
    tokens_list is empty.
    """
    if not tokens_list:
        # No start token to look up; indexing [-1] would raise IndexError.
        return [], 0
    start = tokens_list[-1]
    conn, cursor = start_connection()
    cursor.execute('SELECT * from pairs WHERE begin = ?;', (start,))
    result = cursor.fetchall()
    if not result:
        # Fall back to a key rebuilt from the last ``number`` words.
        start = ' '.join(text.split_into_words(' '.join(tokens_list))[-number:])
        cursor.execute('SELECT * from pairs WHERE begin = ?;', (start,))
        result = cursor.fetchall()
    cursor.execute('SELECT SUM(count) from pairs WHERE begin = ?;', (start,))
    count = cursor.fetchone()
    end_connecion(conn)
    if not result:
        return [], 0
    total = count[0] if count else 0
    # SUM() yields NULL when every summed value is NULL; report that as 0.
    return result, total if total is not None else 0
def is_pair_end(pair):
    '''Tell whether either of the first two items of pair ends a stored pair.

    Returns True when some row flagged ``is_end`` has an ``end`` value equal
    to pair[0] or pair[1], False otherwise.
    '''
    connection, cur = start_connection()
    candidates = (pair[0], pair[1],)
    row = cur.execute(
        'SELECT * from pairs WHERE is_end = 1 AND (end = ? OR end = ?);',
        candidates,
    ).fetchone()
    end_connecion(connection)
    return bool(row)