get_dailydev_stream.py
import json
import pathlib

import requests


class DailyDevScraper:
    def __init__(self, cookie):
        self.cookie = cookie
        # Browser-like headers captured from a Chrome session; daily.dev
        # expects an authenticated cookie alongside a plausible user agent.
        self.headers = {
            "authority": "app.daily.dev",
            "accept": "*/*",
            "accept-language": "tr,en-US;q=0.9,en;q=0.8",
            "content-type": "application/json",
            "cookie": self.cookie,
            "origin": "https://app.daily.dev",
            "referer": "https://app.daily.dev/",
            "sec-ch-ua": '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
        }
        self.graphql_data = {
            "query": """
                query Feed(
                    $loggedIn: Boolean! = false
                    $first: Int
                    $after: String
                    $ranking: Ranking
                    $version: Int
                    $supportedTypes: [String!] = ["article", "share"]
                ) {
                    page: feed(
                        first: $first
                        after: $after
                        ranking: $ranking
                        version: $version
                        supportedTypes: $supportedTypes
                    ) {
                        ...FeedPostConnection
                    }
                }
                fragment FeedPostConnection on PostConnection {
                    pageInfo {
                        hasNextPage
                        endCursor
                    }
                    edges {
                        node {
                            ...FeedPost
                            ...UserPost @include(if: $loggedIn)
                        }
                    }
                }
                fragment FeedPost on Post {
                    id
                    title
                    createdAt
                    image
                    readTime
                    source {
                        ...SourceShortInfo
                    }
                    sharedPost {
                        ...SharedPostInfo
                    }
                    permalink
                    numComments
                    numUpvotes
                    commentsPermalink
                    scout {
                        ...UserShortInfo
                    }
                    author {
                        ...UserShortInfo
                    }
                    trending
                    tags
                    type
                    private
                }
                fragment SharedPostInfo on Post {
                    id
                    title
                    image
                    readTime
                    permalink
                    commentsPermalink
                    summary
                    createdAt
                    private
                    scout {
                        ...UserShortInfo
                    }
                    author {
                        ...UserShortInfo
                    }
                    type
                    tags
                    source {
                        ...SourceShortInfo
                    }
                }
                fragment SourceShortInfo on Source {
                    id
                    handle
                    name
                    permalink
                    description
                    image
                    type
                    active
                }
                fragment UserShortInfo on User {
                    id
                    name
                    image
                    permalink
                    username
                    bio
                }
                fragment UserPost on Post {
                    read
                    upvoted
                    commented
                    bookmarked
                }
            """,
            "variables": {
                "version": 10,
                "ranking": "POPULARITY",
                "first": 17,
                "loggedIn": True,
            },
        }
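
    # Illustrative sketch, not in the original script: the Feed query above
    # already asks for pageInfo { hasNextPage endCursor }, so further pages
    # can be fetched by passing each endCursor back as the $after variable.
    # The method name, page limit, and the standard GraphQL response envelope
    # ({"data": {"page": ...}}) are assumptions for illustration.
    def scrape_pages(self, max_pages=3):
        session = requests.Session()
        session.headers.update(self.headers)
        # Copy the variables so the template on the instance stays unchanged
        payload = {
            "query": self.graphql_data["query"],
            "variables": dict(self.graphql_data["variables"]),
        }
        posts = []
        for _ in range(max_pages):
            response = session.post(
                "https://app.daily.dev/api/graphql", json=payload
            )
            response.raise_for_status()
            page = response.json()["data"]["page"]
            posts.extend(edge["node"] for edge in page["edges"])
            if not page["pageInfo"]["hasNextPage"]:
                break
            # Resume from the cursor returned with the previous page
            payload["variables"]["after"] = page["pageInfo"]["endCursor"]
        return posts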
    def scrape(self, filename="daily.json"):
        # Reuse one connection for the request via requests.Session
        session = requests.Session()
        session.headers.update(self.headers)
        # pathlib handles the output path portably
        file_path = pathlib.Path(filename)
        try:
            response = session.post(
                "https://app.daily.dev/api/graphql", json=self.graphql_data
            )
            response.raise_for_status()
            data = response.json()
            # Write the raw JSON response to the output file
            with file_path.open("w", encoding="utf-8") as f:
                json.dump(data, f)
        except requests.exceptions.RequestException as e:
            # Report the failure instead of crashing
            print(f"Request failed: {e}")
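

# Hypothetical usage (the cookie value is a placeholder; paste the cookie
# header from an authenticated daily.dev browser session):
if __name__ == "__main__":
    scraper = DailyDevScraper(cookie="<your daily.dev session cookie>")
    scraper.scrape("daily.json")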