@@ -1,11 +1,11 @@
-import requests
-from bs4 import BeautifulSoup
-from module.headers import headers, search_type
 import time
+
 import aiohttp
-import asyncio
 import async_timeout
-from concurrent.futures import ALL_COMPLETED
+import requests
+from bs4 import BeautifulSoup
+
+from module.headers import headers, search_type
 
 
 # Implement the async HTTP fetch function
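The fetch helper this comment introduces lies outside the hunks shown here. A minimal sketch consistent with the imports above and the await fetch(session, url) call later in the diff; the 10-second cap and the headers usage are assumptions, not part of the commit:

async def fetch(session, url):
    # Cap each request; async_timeout raises TimeoutError past the limit (assumed 10s).
    async with async_timeout.timeout(10):
        async with session.get(url, headers=headers) as response:
            return await response.text()  # hand the raw HTML back to the parser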
@@ -25,7 +25,7 @@ def __init__(self, dc_id):
 
         self.__g_type = self.get_gallary_type(dc_id)  # fetch the gallery type
 
-    # Get the gallery type (minor, regular)
+    # Get the gallery type (minor, regular) - handled synchronously since it is called from the constructor
     def get_gallary_type(self, dc_id):
        # Send a request to the URL and check whether it redirects.
         url = f'https://gall.dcinside.com/board/lists/?id={dc_id}'
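The body of get_gallary_type continues past this hunk. A plausible completion under the redirect-check comment; the mgallery substring test and the two return values are assumptions, inferred only from the g_type URL template used in article_parse below:

res = requests.get(url, headers=headers)  # deliberately synchronous: called from __init__
# Assumption: minor galleries redirect to /mgallery/board/..., regular ones do not.
if 'mgallery' in res.url:
    return 'mgallery/board'
return 'board'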
@@ -55,9 +55,9 @@ async def article_parse(self, keyword, s_type, page=1, search_pos=''):
         dc_id = self.__dc_id
 
         url = f"https://gall.dcinside.com/{g_type}/lists/?id={dc_id}&page={page}&search_pos={search_pos}&s_type={s_type}&s_keyword={keyword}"
-        print(url)
+        # print(url)
 
-        res = await fetch(session, url) # async HTTP request
+        res = await fetch(session, url)  # async HTTP request
         soup = BeautifulSoup(res, "lxml")
 
         article_list = soup.select(".us-post")  # select every article box
@@ -83,7 +83,7 @@ async def article_parse(self, keyword, s_type, page=1, search_pos=''):
             recommend = element.select(".gall_recommend")[0].text
             # print(link, num, title, reply, nickname, timestamp, refresh, recommend)
 
-            self.__all_link[num] = link;  # store the link
+            self.__all_link[num] = link  # store the link
 
             article_data = {'num': num, 'title': title, 'reply': reply, 'nickname': nickname,
                             'timestamp': timestamp,
@@ -107,10 +107,10 @@ async def page_explorer(self, keyword, s_type, search_pos=''):
         article_list = soup.select(".us-post")  # select every article box
         article_count = len(article_list)
         if article_count == 0:  # no articles at all
-            page['start'] = 0;
+            page['start'] = 0
             page['end'] = 0  # so there are no pages
         elif article_count < 20:  # fewer than 20 articles
-            page['start'] = 1;
+            page['start'] = 1
             page['end'] = 1  # there is only one page.
         else:
             # Check whether an end-page button exists
@@ -119,14 +119,14 @@ async def page_explorer(self, keyword, s_type, search_pos=''):
             if len(page_end_btn) == 2:
                 page_end_btn = page_end_btn[0]
                 final_page = int(page_end_btn['href'].split('&page=')[1].split("&")[0]) + 1
-                page['start'] = 1;
+                page['start'] = 1
                 page['end'] = final_page
             else:
                 page_box = soup.select(
                     '#container > section.left_content.result article > div.bottom_paging_wrap > '
                     'div.bottom_paging_box > a')
 
-                page['start'] = 1;
+                page['start'] = 1
                 if len(page_box) == 1:
                     page['end'] = 1
                 else:
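As a worked example of the final_page expression in the hunk above, applied to a hypothetical href taken from the end-page button:

href = '/board/lists/?id=baseball_new11&page=4972&search_pos=-123'  # hypothetical value
final_page = int(href.split('&page=')[1].split('&')[0]) + 1
# split('&page=')[1] leaves '4972&search_pos=-123', split('&')[0] isolates '4972',
# so final_page == 4973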
@@ -197,27 +197,27 @@ def run():
 
             search_pos = page['next_pos']
 
-
-async def main():
-    parser = DCArticleParser(dc_id="baseball_new11")  # create the parser object
-    keyword, stype = "ㅎㅇ", search_type["제목+내용"]
-
-    first_page = await parser.page_explorer(keyword, stype)
-    first_next_pos = first_page["next_pos"]
-
-    tmp_pos = first_next_pos
-    task_lst = []
-    for i in range(1, 100):
-        future = asyncio.ensure_future(parser.article_parse(keyword, stype, page=1, search_pos=tmp_pos))  # a future is similar to a JS promise
-        task_lst.append(future)
-        tmp_pos = str(int(tmp_pos) + 10000)
-
-
-    start = time.time()
-    completed, pending = await asyncio.wait(task_lst, return_when=ALL_COMPLETED)
-    print(completed)
-    end = time.time()
-    print(f'>>> total async processing time: {end - start}')
-
-# On Python 3.7+ this can simply be driven by asyncio.run
-asyncio.run(main())
+# async def main():
+#     parser = DCArticleParser(dc_id="baseball_new11")  # create the parser object
+#     keyword, stype = "ㅎㅇ", search_type["제목+내용"]
+#
+#     first_page = await parser.page_explorer(keyword, stype)
+#     first_next_pos = first_page["next_pos"]
+#
+#     tmp_pos = first_next_pos
+#     task_lst = []
+#     for i in range(1, 100):
+#         future = asyncio.ensure_future(
+#             parser.article_parse(keyword, stype, page=1, search_pos=tmp_pos))  # a future is similar to a JS promise
+#         task_lst.append(future)
+#         tmp_pos = str(int(tmp_pos) + 10000)
+#
+#     start = time.time()
+#     completed, pending = await asyncio.wait(task_lst, return_when=ALL_COMPLETED)
+#     print(completed)
+#     end = time.time()
+#     print(f'>>> total async processing time: {end - start}')
+#
+#
+# # On Python 3.7+ this can simply be driven by asyncio.run
+# asyncio.run(main())
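If the demo is revived, the Python 3.7+ style the last comment mentions boils down to asyncio.run plus a task fan-out. A sketch reusing the dc_id, keyword, and 10000-step search_pos stride from the commented-out code above; asyncio must be re-imported, since this commit drops it from the module imports:

import asyncio

async def main():
    parser = DCArticleParser(dc_id="baseball_new11")
    keyword, stype = "ㅎㅇ", search_type["제목+내용"]
    first_page = await parser.page_explorer(keyword, stype)
    base_pos = int(first_page["next_pos"])
    # Same fan-out as the old main(): 99 searches, stepping search_pos by 10000 each time.
    tasks = [parser.article_parse(keyword, stype, page=1,
                                  search_pos=str(base_pos + i * 10000))
             for i in range(99)]
    results = await asyncio.gather(*tasks)  # gather stands in for wait(..., ALL_COMPLETED)
    print(len(results))

asyncio.run(main())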