Skip to content

Commit 086569f

Browse files
committed
Stabilize3
1 parent bf1ebe1 commit 086569f

File tree

3 files changed

+355
-50
lines changed

3 files changed

+355
-50
lines changed

module/async_article_parser.py renamed to async/async_article_parser.py

Lines changed: 37 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
1-
import requests
2-
from bs4 import BeautifulSoup
3-
from module.headers import headers, search_type
41
import time
2+
53
import aiohttp
6-
import asyncio
74
import async_timeout
8-
from concurrent.futures import ALL_COMPLETED
5+
import requests
6+
from bs4 import BeautifulSoup
7+
8+
from module.headers import headers, search_type
99

1010

1111
# 비동기 http 요청 fetch 함수 구현
@@ -25,7 +25,7 @@ def __init__(self, dc_id):
2525

2626
self.__g_type = self.get_gallary_type(dc_id) # 갤러리 타입 얻어오기
2727

28-
# 갤러리 타입 가져오기(마이너, 일반)
28+
# 갤러리 타입 가져오기(마이너, 일반) - 생성자에서 사용하므로 동기적으로 처리
2929
def get_gallary_type(self, dc_id):
3030
# url로 requests를 보내서 redirect시키는지 체크한다.
3131
url = f'https://gall.dcinside.com/board/lists/?id={dc_id}'
@@ -55,9 +55,9 @@ async def article_parse(self, keyword, s_type, page=1, search_pos=''):
5555
dc_id = self.__dc_id
5656

5757
url = f"https://gall.dcinside.com/{g_type}/lists/?id={dc_id}&page={page}&search_pos={search_pos}&s_type={s_type}&s_keyword={keyword}"
58-
print(url)
58+
# print(url)
5959

60-
res = await fetch(session, url) # 비동기 http 요청
60+
res = await fetch(session, url) # 비동기 http 요청
6161
soup = BeautifulSoup(res, "lxml")
6262

6363
article_list = soup.select(".us-post") # 글 박스 전부 select
@@ -83,7 +83,7 @@ async def article_parse(self, keyword, s_type, page=1, search_pos=''):
8383
recommend = element.select(".gall_recommend")[0].text
8484
# print(link, num, title, reply, nickname, timestamp, refresh, recommend)
8585

86-
self.__all_link[num] = link; # 링크 추가
86+
self.__all_link[num] = link # 링크 추가
8787

8888
article_data = {'num': num, 'title': title, 'reply': reply, 'nickname': nickname,
8989
'timestamp': timestamp,
@@ -107,10 +107,10 @@ async def page_explorer(self, keyword, s_type, search_pos=''):
107107
article_list = soup.select(".us-post") # 글 박스 전부 select
108108
article_count = len(article_list)
109109
if article_count == 0: # 글이 없으면
110-
page['start'] = 0;
110+
page['start'] = 0
111111
page['end'] = 0 # 페이지는 없음
112112
elif article_count < 20: # 20개 미만이면
113-
page['start'] = 1;
113+
page['start'] = 1
114114
page['end'] = 1 # 1페이지 밖에 없음.
115115
else:
116116
# 끝 보기 버튼이 있나 검사
@@ -119,14 +119,14 @@ async def page_explorer(self, keyword, s_type, search_pos=''):
119119
if len(page_end_btn) == 2:
120120
page_end_btn = page_end_btn[0]
121121
final_page = int(page_end_btn['href'].split('&page=')[1].split("&")[0]) + 1
122-
page['start'] = 1;
122+
page['start'] = 1
123123
page['end'] = final_page
124124
else:
125125
page_box = soup.select(
126126
'#container > section.left_content.result article > div.bottom_paging_wrap > '
127127
'div.bottom_paging_box > a')
128128

129-
page['start'] = 1;
129+
page['start'] = 1
130130
if len(page_box) == 1:
131131
page['end'] = 1
132132
else:
@@ -197,27 +197,27 @@ def run():
197197

198198
search_pos = page['next_pos']
199199

200-
201-
async def main():
202-
parser = DCArticleParser(dc_id="baseball_new11") # 객체 생성
203-
keyword, stype = "ㅎㅇ", search_type["제목+내용"]
204-
205-
first_page = await parser.page_explorer(keyword, stype)
206-
first_next_pos = first_page["next_pos"]
207-
208-
tmp_pos = first_next_pos
209-
task_lst = []
210-
for i in range(1,100):
211-
future = asyncio.ensure_future(parser.article_parse(keyword, stype, page = 1, search_pos = tmp_pos)) #future = js의 promise와 유사한 것
212-
task_lst.append(future)
213-
tmp_pos = str(int(tmp_pos) + 10000)
214-
215-
216-
start = time.time()
217-
completed, pending = await asyncio.wait(task_lst, return_when=ALL_COMPLETED)
218-
print(completed)
219-
end = time.time()
220-
print(f'>>> 비동기 처리 총 소요 시간: {end - start}')
221-
222-
# 파이썬 3.7 이상 asyncio.run 으로 간단하게 사용 가능
223-
asyncio.run(main())
200+
# async def main():
201+
# parser = DCArticleParser(dc_id="baseball_new11") # 객체 생성
202+
# keyword, stype = "ㅎㅇ", search_type["제목+내용"]
203+
#
204+
# first_page = await parser.page_explorer(keyword, stype)
205+
# first_next_pos = first_page["next_pos"]
206+
#
207+
# tmp_pos = first_next_pos
208+
# task_lst = []
209+
# for i in range(1, 100):
210+
# future = asyncio.ensure_future(
211+
# parser.article_parse(keyword, stype, page=1, search_pos=tmp_pos)) # future = js의 promise와 유사한 것
212+
# task_lst.append(future)
213+
# tmp_pos = str(int(tmp_pos) + 10000)
214+
#
215+
# start = time.time()
216+
# completed, pending = await asyncio.wait(task_lst, return_when=ALL_COMPLETED)
217+
# print(completed)
218+
# end = time.time()
219+
# print(f'>>> 비동기 처리 총 소요 시간: {end - start}')
220+
#
221+
#
222+
# # 파이썬 3.7 이상 asyncio.run 으로 간단하게 사용 가능
223+
# asyncio.run(main())

0 commit comments

Comments
 (0)