1. requests 라이브러리로 http요청하기
- pip install requests (cmd창에서 설치)
import requests

# Fetch the Naver main page over HTTP and dump its raw HTML to stdout.
resp = requests.get("http://www.naver.com")
page_source = resp.text
print(page_source)
2. BeautifulSoup 사용하여 웹 페이지 파싱하기
- pip install beautifulsoup4
import requests
from bs4 import BeautifulSoup

# Fetch the Naver main page and print the text of a single element.
response = requests.get("https://www.naver.com")
html = response.text
soup = BeautifulSoup(html, "html.parser")

# select_one returns None when the selector matches nothing, which would
# make `.text` raise AttributeError — guard against that.
word = soup.select_one('#NM_set_home_btn')
if word is not None:
    print(word.text)
else:
    print("element not found: #NM_set_home_btn")
3. 네이버 뉴스 헤드라인 여러 개 가져오기 실습
import requests
from bs4 import BeautifulSoup

# Some sites reject requests without a browser-like User-Agent.
# The original "Mozila/2.0" was a typo; use the conventional "Mozilla/5.0".
header = {'User-agent': 'Mozilla/5.0'}
response = requests.get("https://news.naver.com/", headers=header)
html = response.text
soup = BeautifulSoup(html, 'html.parser')

# Every headline on the page carries the .cjs_t class.
titles = soup.select('.cjs_t')
print(titles)
for title in titles:
    print(title.text)
    print('\n')
4. 네이버 증권 3개가져오기
import requests
from bs4 import BeautifulSoup

# Stock codes to look up (Samsung Electronics, SK hynix, Kakao).
codes = [
    '005930',
    '000660',
    '035720',
]

for code in codes:
    url = f"https://finance.naver.com/item/sise.naver?code={code}"
    response = requests.get(url)
    html = response.text
    soup = BeautifulSoup(html, 'html.parser')
    # #_nowVal holds the current price as text, e.g. "71,000".
    # select_one returns None when the page layout changes — skip instead
    # of crashing on `.text`.
    node = soup.select_one("#_nowVal")
    if node is None:
        print(f"price element not found for {code}")
        continue
    price = node.text.replace(',', '')
    print(price)
5. 검색어 결과를 N페이지까지 가져오기
import requests
from bs4 import BeautifulSoup
import pyautogui

# Ask the user for a search keyword and how many result pages to fetch.
keyword = pyautogui.prompt("검색어를입력하세요>>>")
lastPage = pyautogui.prompt("마지막페이지입력>>>")

pageNum = 1
# Naver news paginates by result offset: page 1 starts at 1, page 2 at 16, ...
# so step the `start` parameter by 15 per page.
for i in range(1, int(lastPage) * 15, 15):
    print(f"{pageNum}페이지입니다")
    response = requests.get(
        f"https://search.naver.com/search.naver?where=news&sm=tab_jum&query={keyword}&start={i}"
    )
    html = response.text
    soup = BeautifulSoup(html, 'html.parser')
    links = soup.select('.news_tit')
    for link in links:
        title = link.text          # text content inside the tag
        url = link.attrs['href']   # value of the href attribute
        print(title, url)
    pageNum = pageNum + 1
6. 엑셀 불러오기
import openpyxl

# Open an existing workbook, overwrite two cells, and save it back in place.
workbook_path = r'C:\startCoding\Excel만들기\참가자_data.xlsx'
workbook = openpyxl.load_workbook(workbook_path)

# Select the worksheet by name.
sheet = workbook['오징어게임']

# Update row 3 with a new participant number and name.
sheet['A3'] = 456
sheet['B3'] = '룰랄라'

workbook.save(workbook_path)
7. 엑셀만들기
import openpyxl

# Create a brand-new workbook with one custom-named sheet,
# write a header row plus one data row, then save to disk.
workbook = openpyxl.Workbook()
sheet = workbook.create_sheet("오징어게임")

sheet['A1'] = '참가번호'
sheet['B1'] = '성명'
sheet['A2'] = 1
sheet['B2'] = '오잉님'

workbook.save(r'C:\startCoding\Excel만들기\참가자_data.xlsx')
8. 네이버 증권 엑셀에 넣기
import requests
from bs4 import BeautifulSoup
import openpyxl

# Scrape current stock prices and write them into column B of an
# existing workbook, one row per stock code.
fpath = r"C:\startCoding\Excel만들기\참가자_data.xlsx"
wb = openpyxl.load_workbook(fpath)
ws = wb.active  # currently active sheet

# Stock codes (Samsung Electronics, SK hynix, Kakao).
codes = [
    '005930',
    '000660',
    '035720',
]

row = 2
for code in codes:
    url = f"https://finance.naver.com/item/sise.naver?code={code}"
    response = requests.get(url)
    html = response.text
    soup = BeautifulSoup(html, 'html.parser')
    # #_nowVal holds the current price as text, e.g. "71,000";
    # strip the thousands separators so it can be stored as an int.
    price = soup.select_one("#_nowVal").text
    price = price.replace(',', '')
    print(price)
    ws[f'B{row}'] = int(price)
    row = row + 1

# Save once, after all codes are written.
wb.save(fpath)
9. 셀레니움 기초
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time


def selenium_test():
    """Open Naver, click the Shopping menu, and search for 'iPhone 13'.

    Keeps the browser window open afterwards so the result can be inspected.
    """
    # Launch the browser.
    browser = webdriver.Chrome('C:/chromedriver.exe')
    # Open the site.
    browser.get("https://www.naver.com")
    # Wait up to 10 seconds for elements to appear before lookups fail.
    browser.implicitly_wait(10)
    # Click the Shopping menu.
    browser.find_element(By.CSS_SELECTOR, '#NM_FAVORITE > div.group_nav > ul.list_nav.type_fix > li:nth-child(5) > a').click()
    time.sleep(2)
    # Click the search box.
    search = browser.find_element(By.CSS_SELECTOR, '#__next > div > div.header_header__24NVj > div > div > div._gnb_header_area_150KE > div > div._gnbLogo_gnb_logo_3eIAf > div > div._gnbSearch_gnb_search_3O1L2 > form > fieldset > div._gnbSearch_inner_2Zksb > div > input')
    search.click()
    # Type the query and submit it.
    search.send_keys('아이폰 13')
    search.send_keys(Keys.ENTER)
    # Keep the browser open. Sleep inside the loop instead of a bare
    # `pass` busy-wait, which would peg one CPU core at 100%.
    while True:
        time.sleep(1)


selenium_test()
9. 무한스크롤 적용
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time

# Launch the browser.
browser = webdriver.Chrome('C:/chromedriver.exe')
# Open the site.
browser.get("https://www.naver.com")
# Wait up to 10 seconds for elements to appear before lookups fail.
browser.implicitly_wait(10)
# Click the Shopping menu.
browser.find_element(By.CSS_SELECTOR, '#NM_FAVORITE > div.group_nav > ul.list_nav.type_fix > li:nth-child(5) > a').click()
time.sleep(2)
# Click the search box.
search = browser.find_element(By.CSS_SELECTOR, '#__next > div > div.header_header__24NVj > div > div > div._gnb_header_area_150KE > div > div._gnbLogo_gnb_logo_3eIAf > div > div._gnbSearch_gnb_search_3O1L2 > form > fieldset > div._gnbSearch_inner_2Zksb > div > input')
search.click()
# Type the query and submit it.
search.send_keys('아이폰 13')
search.send_keys(Keys.ENTER)

# Scroll height before scrolling — used to detect when no new content loads.
before_h = browser.execute_script("return window.scrollY")
print(before_h)

# Infinite scroll: press END until the page stops growing.
while True:
    # Jump to the bottom of the page.
    browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END)
    # Give the page time to lazy-load the next batch of results.
    time.sleep(2)
    # Scroll height after scrolling.
    after_h = browser.execute_script("return window.scrollY")
    if after_h == before_h:
        break
    before_h = after_h
10. 네이버 쇼핑 물건명, 가격, 링크정보 가져오기
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time

# Launch the browser.
browser = webdriver.Chrome('C:/chromedriver.exe')
# Open the site.
browser.get("https://www.naver.com")
# Wait up to 10 seconds for elements to appear before lookups fail.
browser.implicitly_wait(10)
# Click the Shopping menu.
browser.find_element(By.CSS_SELECTOR, '#NM_FAVORITE > div.group_nav > ul.list_nav.type_fix > li:nth-child(5) > a').click()
time.sleep(2)
# Click the search box.
search = browser.find_element(By.CSS_SELECTOR, '#__next > div > div.header_header__24NVj > div > div > div._gnb_header_area_150KE > div > div._gnbLogo_gnb_logo_3eIAf > div > div._gnbSearch_gnb_search_3O1L2 > form > fieldset > div._gnbSearch_inner_2Zksb > div > input')
search.click()
# Type the query and submit it.
search.send_keys('아이폰 13')
search.send_keys(Keys.ENTER)

# Scroll height before scrolling — used to detect when no new content loads.
before_h = browser.execute_script("return window.scrollY")
print(before_h)

# Infinite scroll: press END until the page stops growing.
while True:
    browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END)
    # Give the page time to lazy-load the next batch of results.
    time.sleep(2)
    after_h = browser.execute_script("return window.scrollY")
    if after_h == before_h:
        break
    before_h = after_h

# One info div per product card.
items = browser.find_elements(By.CSS_SELECTOR, ".basicList_info_area__TWvzp")
for item in items:
    name = item.find_element(By.CSS_SELECTOR, ".basicList_link__JLQJf").text
    try:
        price = item.find_element(By.CSS_SELECTOR, ".price_num__S2p_v").text
    except Exception:
        # Discontinued items have no price node; a bare `except:` would
        # also swallow KeyboardInterrupt/SystemExit, so narrow it.
        price = "판매중단"
    link = item.find_element(By.CSS_SELECTOR, ".basicList_title__VfX3c > a").get_attribute('href')
    print(name, price, link)
'파이썬 시작하기' 카테고리의 다른 글
크롤링 (0) | 2022.12.19 |
---|---|
크롤링 (0) | 2022.12.19 |
파이썬 시작하기 #1. Life is too short, You need python (0) | 2022.12.09 |