프로그래밍/python
간단하게 내용을 스크랩하는 라이브러리 newspaper3k
이휘재123
2023. 1. 3. 16:51
반응형
https://newspaper.readthedocs.io/en/latest/
Newspaper3k: Article scraping & curation — newspaper 0.0.2 documentation
Newspaper3k: Article scraping & curation Inspired by requests for its simplicity and powered by lxml for its speed: “Newspaper is an amazing python library for extracting & curating articles.” – tweeted by Kenneth Reitz, Author of requests “Newspap
newspaper.readthedocs.io
라이브러리 설치
# Python 3 이상
pip install newspaper3k
# Python 2 (구버전)
pip install newspaper
스크랩한 내용을 텍스트 파일(.txt)로 저장하는 간단한 코드
from newspaper import Article
import os
# Characters that cannot appear in a file name on Windows; "/" is also the
# path separator on POSIX, so a title containing it would escape the folder.
INVALID_FILENAME_CHARS = '\\/:*?"<>|'


def sanitize_filename(name):
    """Return *name* made safe for use as a single file-name component.

    Every character in ``INVALID_FILENAME_CHARS`` and every ASCII control
    character (including newlines and tabs) is replaced by ``"_"``, and
    surrounding whitespace is stripped. If nothing is left, ``"untitled"``
    is returned so the caller always gets a usable name.
    """
    replacements = {ch: "_" for ch in INVALID_FILENAME_CHARS}
    replacements.update({chr(code): "_" for code in range(32)})
    cleaned = name.translate(str.maketrans(replacements)).strip()
    return cleaned or "untitled"


def save_article(title, text, directory=None):
    """Write *text* to ``<directory>/<sanitized title>.txt`` and return the path.

    Args:
        title: Article title; it is sanitized before being used as file name.
        text: Article body; written followed by a single newline.
        directory: Target folder. Defaults to ``./article`` under the current
            working directory. It is created when it does not exist yet.

    Raises:
        OSError: If the folder cannot be created or the file cannot be written.
    """
    if directory is None:
        # Folder where articles are stored.
        directory = os.path.join(os.getcwd(), "article")

    if not os.path.isdir(directory):
        os.makedirs(directory)
    else:
        print(f"{directory} 이미 폴더가 있습니다.")

    article_file = os.path.join(directory, f"{sanitize_filename(title)}.txt")
    print(article_file)

    # "with" guarantees the handle is closed even if write() raises.
    with open(article_file, "w", encoding="UTF8") as f:
        f.write(f"{text}\n")
    return article_file


def main():
    """Download one news article, print it, and save its text to a file."""
    url = "https://n.news.naver.com/mnews/article/469/0000716409?sid=105"

    article = Article(url, language="ko")
    article.download()
    article.parse()

    article_title = article.title
    article_text = article.text

    print(f"article_title: {article_title}")
    print(f"article_text: {article_text}")

    # Nothing to save when the parser extracted no body text.
    if not article_text:
        print("글이 없습니다.")
        return

    # Save the scraped text to a .txt file.
    try:
        saved_path = save_article(article_title, article_text)
    except OSError as e:
        # The original message talked about a "store list", which was a
        # copy-paste leftover; report what actually failed.
        print(f"글 저장 실패 {e}")
    else:
        print(f"{os.path.basename(saved_path)} 파일이 저장되었습니다.")


if __name__ == "__main__":
    main()
정상적으로 작동한 모습
반응형