-
Notifications
You must be signed in to change notification settings - Fork 13
/
youthop.py
44 lines (33 loc) · 1.39 KB
/
youthop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import csv
import re
import sys

import requests
from bs4 import BeautifulSoup
# Scrape the workshop listings on youthop.com (listing pages 1 through 6) and
# write one CSV row per opportunity: headline, summary, apply link and
# website link. Each scraped value is also echoed to stdout.

LISTING_URL = 'https://www.youthop.com/workshops/page/'
# Seconds to wait for a response; without a timeout a stalled server would
# block the script indefinitely.
REQUEST_TIMEOUT = 30
# Compiled once because it is applied to every headline and every summary.
NON_ASCII_RUN = re.compile(r'[^\x00-\x7F]+')


def _warn(message):
    """Report a skipped page on stderr so stdout carries only scraped values."""
    print(message, file=sys.stderr)


def _ascii_only(text):
    """Return *text* with each run of non-ASCII characters replaced by a space."""
    return NON_ASCII_RUN.sub(' ', text)


def _fetch_soup(url):
    """Download *url* and return the parsed page, or None if the request fails."""
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        _warn('skipping {}: {}'.format(url, err))
        return None
    return BeautifulSoup(response.text, 'html.parser')


def _href_at(anchors, index):
    """Return the href of ``anchors[index]``, or '' when there is no such link."""
    if index < len(anchors):
        return anchors[index].get('href', '')
    return ''


# newline='' is what the csv module expects for files it writes (otherwise
# Windows output gets a blank line after every row); the with-statement closes
# the file even when scraping fails part-way through.
with open('web_scrape.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['headline', 'summary', 'Apply link', 'Website link'])

    for page_number in range(1, 7):
        listing_soup = _fetch_soup(LISTING_URL + str(page_number))
        if listing_soup is None:
            continue

        for post in listing_soup.find_all('div', class_='post-header'):
            post_anchor = post.a
            article_url = post_anchor.get('href') if post_anchor is not None else None
            if not article_url:
                _warn('skipping a post without a link on listing page {}'.format(
                    page_number))
                continue

            # Parsed into its own name so the listing page being iterated is
            # never overwritten by the article page.
            article_soup = _fetch_soup(article_url)
            if article_soup is None:
                continue

            # article headline
            main_section = article_soup.find(id="main")
            heading = main_section.h1 if main_section is not None else None

            # article summary paragraph
            content = article_soup.find('div', class_='article-content')
            first_paragraph = content.p if content is not None else None

            # A page without a headline or summary does not have the expected
            # layout; skip it instead of aborting the whole run.
            if heading is None or first_paragraph is None:
                _warn('skipping {}: headline or summary not found'.format(
                    article_url))
                continue

            headline = _ascii_only(heading.text)
            print(headline)
            summary = _ascii_only(first_paragraph.text)
            print(summary)

            # article apply and official link; either may be absent, in which
            # case the corresponding CSV field is left empty.
            process = article_soup.find('div', class_='application-process')
            anchors = process.find_all('a') if process is not None else []
            apply_link = _href_at(anchors, 0)
            print(apply_link)
            web_link = _href_at(anchors, 1)
            print(web_link)

            csv_writer.writerow([headline, summary, apply_link, web_link])