I'm a python beginner.
I'm trying to crawl google play store and export to csv file. But I got a error message.
UnicodeEncodeError: 'cp949' codec can't encode character '\u20a9' in position 90: illegal multibyte sequence
Here is my source code.
When I command print, it works. But it shows error message when exporting to csv file
please help me
from bs4 import BeautifulSoup
import urllib.request
import urllib.parse
import codecs
import json
import pickle
from datetime import datetime
import sys
import csv
import os
req = 'https://play.google.com/store/search?q=hana&c=apps&num=300'
response = urllib.request.urlopen(req)
the_page = response.read()
soup = BeautifulSoup(the_page)
#app_link = soup.find('a', {'class' : 'title'})
#app_url = app_link.get('href')
for div in soup.findAll( 'div', {'class' : 'details'} ):
title = div.find( 'a', {'class':'title'} )
#print(title.get('href'))
app_url = title.get('href')
app_details={}
g_app_url = 'https://play.google.com' + app_url
app_response = urllib.request.urlopen(g_app_url)
app_page = app_response.read()
soup = BeautifulSoup(app_page)
#print(soup)
#print( g_app_url )
title_div = soup.find( 'div', {'class':'document-title'} )
app_details['title'] = title_div.find( 'div' ).get_text().strip()
subtitle = soup.find( 'a', {'class' : 'document-subtitle primary'} )
app_details['developer'] = subtitle.get_text().strip()
app_details['developer_link'] = subtitle.get( 'href' ).strip()
price_buy_span = soup.find( 'span', {'class' : 'price buy'} )
price = price_buy_span.find_all( 'span' )[-1].get_text().strip()
price = price[:-4].strip() if price != 'Install' else 'Free'
app_details['price'] = price
rating_value_meta = soup.find( 'meta', {'itemprop' : 'ratingValue'} )
app_details['rating'] = rating_value_meta.get( 'content' ).strip()
reviewers_count_meta = soup.find( 'meta', {'itemprop' : 'ratingCount'} )
app_details['reviewers'] = reviewers_count_meta.get( 'content' ).strip()
num_downloads_div = soup.find( 'div', {'itemprop' : 'numDownloads'} )
if num_downloads_div: app_details['downloads'] = num_downloads_div.get_text().strip()
date_published_div = soup.find( 'div', {'itemprop' : 'datePublished'} )
app_details['date_published'] = date_published_div.get_text().strip()
operating_systems_div = soup.find( 'div', {'itemprop' : 'operatingSystems'} )
app_details['operating_system'] = operating_systems_div.get_text().strip()
content_rating_div = soup.find( 'div', {'itemprop' : 'contentRating'} )
app_details['content_rating'] = content_rating_div.get_text().strip()
category_span = soup.find( 'span', {'itemprop' : 'genre'} )
app_details['category'] = category_span.get_text()
#print(app_details)
with open('result.csv', 'w') as f: # Just use 'w' mode in 3.x
w = csv.DictWriter(f, app_details.keys())
w.writeheader()
w.writerow(app_details)