というわけで、過去のものも一気に調べてみようかなと思い立ったので、Pythonスクリプトを作ってみました。
適当なディレクトリに以下の
toretama.py
amazon_helper.py
を配置して
python toretama.py 201104
みたいな感じで実行します。
実際は関係ない商品も出てくるので、最終的には目視確認ですけどね。
それぞれのファイルは以下の通りです。
toretama.py
# -*- coding: utf-8 -*-
"""Search Amazon for the products listed on a WBS "Trend Tamago" month page.

Usage:
    python toretama.py YYYYMM

The script downloads the list of items broadcast in the given month,
opens each item's page to extract the product name, and runs an Amazon
ItemSearch for that name.  The title and detail-page URL of every hit are
printed; "not find" is printed when no product name or no valid result is
available.

Written for Python 2: it depends on ``urllib2`` and matches byte-string
patterns against the raw (undecoded) page bodies.
"""
import os
import sys
import urllib2
import re
import xml.etree.ElementTree as etree
from amazon_helper import AmazonHelper

# Amazon API credentials -- replace the placeholders before running.
ACCESS_KEY = "amazonのアクセスキー"
SECRET_KEY = "シークレットキー"
ASSOCIATE_TAG = "アソシエートタグ"

# XML namespace used to qualify the element names looked up in the response.
NAMESPACE = 'http://webservices.amazon.com/AWSECommerceService/2010-06-01'

# Monthly index; filled in with (yyyy, mm).
LIST_URL = "http://www.tv-tokyo.co.jp/wbs/trend_tamago/%s/%s/writeThisMonth.js"

# One index entry: group 1 is the URL of the individual item page.
LIST_PATTERN = re.compile(
    r'<a href="(.*)">(.*)</a> <span class="date">\((.*)\)</span>',
    re.MULTILINE)

# Product name on an item page (matched after newlines are stripped).
NAME_PATTERN = re.compile(
    "<p><strong>商品名</strong></p><p>(.*)</p><p><strong>商品の特徴</strong></p>")


def _fetch(url):
    """Return the body of *url*, always closing the connection."""
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        return response.read()
    finally:
        response.close()


def _qualified(tag):
    """Return the ElementTree search path for *tag* in the API namespace."""
    return './/{%s}%s' % (NAMESPACE, tag)


def _product_name(page_url):
    """Return the product name found on an item page, or None."""
    page = _fetch(page_url).replace("\n", "")
    found = NAME_PATTERN.search(page)
    if found is None:
        return None
    return found.group(1)


def _search_items(helper, keywords):
    """Return the Item elements for *keywords*, or None if the search failed.

    None is returned when the response has no Items element, when IsValid
    is missing or not "True", or when an Errors element is present.
    """
    options = {
        'SearchIndex': "Blended",
        'ResponseGroup': "Small,Images",
        'Keywords': keywords,
    }
    dom = etree.fromstring(_fetch(helper.make_item_search(options)))
    items = dom.find(_qualified('Items'))
    if items is None:
        return None
    valid = items.find(_qualified('IsValid'))
    if valid is None or valid.text != "True":
        return None
    # Compare with None explicitly: an Element without children is falsy,
    # so a bare truth test could miss an Errors element.
    if items.find(_qualified('Errors')) is not None:
        return None
    return items.findall(_qualified('Item'))


def main(argv):
    """Run the lookup for the month given as ``argv[1]`` (YYYYMM).

    Returns the process exit status: 0 on completion, 2 on bad usage.
    """
    if len(argv) < 2 or re.match(r"\d{6}", argv[1]) is None:
        sys.stderr.write("usage: python toretama.py YYYYMM\n")
        return 2

    print("%s %s" % (sys.stdout.encoding, sys.getdefaultencoding()))
    yyyy = argv[1][0:4]
    mm = argv[1][4:6]

    # The helper holds only credentials, so one instance serves every search.
    helper = AmazonHelper(ACCESS_KEY, SECRET_KEY, ASSOCIATE_TAG)

    # Fetch the list of items broadcast in the requested month.
    listing = _fetch(LIST_URL % (yyyy, mm))
    for entry in LIST_PATTERN.finditer(listing):
        page_url = entry.group(1)
        try:
            # Get the product name from the item page, then search Amazon.
            name = _product_name(page_url)
            items = None if name is None else _search_items(helper, name)
        except (urllib2.URLError, etree.ParseError) as err:
            # One unreachable page or malformed reply should not abort
            # the rest of the month.
            sys.stderr.write("skipped %s: %s\n" % (page_url, err))
            continue

        if items is None:
            print("not find")
            continue

        for item in items:
            for tag in ('Title', 'DetailPageURL'):
                text = item.findtext(_qualified(tag))
                if text is not None:
                    print(text)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
amazon_helper.py
"""Build signed request URLs for the Amazon Product Advertising API."""
import base64
import hashlib
import hmac
import urllib
from datetime import datetime

try:  # Python 3
    from urllib.parse import quote as _quote
except ImportError:  # Python 2
    from urllib import quote as _quote


def _to_bytes(value):
    """Return *value* as a byte string (text is encoded as UTF-8)."""
    if not isinstance(value, bytes) and not hasattr(value, 'encode'):
        value = str(value)  # numbers and other non-string values
    if isinstance(value, bytes):
        return value
    return value.encode('utf-8')


class AmazonHelper(object):
    """Creates signed GET URLs for ItemLookup / ItemSearch operations.

    The signature is an HMAC-SHA256 over the request line, host, path and
    the sorted, percent-encoded query string, keyed with the secret key.
    """

    def __init__(self, access_key_id, secret_key, associate_tag=None,
                 version='2010-06-01'):
        """Store the credentials and the fixed endpoint settings.

        access_key_id -- sent as ``AWSAccessKeyId``.
        secret_key    -- HMAC key used to sign each request.
        associate_tag -- sent as ``AssociateTag`` when truthy.
        version       -- sent as ``Version``.
        """
        self.service = 'AWSECommerceService'
        self.access_key_id = access_key_id
        self.secret_key = secret_key
        self.associate_tag = associate_tag
        self.version = version
        self.uri = 'ecs.amazonaws.jp'
        self.end_point = '/onca/xml'
        self.dummy_timestamp = None

    def set_dummy_timestamp(self, dt):
        """Use *dt* as the ``Timestamp`` value instead of the current time."""
        self.dummy_timestamp = dt

    def make_item_lookup(self, options):
        """Return a signed ItemLookup URL; *options* is not modified."""
        params = dict(options)
        params['Operation'] = 'ItemLookup'
        return self.make_request(params)

    def make_item_search(self, options):
        """Return a signed ItemSearch URL; *options* is not modified."""
        params = dict(options)
        params['Operation'] = 'ItemSearch'
        return self.make_request(params)

    def make_request(self, options):
        """Return the signed request URL for the parameters in *options*.

        Service, AWSAccessKeyId, Version, Timestamp and (when configured)
        AssociateTag are added to a copy of *options*, so the caller's
        dict is left untouched.  Values may be byte strings, text or any
        object with a usable ``str()``; text is encoded as UTF-8.
        """
        params = dict(options)
        params['Service'] = self.service
        params['AWSAccessKeyId'] = self.access_key_id
        params['Version'] = self.version
        if self.dummy_timestamp:
            params['Timestamp'] = self.dummy_timestamp
        else:
            # NOTE(review): isoformat() yields a naive value with
            # microseconds and no trailing "Z" -- confirm the service
            # accepts this form.
            params['Timestamp'] = datetime.utcnow().isoformat()
        if self.associate_tag:
            params['AssociateTag'] = self.associate_tag

        # The signed query string must be RFC 3986 encoded: only
        # A-Z a-z 0-9 - _ . ~ stay literal.  The default safe='/' would
        # leave "/" unescaped, and Python 2 would escape "~".
        payload = '&'.join(
            '%s=%s' % (name, _quote(_to_bytes(params[name]), safe='~'))
            for name in sorted(params))

        string_to_sign = '\n'.join(
            ['GET', self.uri, self.end_point, payload])
        digest = hmac.new(_to_bytes(self.secret_key),
                          _to_bytes(string_to_sign),
                          hashlib.sha256).digest()
        signature = base64.b64encode(digest)

        # Base64 output has no spaces, so quote(..., safe='') gives the
        # same result as quote_plus here.
        return "http://%s%s?%s&Signature=%s" % (
            self.uri, self.end_point, payload, _quote(signature, safe=''))
0 件のコメント:
コメントを投稿