# Source code for veryscrape.scrapers.twingly

from urllib.parse import quote
from twingly_search import Parser

from ..scrape import SearchEngineScraper, ItemGenerator


class BlogGen(ItemGenerator):
    """Item generator that reads its text and time from an indexable item.

    Both hooks simply index into the value they are given: position 0 is
    used as the text and position 1 as the time.
    NOTE(review): presumably each item is a two-element pair whose second
    element is a timestamp -- confirm against ``ItemGenerator``.
    """

    def process_text(self, text):
        """Return the element at index 0 of ``text``."""
        first = text[0]
        return first

    def process_time(self, text):
        """Return the element at index 1 of ``text``."""
        second = text[1]
        return second
class Twingly(SearchEngineScraper):
    """Search-engine scraper backed by the Twingly blog search API.

    Builds search URLs for the Twingly v3 endpoint and uses a
    ``twingly_search.Parser`` to pull post URLs and publication values
    out of the response text.
    """

    # Source label attached to this scraper.
    source = 'blog'
    # Item generator class associated with this scraper.
    item_gen = BlogGen

    def __init__(self, api_key, *, proxy_pool=None):
        """Store the API key and create the response parser.

        :param api_key: Twingly API key interpolated into request URLs.
        :param proxy_pool: forwarded unchanged to the parent constructor.
        """
        super().__init__(proxy_pool=proxy_pool)
        self.api_key = api_key
        self.parser = Parser()

    def query_string(self, query):
        """Return the full search URL for ``query``.

        The query is combined with the fixed filter suffix
        ``lang:en tspan:12h page-size:10000`` and percent-encoded as a
        whole; the API key is appended as-is (it is not encoded).

        :param query: search term(s) to look up.
        :returns: the request URL as a string.
        """
        search_terms = '{} lang:en tspan:12h page-size:10000'.format(query)
        encoded_terms = quote(search_terms)
        endpoint = 'https://api.twingly.com/blog/search/api/v3/search?'
        # The endpoint contains no '%' characters, so formatting only the
        # parameter part yields the same string as formatting the whole URL.
        parameters = 'q=%s&apiKey=%s' % (encoded_terms, self.api_key)
        return endpoint + parameters

    def extract_urls(self, text):
        """Parse a response body into parallel tuples of URLs and times.

        :param text: raw response text handed to the parser.
        :returns: ``(urls, published_ats)`` -- a tuple holding one tuple of
            post URLs and one tuple of the matching ``published_at``
            values. When the parsed result has no posts the return value
            is the empty tuple ``()``, not a pair of empty tuples.
        """
        parsed = self.parser.parse(text)
        pairs = [(post.url, post.published_at) for post in parsed.posts]
        # Transpose [(url, time), ...] into ((url, ...), (time, ...)).
        return tuple(zip(*pairs))