First commit

This commit is contained in:
Alberto Villa
2017-12-09 11:14:30 +01:00
commit 68fbc47f96
3 changed files with 466 additions and 0 deletions

24
README.md Normal file
View File

@@ -0,0 +1,24 @@
# Highlights Downloader
Simple application which downloads all the highlights stored in your Instapaper folders as Markdown files.
The downloader creates a tree of folders inside `highlights`. There will be one folder for each one you have in Instapaper. The folder name will be the id of the folder in Instapaper.
Inside every folder the downloader will create a Markdown file including the title of the bookmark, url reference and all the highlights (one paragraph per each of them).
## Requirements
Install the following packages:
- `pip install httplib2`
- `pip install oauth2`
## Usage
1. Get a KEY and SECRET OAuth from [Instapaper](https://www.instapaper.com/main/request_oauth_consumer_token)
2. Modify the file `Credentials.ini` with your KEY, SECRET, LOGIN and PASSWORD
3. Call the app `python downloader.py`
You can import then these Markdown files in your favourite Notes application e.g. [Bear](http://www.bear-writer.com/)
## Credits
Highlights Downloader makes use of a modified version of the [pyinstapaper library](https://github.com/mdorn/pyinstapaper) Python wrapper to the Instapaper API; thanks [Matt Dorn](https://github.com/mdorn)!

126
downloader.py Normal file
View File

@@ -0,0 +1,126 @@
import datetime
import os
import ConfigParser
from instapaper import Instapaper, Folder
# Init instapaper with key, secret, login and password
def init():
# Read credentials from Credentials.ini file
configParser = ConfigParser.RawConfigParser()
configParser.read('Credentials.ini')
key = configParser.get('Instapaper', 'INSTAPAPER_KEY')
secret = configParser.get('Instapaper', 'INSTAPAPER_SECRET')
login = configParser.get('Login', 'INSTAPAPER_LOGIN')
password = configParser.get('Login', 'INSTAPAPER_PASSWORD')
print key, secret, login, password
# Create instance of Instapaper using the OAth credentials
instapaper = Instapaper(key, secret)
# Login with user and password
instapaper.login(login, password)
return instapaper
# Function to change to highlights folder
def change_to_highlights_folder():
# If there is no folder in the system with highlights then create it
if not os.path.exists('highlights'):
os.makedirs('highlights')
# Change to the folder
os.chdir('highlights')
# Change to folder using the folder_id
def change_to_folder(folder_id):
# Folder name = its folder_id
folder = str(folder_id)
# If there is no folder in the system for this folder_id then create it
if not os.path.exists(folder):
os.makedirs(folder)
# Change to the folder
os.chdir(folder)
def get_list_of_existing_highlights():
existing = []
# Get all .md files in current directory
for file in os.listdir('.'):
if file.endswith('.md'):
existing.append(int(os.path.splitext(file)[0]))
return existing
# Process bookmarks in one folder
def process_folder(folder):
# Show id and title of the folder
print folder.folder_id, folder.title
change_to_folder(folder.folder_id)
existing = get_list_of_existing_highlights()
# Get bookmarks from the current folder using its folder_id
# TODO: identify which bookmarks have been processed and pass their ids
# as parameter to this function as 'have'. Current library does not allow
# this functionality, needs to be updated
bookmarks = instapaper.get_bookmarks(folder=folder.folder_id, have=existing, limit=500)
process_bookmarks(bookmarks)
# Process list of bookmarks
def process_bookmarks(bookmarks):
# Show number of bookmarks to be processed
print 'Number of bookmarks: ' + str(len(bookmarks))
for bookmark in bookmarks:
process_bookmark(bookmark)
# Process the highlights of one bookmark
def process_bookmark(bookmark):
# Get the highlights
highlights = bookmark.get_highlights()
# If there is any highlight
if len(highlights) > 0:
# Check if the bookmark has been already processed
# TODO: we would not need this if the 'have' parameter is used
# in the call to get_bookmarks
if not os.path.exists(str(bookmark.bookmark_id) + ".md"):
# Show that we have found a new bookmark with highlights
print "New highlight file: " + str(bookmark.bookmark_id) + ".md"
# Create the file
new_file = open(str(bookmark.bookmark_id) + ".md", "w")
# Add the title and reference url
new_file.write('# ' + bookmark.title.encode('utf-8') + '\n')
new_file.write('[Reference]' + '(' + bookmark.url.encode('utf-8') + ')\n\n')
# Write each highlight to the file, adding a line between them
for highlight in highlights:
new_file.write(highlight.text.encode('utf-8') + '\n\n')
new_file.close()
# ----------------------------------
# Init Instapaper
instapaper = init()
# Change to highlights folder
change_to_highlights_folder()
# Get all folders
folders = instapaper.get_folders()
# Process each folder
for folder in folders:
process_folder(folder)
# Change to the root folder
os.chdir('..')

316
instapaper.py Executable file
View File

@@ -0,0 +1,316 @@
# -*- coding: utf-8 -*-
from datetime import datetime
import json
import logging
import time
import urlparse
from urllib import urlencode
import oauth2 as oauth
BASE_URL = 'https://www.instapaper.com'
API_VERSION = '1'
ACCESS_TOKEN = 'oauth/access_token'
LOGIN_URL = 'https://www.instapaper.com/user/login'
REQUEST_DELAY_SECS = 0.5
log = logging.getLogger(__name__)
class Instapaper(object):
'''Instapaper client class.
:param oauth_key str: Instapaper OAuth consumer key
:param oauth_secret str: Instapaper OAuth consumer secret
'''
def __init__(self, oauth_key, oauth_secret):
self.consumer = oauth.Consumer(oauth_key, oauth_secret)
self.oauth_client = oauth.Client(self.consumer)
self.token = None
def login(self, username, password):
'''Authenticate using XAuth variant of OAuth.
:param str username: Username or email address for the relevant account
:param str password: Password for the account
'''
response = self.request(
ACCESS_TOKEN,
{
'x_auth_mode': 'client_auth',
'x_auth_username': username,
'x_auth_password': password
},
returns_json=False
)
token = dict(urlparse.parse_qsl(response['data']))
self.token = oauth.Token(
token['oauth_token'], token['oauth_token_secret'])
self.oauth_client = oauth.Client(self.consumer, self.token)
def request(self, path, params=None, returns_json=True,
method='POST', api_version=API_VERSION):
'''Process a request using the OAuth client's request method.
:param str path: Path fragment to the API endpoint, e.g. "resource/ID"
:param dict params: Parameters to pass to request
:param str method: Optional HTTP method, normally POST for Instapaper
:param str api_version: Optional alternative API version
:returns: response headers and body
:retval: dict
'''
time.sleep(REQUEST_DELAY_SECS)
full_path = '/'.join([BASE_URL, 'api/%s' % api_version, path])
params = urlencode(params) if params else None
log.debug('URL: %s', full_path)
request_kwargs = {'method': method}
if params:
request_kwargs['body'] = params
response, content = self.oauth_client.request(
full_path, **request_kwargs)
log.debug('CONTENT: %s ...', content[:50])
if returns_json:
try:
data = json.loads(content)
if isinstance(data, list) and len(data) == 1:
# ugly -- API always returns a list even when you expect
# only one item
if data[0]['type'] == 'error':
raise Exception('Instapaper error %d: %s' % (
data[0]['error_code'],
data[0]['message'])
)
# TODO: PyInstapaperException custom class?
except ValueError:
# Instapaper API can be unpredictable/inconsistent, e.g.
# bookmarks/get_text doesn't return JSON
data = content
else:
data = content
return {
'response': response,
'data': data
}
def get_bookmarks(self, folder='unread', limit=10, have=[]):
"""Return list of user's bookmarks.
:param str folder: Optional. Possible values are unread (default),
starred, archive, or a folder_id value.
:param int limit: Optional. A number between 1 and 500, default 25.
:returns: List of user's bookmarks
:rtype: list
"""
path = 'bookmarks/list'
response = self.request(path, {'folder_id': folder, 'limit': limit, 'have': have})
items = response['data']
bookmarks = []
for item in items:
if item.get('type') == 'error':
raise Exception(item.get('message'))
elif item.get('type') == 'bookmark':
bookmarks.append(Bookmark(self, **item))
return bookmarks
def get_folders(self):
"""Return list of user's folders.
:rtype: list
"""
path = 'folders/list'
response = self.request(path)
items = response['data']
folders = []
for item in items:
if item.get('type') == 'error':
raise Exception(item.get('message'))
elif item.get('type') == 'folder':
folders.append(Folder(self, **item))
return folders
class InstapaperObject(object):
'''Base class for Instapaper objects like Bookmark.
:param client: instance of the OAuth client for making requests
:type client: ``oauth2.Client``
:param dict data: key/value pairs of object attributes, e.g. title, etc.
'''
def __init__(self, client, **data):
self.client = client
for attrib in self.ATTRIBUTES:
val = data.get(attrib)
if hasattr(self, 'TIMESTAMP_ATTRS'):
if attrib in self.TIMESTAMP_ATTRS:
try:
val = datetime.fromtimestamp(int(val))
except ValueError:
log.warn(
'Could not cast %s for %s as datetime',
val, attrib
)
setattr(self, attrib, val)
self.object_id = getattr(self, self.RESOURCE_ID_ATTRIBUTE)
for action in self.SIMPLE_ACTIONS:
setattr(self, action, lambda x: self._simple_action(x))
instance_method = getattr(self, action)
instance_method.func_defaults = (action,)
def add(self):
'''Save an object to Instapaper after instantiating it.
Example::
folder = Folder(instapaper, title='stuff')
result = folder.add()
'''
# TODO validation per object type
submit_attribs = {}
for attrib in self.ATTRIBUTES:
val = getattr(self, attrib, None)
if val:
submit_attribs[attrib] = val
path = '/'.join([self.RESOURCE, 'add'])
result = self.client.request(path, submit_attribs)
return result
def _simple_action(self, action=None):
'''Issue a request for an API method whose only param is the obj ID.
:param str action: The name of the action for the resource
:returns: Response from the API
:rtype: dict
'''
if not action:
raise Exception('No simple action defined')
path = "/".join([self.RESOURCE, action])
response = self.client.request(
path, {self.RESOURCE_ID_ATTRIBUTE: self.object_id}
)
return response
class Bookmark(InstapaperObject):
'''Object representing an Instapaper bookmark/article.'''
RESOURCE = 'bookmarks'
RESOURCE_ID_ATTRIBUTE = 'bookmark_id'
# TODO: identify which fields to convert from timestamp to Python datetime
ATTRIBUTES = [
'bookmark_id',
'title',
'description',
'hash',
'url',
'progress_timestamp',
'time',
'progress',
'starred',
'type',
'private_source'
]
TIMESTAMP_ATTRS = [
'progress_timestamp',
'time'
]
SIMPLE_ACTIONS = [
'delete',
'star',
'archive',
'unarchive',
'get_text'
]
def __str__(self):
return 'Bookmark %s: %s' % (self.object_id, self.title.encode('utf-8'))
def get_highlights(self):
'''Get highlights for Bookmark instance.
:return: list of ``Highlight`` objects
:rtype: list
'''
# NOTE: all Instapaper API methods use POST except this one!
path = '/'.join([self.RESOURCE, str(self.object_id), 'highlights'])
response = self.client.request(path, method='GET', api_version='1.1')
items = response['data']
highlights = []
for item in items:
if item.get('type') == 'error':
raise Exception(item.get('message'))
elif item.get('type') == 'highlight':
highlights.append(Highlight(self, **item))
return highlights
class Folder(InstapaperObject):
'''Object representing an Instapaper folder.'''
RESOURCE = 'folders'
RESOURCE_ID_ATTRIBUTE = 'folder_id'
ATTRIBUTES = [
'folder_id',
'title',
'display_title',
'sync_to_mobile',
'folder_id',
'position',
'type',
'slug',
]
SIMPLE_ACTIONS = [
'delete',
]
def __str__(self):
return 'Folder %s: %s' % (self.object_id, self.title)
def set_order(self, folder_ids):
"""Order the user's folders
:param list folders: List of folder IDs in the desired order.
:returns: List Folder objects in the new order.
:rtype: list
"""
# TODO
raise NotImplementedError
class Highlight(InstapaperObject):
'''Object representing an Instapaper highlight.'''
RESOURCE = 'highlights'
RESOURCE_ID_ATTRIBUTE = 'highlight_id'
ATTRIBUTES = [
'highlight_id',
'text',
'note',
'time',
'position',
'bookmark_id',
'type',
'slug',
]
TIMESTAMP_ATTRS = [
'time',
]
SIMPLE_ACTIONS = [
'delete',
]
def __str__(self):
return 'Highlight %s for Article %s' % (
self.object_id, self.bookmark_id)
def create(self):
# TODO
raise NotImplementedError