mirror of
https://github.com/nikdoof/highlightsdownloader.git
synced 2025-12-18 13:19:21 +00:00
First commit
This commit is contained in:
24
README.md
Normal file
24
README.md
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
# Highlights Downloader
|
||||||
|
|
||||||
|
Simple application which downloads all the highlights stored in your Instapaper folders as Markdown files.
|
||||||
|
|
||||||
|
The downloader creates a tree of folders inside `highlights`, one for each folder you have in Instapaper. Each folder is named after the id of the corresponding Instapaper folder.
|
||||||
|
|
||||||
|
Inside every folder the downloader creates one Markdown file per bookmark, containing the title of the bookmark, a reference URL and all of its highlights (one paragraph for each highlight).
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
Install the following packages (the scripts use Python 2 syntax, so run them with a Python 2 interpreter):
|
||||||
|
- `pip install httplib2`
|
||||||
|
- `pip install oauth2`
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
1. Get an OAuth KEY and SECRET from [Instapaper](https://www.instapaper.com/main/request_oauth_consumer_token)
|
||||||
|
2. Modify the file `Credentials.ini` with your KEY, SECRET, LOGIN and PASSWORD
|
||||||
|
3. Run the app: `python downloader.py`
|
||||||
|
|
||||||
|
You can then import these Markdown files into your favourite notes application, e.g. [Bear](http://www.bear-writer.com/).
|
||||||
|
|
||||||
|
## Credits
|
||||||
|
|
||||||
|
Highlights Downloader makes use of a modified version of [pyinstapaper](https://github.com/mdorn/pyinstapaper), a Python wrapper for the Instapaper API; thanks [Matt Dorn](https://github.com/mdorn)!
|
||||||
126
downloader.py
Normal file
126
downloader.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
import datetime
|
||||||
|
import os
|
||||||
|
import ConfigParser
|
||||||
|
|
||||||
|
from instapaper import Instapaper, Folder
|
||||||
|
|
||||||
|
# Init instapaper with key, secret, login and password
|
||||||
|
def init():
    """Build a logged-in Instapaper client from ``Credentials.ini``.

    The OAuth consumer key and secret are read from the ``Instapaper``
    section and the account login and password from the ``Login`` section
    of ``Credentials.ini`` in the current working directory.

    :returns: an ``Instapaper`` instance on which ``login`` has been called
    """
    # Read credentials from Credentials.ini file
    configParser = ConfigParser.RawConfigParser()
    configParser.read('Credentials.ini')

    key = configParser.get('Instapaper', 'INSTAPAPER_KEY')
    secret = configParser.get('Instapaper', 'INSTAPAPER_SECRET')
    login = configParser.get('Login', 'INSTAPAPER_LOGIN')
    password = configParser.get('Login', 'INSTAPAPER_PASSWORD')

    # NOTE: the credentials are intentionally not printed; echoing the
    # secret and password to stdout leaks them into terminals and logs.

    # Create instance of Instapaper using the OAuth credentials
    client = Instapaper(key, secret)

    # Login with user and password
    client.login(login, password)

    return client
|
||||||
|
|
||||||
|
# Function to change to highlights folder
|
||||||
|
def change_to_highlights_folder():
    """Make ``highlights`` the current working directory.

    The directory is looked up relative to the current working directory
    and is created first when it does not exist yet.
    """
    target = 'highlights'

    # Create the directory on first use
    missing = not os.path.exists(target)
    if missing:
        os.makedirs(target)

    os.chdir(target)
|
||||||
|
|
||||||
|
# Change to folder using the folder_id
|
||||||
|
def change_to_folder(folder_id):
    """Make the directory named after ``folder_id`` the working directory.

    :param folder_id: id of the folder; its ``str()`` form is used as the
        directory name, relative to the current working directory
    """
    directory = str(folder_id)

    # Create the directory the first time this folder id is seen
    already_there = os.path.exists(directory)
    if not already_there:
        os.makedirs(directory)

    os.chdir(directory)
|
||||||
|
|
||||||
|
def get_list_of_existing_highlights():
    """Return the ids of bookmarks already saved in the current directory.

    Every ``<number>.md`` file in the current working directory contributes
    its numeric base name. ``.md`` files whose base name is not an integer
    (for example a hand-written ``notes.md``) are ignored rather than
    aborting the whole run with a ValueError.

    :returns: list of int ids, in directory-listing order
    """
    existing = []

    for name in os.listdir('.'):
        stem, extension = os.path.splitext(name)
        if extension != '.md':
            continue
        try:
            existing.append(int(stem))
        except ValueError:
            # Not a file written by this tool; leave it alone
            continue

    return existing
|
||||||
|
|
||||||
|
# Process bookmarks in one folder
|
||||||
|
def process_folder(folder):
|
||||||
|
# Show id and title of the folder
|
||||||
|
print folder.folder_id, folder.title
|
||||||
|
|
||||||
|
change_to_folder(folder.folder_id)
|
||||||
|
|
||||||
|
existing = get_list_of_existing_highlights()
|
||||||
|
|
||||||
|
# Get bookmarks from the current folder using its folder_id
|
||||||
|
# TODO: identify which bookmarks have been processed and pass their ids
|
||||||
|
# as parameter to this function as 'have'. Current library does not allow
|
||||||
|
# this functionality, needs to be updated
|
||||||
|
bookmarks = instapaper.get_bookmarks(folder=folder.folder_id, have=existing, limit=500)
|
||||||
|
|
||||||
|
process_bookmarks(bookmarks)
|
||||||
|
|
||||||
|
# Process list of bookmarks
|
||||||
|
def process_bookmarks(bookmarks):
    """Report how many bookmarks were received, then process each one.

    :param bookmarks: sized iterable of bookmark objects
    """
    total = len(bookmarks)
    print('Number of bookmarks: ' + str(total))

    for entry in bookmarks:
        process_bookmark(entry)
|
||||||
|
|
||||||
|
# Process the highlights of one bookmark
|
||||||
|
def process_bookmark(bookmark):
    """Write the highlights of one bookmark to ``<bookmark_id>.md``.

    Nothing is written when the file already exists in the current working
    directory or when the bookmark has no highlights. The file holds the
    title as a heading, a ``[Reference](url)`` link and one paragraph per
    highlight.

    :param bookmark: object exposing ``bookmark_id``, ``title``, ``url`` and
        ``get_highlights()``
    """
    filename = str(bookmark.bookmark_id) + ".md"

    # Check for an existing file first: it is a cheap local test, whereas
    # get_highlights() asks the bookmark to fetch its highlights.
    if os.path.exists(filename):
        return

    highlights = bookmark.get_highlights()
    if not highlights:
        return

    # Show that we have found a new bookmark with highlights
    print("New highlight file: " + filename)

    # Build the whole document before touching the disk so that an encoding
    # failure cannot leave a truncated file behind (a partial file would be
    # treated as "already processed" on the next run).
    parts = [
        '# ' + bookmark.title.encode('utf-8') + '\n',
        '[Reference]' + '(' + bookmark.url.encode('utf-8') + ')\n\n',
    ]
    for highlight in highlights:
        parts.append(highlight.text.encode('utf-8') + '\n\n')
    content = ''.join(parts)

    # 'with' guarantees the handle is closed even if the write fails
    with open(filename, "w") as new_file:
        new_file.write(content)
|
||||||
|
|
||||||
|
# ----------------------------------
# Script body: these statements run whenever the module is executed.

# Init Instapaper. The client is kept in a module-level name because
# process_folder() reads 'instapaper' as a global.
instapaper = init()

# Change to highlights folder
change_to_highlights_folder()

# Get all folders
folders = instapaper.get_folders()

# Process each folder
for folder in folders:

    process_folder(folder)

    # Change to the root folder
    # NOTE(review): process_folder() changes into the per-folder directory,
    # so this step back up is assumed to belong inside the loop -- confirm
    # against the original indentation.
    os.chdir('..')
|
||||||
316
instapaper.py
Executable file
316
instapaper.py
Executable file
@@ -0,0 +1,316 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
import urlparse
|
||||||
|
from urllib import urlencode
|
||||||
|
|
||||||
|
import oauth2 as oauth
|
||||||
|
|
||||||
|
# Root of every API URL built in Instapaper.request()
BASE_URL = 'https://www.instapaper.com'
# Default API version segment ("api/<version>/...") used by request()
API_VERSION = '1'
# Path of the endpoint used by Instapaper.login()
ACCESS_TOKEN = 'oauth/access_token'
# NOTE(review): not referenced by the code visible in this module
LOGIN_URL = 'https://www.instapaper.com/user/login'
# Seconds slept before each request in Instapaper.request()
REQUEST_DELAY_SECS = 0.5

log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Instapaper(object):
    '''Instapaper client class.

    Wraps an ``oauth2`` client and exposes login, bookmark listing and
    folder listing.

    :param oauth_key str: Instapaper OAuth consumer key
    :param oauth_secret str: Instapaper OAuth consumer secret
    '''

    def __init__(self, oauth_key, oauth_secret):
        self.consumer = oauth.Consumer(oauth_key, oauth_secret)
        # Token-less client; login() replaces it with a token-bearing one
        self.oauth_client = oauth.Client(self.consumer)
        self.token = None

    def login(self, username, password):
        '''Authenticate using XAuth variant of OAuth.

        On success ``self.token`` is set and ``self.oauth_client`` is
        rebuilt so that later requests carry the token.

        :param str username: Username or email address for the relevant account
        :param str password: Password for the account
        '''
        response = self.request(
            ACCESS_TOKEN,
            {
                'x_auth_mode': 'client_auth',
                'x_auth_username': username,
                'x_auth_password': password
            },
            returns_json=False
        )
        # The body is a URL-encoded query string, not JSON
        token = dict(urlparse.parse_qsl(response['data']))
        self.token = oauth.Token(
            token['oauth_token'], token['oauth_token_secret'])
        self.oauth_client = oauth.Client(self.consumer, self.token)

    def request(self, path, params=None, returns_json=True,
                method='POST', api_version=API_VERSION):
        '''Process a request using the OAuth client's request method.

        Sleeps ``REQUEST_DELAY_SECS`` before every call.

        :param str path: Path fragment to the API endpoint, e.g. "resource/ID"
        :param dict params: Parameters to pass to request
        :param bool returns_json: Whether to try decoding the body as JSON;
            a body that is not valid JSON is returned undecoded
        :param str method: Optional HTTP method, normally POST for Instapaper
        :param str api_version: Optional alternative API version
        :returns: ``{'response': <response>, 'data': <decoded or raw body>}``
        :rtype: dict
        :raises Exception: when the decoded body is a one-element list whose
            item has type ``error``
        '''
        time.sleep(REQUEST_DELAY_SECS)
        full_path = '/'.join([BASE_URL, 'api/%s' % api_version, path])
        params = urlencode(params) if params else None
        log.debug('URL: %s', full_path)
        request_kwargs = {'method': method}
        if params:
            request_kwargs['body'] = params
        response, content = self.oauth_client.request(
            full_path, **request_kwargs)
        log.debug('CONTENT: %s ...', content[:50])
        if returns_json:
            try:
                data = json.loads(content)
                if isinstance(data, list) and len(data) == 1:
                    # ugly -- API always returns a list even when you expect
                    # only one item
                    if data[0]['type'] == 'error':
                        raise Exception('Instapaper error %d: %s' % (
                            data[0]['error_code'],
                            data[0]['message'])
                        )
                        # TODO: PyInstapaperException custom class?
            except ValueError:
                # Instapaper API can be unpredictable/inconsistent, e.g.
                # bookmarks/get_text doesn't return JSON
                data = content
        else:
            data = content
        return {
            'response': response,
            'data': data
        }

    def get_bookmarks(self, folder='unread', limit=10, have=None):
        """Return list of user's bookmarks.

        :param str folder: Optional. Possible values are unread (default),
            starred, archive, or a folder_id value.
        :param int limit: Optional. A number between 1 and 500, default 10.
        :param have: Optional. Bookmarks the caller already has. A list,
            tuple or set is sent as a comma-separated string; any other
            value is sent unchanged. Omitted from the request when empty.
        :returns: List of user's bookmarks
        :rtype: list
        :raises Exception: when the API returns an item of type ``error``
        """
        path = 'bookmarks/list'
        params = {'folder_id': folder, 'limit': limit}
        if have:
            if isinstance(have, (list, tuple, set)):
                # urlencode() would otherwise send the Python repr of the
                # container (e.g. "[1, 2]") instead of "1,2"
                have = ','.join(str(entry) for entry in have)
            params['have'] = have
        response = self.request(path, params)
        items = response['data']
        bookmarks = []
        for item in items:
            if item.get('type') == 'error':
                raise Exception(item.get('message'))
            elif item.get('type') == 'bookmark':
                bookmarks.append(Bookmark(self, **item))
        return bookmarks

    def get_folders(self):
        """Return list of user's folders.

        :rtype: list
        :raises Exception: when the API returns an item of type ``error``
        """
        path = 'folders/list'
        response = self.request(path)
        items = response['data']
        folders = []
        for item in items:
            if item.get('type') == 'error':
                raise Exception(item.get('message'))
            elif item.get('type') == 'folder':
                folders.append(Folder(self, **item))
        return folders
|
||||||
|
|
||||||
|
|
||||||
|
class InstapaperObject(object):

    '''Base class for Instapaper objects like Bookmark.

    Subclasses declare ``RESOURCE``, ``RESOURCE_ID_ATTRIBUTE``,
    ``ATTRIBUTES`` and ``SIMPLE_ACTIONS``, and optionally
    ``TIMESTAMP_ATTRS``.

    :param client: object whose ``request(path, params)`` method is used to
        call the API (the ``Instapaper`` client in this module)
    :param dict data: key/value pairs of object attributes, e.g. title, etc.
    '''

    def __init__(self, client, **data):
        self.client = client
        timestamp_attrs = getattr(self, 'TIMESTAMP_ATTRS', ())
        for attrib in self.ATTRIBUTES:
            val = data.get(attrib)
            # A missing timestamp stays None: int(None) would raise
            # TypeError and make the object impossible to build.
            if val is not None and attrib in timestamp_attrs:
                try:
                    val = datetime.fromtimestamp(int(val))
                except (TypeError, ValueError):
                    log.warning(
                        'Could not cast %s for %s as datetime',
                        val, attrib
                    )
            setattr(self, attrib, val)
        self.object_id = getattr(self, self.RESOURCE_ID_ATTRIBUTE)
        for action in self.SIMPLE_ACTIONS:
            # Bind the action name as a default argument so each generated
            # callable keeps its own name and can be called with no args.
            setattr(
                self, action,
                lambda action=action: self._simple_action(action)
            )

    def add(self):
        '''Save an object to Instapaper after instantiating it.

        Only attributes with a truthy value are submitted.

        Example::

            folder = Folder(instapaper, title='stuff')
            result = folder.add()

        :returns: Response from the API
        '''
        # TODO validation per object type
        submit_attribs = {}
        for attrib in self.ATTRIBUTES:
            val = getattr(self, attrib, None)
            if val:
                submit_attribs[attrib] = val
        path = '/'.join([self.RESOURCE, 'add'])
        result = self.client.request(path, submit_attribs)
        return result

    def _simple_action(self, action=None):
        '''Issue a request for an API method whose only param is the obj ID.

        :param str action: The name of the action for the resource
        :returns: Response from the API
        :rtype: dict
        :raises Exception: when no action name is given
        '''
        if not action:
            raise Exception('No simple action defined')
        path = "/".join([self.RESOURCE, action])
        response = self.client.request(
            path, {self.RESOURCE_ID_ATTRIBUTE: self.object_id}
        )
        return response
|
||||||
|
|
||||||
|
|
||||||
|
class Bookmark(InstapaperObject):

    '''Object representing an Instapaper bookmark/article.'''

    RESOURCE = 'bookmarks'
    RESOURCE_ID_ATTRIBUTE = 'bookmark_id'
    # TODO: identify which fields to convert from timestamp to Python datetime
    ATTRIBUTES = [
        'bookmark_id',
        'title',
        'description',
        'hash',
        'url',
        'progress_timestamp',
        'time',
        'progress',
        'starred',
        'type',
        'private_source'
    ]
    # Attributes converted to datetime by the base-class constructor
    TIMESTAMP_ATTRS = [
        'progress_timestamp',
        'time'
    ]
    # Each name becomes a no-argument instance callable (see base class)
    SIMPLE_ACTIONS = [
        'delete',
        'star',
        'archive',
        'unarchive',
        'get_text'
    ]

    def __str__(self):
        return 'Bookmark %s: %s' % (self.object_id, self.title.encode('utf-8'))

    def get_highlights(self):
        '''Get highlights for Bookmark instance.

        :return: list of ``Highlight`` objects
        :rtype: list
        :raises Exception: when the API returns an item of type ``error``
        '''
        # NOTE: all Instapaper API methods use POST except this one!
        path = '/'.join([self.RESOURCE, str(self.object_id), 'highlights'])
        response = self.client.request(path, method='GET', api_version='1.1')
        items = response['data']
        highlights = []
        for item in items:
            if item.get('type') == 'error':
                raise Exception(item.get('message'))
            elif item.get('type') == 'highlight':
                # Hand over the API client, not this Bookmark: a Highlight
                # issues its requests through client.request(), which a
                # Bookmark does not provide.
                highlights.append(Highlight(self.client, **item))
        return highlights
|
||||||
|
|
||||||
|
|
||||||
|
class Folder(InstapaperObject):

    '''Object representing an Instapaper folder.'''

    RESOURCE = 'folders'
    RESOURCE_ID_ATTRIBUTE = 'folder_id'
    # 'folder_id' used to be listed twice; one entry is enough because the
    # base class simply sets one attribute per name.
    ATTRIBUTES = [
        'folder_id',
        'title',
        'display_title',
        'sync_to_mobile',
        'position',
        'type',
        'slug',
    ]
    # Each name becomes a no-argument instance callable (see base class)
    SIMPLE_ACTIONS = [
        'delete',
    ]

    def __str__(self):
        return 'Folder %s: %s' % (self.object_id, self.title)

    def set_order(self, folder_ids):
        """Order the user's folders

        Not implemented yet.

        :param list folder_ids: List of folder IDs in the desired order.
        :returns: List Folder objects in the new order.
        :rtype: list
        :raises NotImplementedError: always
        """
        # TODO
        raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
class Highlight(InstapaperObject):

    '''A single highlight belonging to an Instapaper bookmark.'''

    RESOURCE = 'highlights'
    RESOURCE_ID_ATTRIBUTE = 'highlight_id'

    # Names copied from the API payload onto the instance
    ATTRIBUTES = [
        'highlight_id',
        'text',
        'note',
        'time',
        'position',
        'bookmark_id',
        'type',
        'slug',
    ]
    # Subset of ATTRIBUTES that the base class converts to datetime
    TIMESTAMP_ATTRS = [
        'time',
    ]
    # Each name becomes a no-argument instance callable (see base class)
    SIMPLE_ACTIONS = [
        'delete',
    ]

    def __str__(self):
        ids = (self.object_id, self.bookmark_id)
        return 'Highlight %s for Article %s' % ids

    def create(self):
        '''Create the highlight on Instapaper (not implemented yet).

        :raises NotImplementedError: always
        '''
        # TODO
        raise NotImplementedError
|
||||||
Reference in New Issue
Block a user