mirror of
https://github.com/nikdoof/highlightsdownloader.git
synced 2025-12-13 01:32:23 +00:00
First commit
This commit is contained in:
24
README.md
Normal file
24
README.md
Normal file
@@ -0,0 +1,24 @@
|
||||
# Highlights Downloader
|
||||
|
||||
Simple application which downloads all the highlights stored in your Instapaper folders as Markdown files.
|
||||
|
||||
The downloader creates a tree of folders inside `highlights`, one for each folder you have in Instapaper. Each local folder is named after the id of the corresponding Instapaper folder.
|
||||
|
||||
Inside every folder the downloader creates one Markdown file per bookmark that has highlights. Each file contains the title of the bookmark, a reference link to its URL and all of its highlights (one paragraph per highlight).
|
||||
|
||||
## Requirements
|
||||
Install the following packages:
|
||||
- `pip install httplib2`
|
||||
- `pip install oauth2`
|
||||
|
||||
## Usage
|
||||
|
||||
1. Get an OAuth KEY and SECRET from [Instapaper](https://www.instapaper.com/main/request_oauth_consumer_token)
|
||||
2. Modify the file `Credentials.ini` with your KEY, SECRET, LOGIN and PASSWORD
|
||||
3. Run the app: `python downloader.py`
|
||||
|
||||
You can then import these Markdown files into your favourite notes application, e.g. [Bear](http://www.bear-writer.com/)
|
||||
|
||||
## Credits
|
||||
|
||||
Highlights Downloader makes use of a modified version of the [pyinstapaper library](https://github.com/mdorn/pyinstapaper), a Python wrapper for the Instapaper API; thanks [Matt Dorn](https://github.com/mdorn)!
|
||||
126
downloader.py
Normal file
126
downloader.py
Normal file
@@ -0,0 +1,126 @@
|
||||
import datetime
|
||||
import os
|
||||
import ConfigParser
|
||||
|
||||
from instapaper import Instapaper, Folder
|
||||
|
||||
# Init instapaper with key, secret, login and password
|
||||
def init():
    """Build a logged-in Instapaper client from ``Credentials.ini``.

    Reads the OAuth consumer key/secret from the ``Instapaper`` section and
    the account login/password from the ``Login`` section of
    ``Credentials.ini`` (looked up relative to the current working
    directory), creates the client and logs in.

    :returns: an ``Instapaper`` client on which ``login`` has been called
    :raises ConfigParser.Error: if a required section or option is missing
        (``RawConfigParser.read`` silently ignores a missing file, so an
        absent ``Credentials.ini`` surfaces here as a missing section)
    """
    config = ConfigParser.RawConfigParser()
    config.read('Credentials.ini')

    key = config.get('Instapaper', 'INSTAPAPER_KEY')
    secret = config.get('Instapaper', 'INSTAPAPER_SECRET')
    login = config.get('Login', 'INSTAPAPER_LOGIN')
    password = config.get('Login', 'INSTAPAPER_PASSWORD')

    # The credentials are deliberately NOT printed: echoing the OAuth secret
    # and the account password leaks them into terminals and log files.

    # Create the client with the OAuth consumer credentials
    client = Instapaper(key, secret)

    # Exchange login and password for an access token
    client.login(login, password)

    return client
|
||||
|
||||
# Function to change to highlights folder
|
||||
def change_to_highlights_folder():
    """Make ``highlights`` the working directory, creating it if absent."""
    target = 'highlights'

    # Create the directory on first run only
    missing = not os.path.exists(target)
    if missing:
        os.makedirs(target)

    os.chdir(target)
|
||||
|
||||
# Change to folder using the folder_id
|
||||
def change_to_folder(folder_id):
    """Enter the directory named after ``folder_id``, creating it if absent.

    :param folder_id: Instapaper folder id; its ``str()`` form is used as
        the directory name, relative to the current working directory
    """
    dirname = str(folder_id)

    already_there = os.path.exists(dirname)
    if not already_there:
        os.makedirs(dirname)

    os.chdir(dirname)
|
||||
|
||||
def get_list_of_existing_highlights():
    """Return the bookmark ids that already have a Markdown file here.

    Scans the current working directory for files named
    ``<bookmark_id>.md``. Markdown files whose base name is not an integer
    (for example a hand-written ``README.md``) are skipped instead of
    aborting the whole run with a ``ValueError``.

    :returns: bookmark ids, in directory-listing order
    :rtype: list of int
    """
    existing = []

    for name in os.listdir('.'):
        if not name.endswith('.md'):
            continue
        try:
            existing.append(int(os.path.splitext(name)[0]))
        except ValueError:
            # Not one of our "<bookmark_id>.md" files; leave it alone
            continue

    return existing
|
||||
|
||||
# Process bookmarks in one folder
|
||||
def process_folder(folder):
    """Download the highlights of the bookmarks in one Instapaper folder.

    Prints the folder id and title, enters the local directory named after
    ``folder.folder_id`` and processes the bookmarks returned for that
    folder. The working directory is left inside the folder's directory;
    the caller is expected to step back out.

    :param folder: object exposing ``folder_id`` and ``title``
    """
    # Show id and title of the folder
    print('%s %s' % (folder.folder_id, folder.title))

    change_to_folder(folder.folder_id)

    # Ids of the bookmarks that already have a Markdown file on disk
    already_downloaded = get_list_of_existing_highlights()

    # Ask for up to 500 bookmarks of this folder, telling the library which
    # ones we already have via 'have'
    bookmarks = instapaper.get_bookmarks(
        folder=folder.folder_id,
        have=already_downloaded,
        limit=500)

    process_bookmarks(bookmarks)
|
||||
|
||||
# Process list of bookmarks
|
||||
def process_bookmarks(bookmarks):
    """Report how many bookmarks were received, then process each in order.

    :param bookmarks: sized iterable of bookmark objects
    """
    total = len(bookmarks)
    print('Number of bookmarks: ' + str(total))

    for entry in bookmarks:
        process_bookmark(entry)
|
||||
|
||||
# Process the highlights of one bookmark
|
||||
def process_bookmark(bookmark):
    """Write the highlights of one bookmark to ``<bookmark_id>.md``.

    Nothing is written when the file already exists in the current working
    directory or when the bookmark has no highlights. The file holds the
    title as a Markdown heading, a ``[Reference](url)`` link and then one
    paragraph per highlight, encoded as UTF-8.

    :param bookmark: object exposing ``bookmark_id``, ``title``, ``url`` and
        ``get_highlights()`` (whose items expose ``text``)
    """
    # Function-scope import so the module's import block stays untouched
    import io

    filename = str(bookmark.bookmark_id) + '.md'

    # Check the disk BEFORE asking for highlights: get_highlights() issues a
    # request per bookmark, which is wasted on bookmarks that were already
    # downloaded.
    if os.path.exists(filename):
        return

    highlights = bookmark.get_highlights()
    if not highlights:
        return

    # Show that we have found a new bookmark with highlights
    print('New highlight file: ' + filename)

    # Build the whole document first so that a bad attribute cannot leave a
    # half-written file behind (which would then be skipped on later runs).
    parts = [
        u'# ' + bookmark.title + u'\n',
        u'[Reference](' + bookmark.url + u')\n\n',
    ]
    parts.extend(highlight.text + u'\n\n' for highlight in highlights)

    # 'with' guarantees the handle is closed even if the write fails
    with io.open(filename, 'w', encoding='utf-8') as new_file:
        new_file.write(u''.join(parts))
|
||||
|
||||
# ----------------------------------
|
||||
# Init Instapaper
|
||||
# Authenticate once; process_folder() uses this module-level client
instapaper = init()

# Everything is written below ./highlights
change_to_highlights_folder()

# Fetch the user's folders and handle them one at a time
folders = instapaper.get_folders()

for folder in folders:
    process_folder(folder)

    # process_folder() leaves us inside the folder's own directory, so step
    # back up to 'highlights' before handling the next one
    os.chdir('..')
|
||||
316
instapaper.py
Executable file
316
instapaper.py
Executable file
@@ -0,0 +1,316 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import urlparse
|
||||
from urllib import urlencode
|
||||
|
||||
import oauth2 as oauth
|
||||
|
||||
# Module-level logger, named after this module
log = logging.getLogger(__name__)

# Web site root and its login page
BASE_URL = 'https://www.instapaper.com'
LOGIN_URL = 'https://www.instapaper.com/user/login'

# Default API version and the path of the access-token endpoint
API_VERSION = '1'
ACCESS_TOKEN = 'oauth/access_token'

# Pause, in seconds, taken before every API request
REQUEST_DELAY_SECS = 0.5
|
||||
|
||||
|
||||
class Instapaper(object):
    '''Instapaper client class.

    Wraps an ``oauth2.Client`` and exposes the API calls needed to log in
    and to list the user's folders and bookmarks.

    :param oauth_key str: Instapaper OAuth consumer key
    :param oauth_secret str: Instapaper OAuth consumer secret
    '''

    def __init__(self, oauth_key, oauth_secret):
        self.consumer = oauth.Consumer(oauth_key, oauth_secret)
        # Token-less client; login() replaces it with a token-bearing one
        self.oauth_client = oauth.Client(self.consumer)
        self.token = None

    def login(self, username, password):
        '''Authenticate using XAuth variant of OAuth.

        On success ``self.token`` is set and ``self.oauth_client`` is
        replaced by a client that signs requests with that token.

        :param str username: Username or email address for the relevant account
        :param str password: Password for the account
        :raises KeyError: if the response does not contain ``oauth_token``
            and ``oauth_token_secret``
        '''
        response = self.request(
            ACCESS_TOKEN,
            {
                'x_auth_mode': 'client_auth',
                'x_auth_username': username,
                'x_auth_password': password
            },
            returns_json=False
        )
        # The token endpoint answers with a query string, not with JSON
        token = dict(urlparse.parse_qsl(response['data']))
        self.token = oauth.Token(
            token['oauth_token'], token['oauth_token_secret'])
        self.oauth_client = oauth.Client(self.consumer, self.token)

    def request(self, path, params=None, returns_json=True,
                method='POST', api_version=API_VERSION):
        '''Process a request using the OAuth client's request method.

        Sleeps ``REQUEST_DELAY_SECS`` before every call.

        :param str path: Path fragment to the API endpoint, e.g. "resource/ID"
        :param dict params: Parameters to pass to request
        :param bool returns_json: Decode the body as JSON when true; a body
            that is not valid JSON is returned unchanged
        :param str method: Optional HTTP method, normally POST for Instapaper
        :param str api_version: Optional alternative API version
        :returns: ``{'response': <headers>, 'data': <decoded or raw body>}``
        :rtype: dict
        :raises Exception: if the decoded body is a one-element list whose
            item has type ``error``
        '''
        time.sleep(REQUEST_DELAY_SECS)
        full_path = '/'.join([BASE_URL, 'api/%s' % api_version, path])
        params = urlencode(params) if params else None
        log.debug('URL: %s', full_path)
        request_kwargs = {'method': method}
        if params:
            request_kwargs['body'] = params
        response, content = self.oauth_client.request(
            full_path, **request_kwargs)
        log.debug('CONTENT: %s ...', content[:50])
        if returns_json:
            try:
                data = json.loads(content)
            except ValueError:
                # Instapaper API can be unpredictable/inconsistent, e.g.
                # bookmarks/get_text doesn't return JSON
                data = content
            else:
                if isinstance(data, list) and len(data) == 1:
                    # ugly -- API always returns a list even when you expect
                    # only one item
                    if data[0]['type'] == 'error':
                        # TODO: PyInstapaperException custom class?
                        raise Exception('Instapaper error %d: %s' % (
                            data[0]['error_code'],
                            data[0]['message'])
                        )
        else:
            data = content
        return {
            'response': response,
            'data': data
        }

    def get_bookmarks(self, folder='unread', limit=10, have=None):
        """Return list of user's bookmarks.

        :param str folder: Optional. Possible values are unread (default),
                           starred, archive, or a folder_id value.
        :param int limit: Optional. A number between 1 and 500; this client
                          sends 10 unless told otherwise.
        :param have: Optional. Ids of bookmarks the caller already has,
                     either as a list/tuple/set of ids or as an
                     already-formatted string. Collections are sent as a
                     comma-separated string; an empty value is not sent.
        :returns: List of user's bookmarks
        :rtype: list
        :raises Exception: if the API returns an item of type ``error``
        """
        path = 'bookmarks/list'
        params = {'folder_id': folder, 'limit': limit}
        if have:
            if isinstance(have, (list, tuple, set, frozenset)):
                # urlencode() would otherwise send the Python repr "[1, 2]"
                have = ','.join(str(item) for item in have)
            params['have'] = have
        response = self.request(path, params)
        items = response['data']
        bookmarks = []
        for item in items:
            if item.get('type') == 'error':
                raise Exception(item.get('message'))
            elif item.get('type') == 'bookmark':
                bookmarks.append(Bookmark(self, **item))
        return bookmarks

    def get_folders(self):
        """Return list of user's folders.

        :rtype: list
        :raises Exception: if the API returns an item of type ``error``
        """
        path = 'folders/list'
        response = self.request(path)
        items = response['data']
        folders = []
        for item in items:
            if item.get('type') == 'error':
                raise Exception(item.get('message'))
            elif item.get('type') == 'folder':
                folders.append(Folder(self, **item))
        return folders
|
||||
|
||||
|
||||
class InstapaperObject(object):

    '''Base class for Instapaper objects like Bookmark.

    Subclasses declare ``RESOURCE``, ``RESOURCE_ID_ATTRIBUTE``,
    ``ATTRIBUTES`` and ``SIMPLE_ACTIONS``, and optionally
    ``TIMESTAMP_ATTRS``. Every name in ``ATTRIBUTES`` becomes an instance
    attribute (``None`` when absent from ``data``), and every name in
    ``SIMPLE_ACTIONS`` becomes a callable attribute that issues that action
    for this object.

    :param client: object whose ``request(path, params)`` method performs
        the API calls (an ``Instapaper`` instance)
    :param dict data: key/value pairs of object attributes, e.g. title, etc.
    '''

    def __init__(self, client, **data):
        self.client = client
        timestamp_attrs = getattr(self, 'TIMESTAMP_ATTRS', ())
        for attrib in self.ATTRIBUTES:
            val = data.get(attrib)
            # A timestamp that was not supplied stays None; int(None) would
            # raise TypeError and abort construction.
            if attrib in timestamp_attrs and val is not None:
                try:
                    val = datetime.fromtimestamp(int(val))
                except (TypeError, ValueError):
                    # Keep the raw value when it is not a usable timestamp
                    log.warning(
                        'Could not cast %s for %s as datetime',
                        val, attrib
                    )
            setattr(self, attrib, val)
        self.object_id = getattr(self, self.RESOURCE_ID_ATTRIBUTE)
        for action in self.SIMPLE_ACTIONS:
            # Bind the action name as the lambda's default so that e.g.
            # obj.delete() calls _simple_action('delete')
            setattr(self, action, lambda x=action: self._simple_action(x))

    def add(self):
        '''Save an object to Instapaper after instantiating it.

        Only attributes with a truthy value are submitted.

        Example::

            folder = Folder(instapaper, title='stuff')
            result = folder.add()

        :returns: Response from the API
        :rtype: dict
        '''
        # TODO validation per object type
        submit_attribs = {}
        for attrib in self.ATTRIBUTES:
            val = getattr(self, attrib, None)
            if val:
                submit_attribs[attrib] = val
        path = '/'.join([self.RESOURCE, 'add'])
        result = self.client.request(path, submit_attribs)
        return result

    def _simple_action(self, action=None):
        '''Issue a request for an API method whose only param is the obj ID.

        :param str action: The name of the action for the resource
        :returns: Response from the API
        :rtype: dict
        :raises Exception: if no action name is given
        '''
        if not action:
            raise Exception('No simple action defined')
        path = "/".join([self.RESOURCE, action])
        response = self.client.request(
            path, {self.RESOURCE_ID_ATTRIBUTE: self.object_id}
        )
        return response
|
||||
|
||||
|
||||
class Bookmark(InstapaperObject):

    '''Object representing an Instapaper bookmark/article.'''

    RESOURCE = 'bookmarks'
    RESOURCE_ID_ATTRIBUTE = 'bookmark_id'
    # TODO: identify which fields to convert from timestamp to Python datetime
    ATTRIBUTES = [
        'bookmark_id',
        'title',
        'description',
        'hash',
        'url',
        'progress_timestamp',
        'time',
        'progress',
        'starred',
        'type',
        'private_source'
    ]
    # Attributes converted from a Unix timestamp to ``datetime``
    TIMESTAMP_ATTRS = [
        'progress_timestamp',
        'time'
    ]
    # API actions whose only parameter is the bookmark id
    SIMPLE_ACTIONS = [
        'delete',
        'star',
        'archive',
        'unarchive',
        'get_text'
    ]

    def __str__(self):
        return 'Bookmark %s: %s' % (self.object_id, self.title.encode('utf-8'))

    def get_highlights(self):
        '''Get highlights for Bookmark instance.

        :return: list of ``Highlight`` objects
        :rtype: list
        :raises Exception: if the API returns an item of type ``error``
        '''
        # NOTE: all Instapaper API methods use POST except this one!
        path = '/'.join([self.RESOURCE, str(self.object_id), 'highlights'])
        response = self.client.request(path, method='GET', api_version='1.1')
        items = response['data']
        highlights = []
        for item in items:
            if item.get('type') == 'error':
                raise Exception(item.get('message'))
            elif item.get('type') == 'highlight':
                # Hand over the API client, not this Bookmark: a Highlight
                # calls ``self.client.request(...)``, which a Bookmark does
                # not provide.
                highlights.append(Highlight(self.client, **item))
        return highlights
|
||||
|
||||
|
||||
class Folder(InstapaperObject):

    '''Object representing an Instapaper folder.'''

    RESOURCE = 'folders'
    RESOURCE_ID_ATTRIBUTE = 'folder_id'
    # 'folder_id' used to be listed twice; one entry is enough
    ATTRIBUTES = [
        'folder_id',
        'title',
        'display_title',
        'sync_to_mobile',
        'position',
        'type',
        'slug',
    ]
    # API actions whose only parameter is the folder id
    SIMPLE_ACTIONS = [
        'delete',
    ]

    def __str__(self):
        return 'Folder %s: %s' % (self.object_id, self.title)

    def set_order(self, folder_ids):
        """Order the user's folders

        Not implemented yet.

        :param list folder_ids: List of folder IDs in the desired order.
        :returns: List Folder objects in the new order.
        :rtype: list
        :raises NotImplementedError: always
        """
        # TODO
        raise NotImplementedError
|
||||
|
||||
|
||||
class Highlight(InstapaperObject):

    '''A single highlight made on an Instapaper bookmark.'''

    RESOURCE = 'highlights'
    RESOURCE_ID_ATTRIBUTE = 'highlight_id'

    # Fields copied from the API item onto the instance
    ATTRIBUTES = [
        'highlight_id',
        'text',
        'note',
        'time',
        'position',
        'bookmark_id',
        'type',
        'slug',
    ]
    # Fields converted from a Unix timestamp to ``datetime``
    TIMESTAMP_ATTRS = ['time']
    # API actions whose only parameter is the highlight id
    SIMPLE_ACTIONS = ['delete']

    def __str__(self):
        ids = (self.object_id, self.bookmark_id)
        return 'Highlight %s for Article %s' % ids

    def create(self):
        '''Create the highlight on Instapaper (not implemented yet).'''
        # TODO
        raise NotImplementedError
|
||||
Reference in New Issue
Block a user