diff --git a/.travis.yml b/.travis.yml index 3aad80ba..9f396576 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,9 +20,10 @@ addons: - ubuntu-toolchain-r-test - sourceline: 'deb http://downloads.skewed.de/apt/trusty trusty universe' packages: + - libqt5gui5 # pyqt5>5.11 otherwise cannot load the xcb platform plugin + - libflann-dev - python3-graph-tool - python-graph-tool - - libqt5gui5 # pyqt5>5.11 fails to load the xcb platform plugin without it install: - pip install --upgrade pip setuptools wheel # install with latest tools diff --git a/README.rst b/README.rst index e179aa26..024d47ee 100644 --- a/README.rst +++ b/README.rst @@ -39,6 +39,15 @@ A (mostly unmaintained) `Matlab version `_. +The documentation is available on +`Read the Docs `_ +and development takes place on +`GitHub `_. +A (mostly unmaintained) `Matlab version `_ exists. + The PyGSP facilitates a wide variety of operations on graphs, like computing their Fourier basis, filtering or interpolating signals, plotting graphs, signals, and filters. Its core is spectral graph theory, and many of the diff --git a/doc/history.rst b/doc/history.rst index 47f8f21c..4b46c05c 100644 --- a/doc/history.rst +++ b/doc/history.rst @@ -26,12 +26,17 @@ Unreleased * New implementation of the Sensor graph that is simpler and scales better. * A new learning module with three functions to solve standard semi-supervised classification and regression problems. +* A much improved, fixed, documented, and tested NNGraph. The user can now + select the backend and similarity kernel. The radius can be estimated and + features standardized. (PR #43) * Import and export graphs and their signals to NetworkX and graph-tool. * Save and load graphs and theirs signals to / from GraphML, GML, and GEXF. * Documentation: path graph linked to DCT, ring graph linked to DFT. * We now have a gallery of examples! That is convenient for users to get a taste of what the library can do, and to start working from a code snippet. * Merged all the extra requirements in a single dev requirement. +* A function to learn the graph from the signal has been added with doc and tests. 
+ This function can be used simply using the graph ``LearnGraph`` Experimental filter API (to be tested and validated): diff --git a/doc/references.bib b/doc/references.bib index de0ade43..70603173 100644 --- a/doc/references.bib +++ b/doc/references.bib @@ -220,3 +220,20 @@ @article{grassi2018timevertex year={2018}, journal={IEEE Transactions on Signal Processing}, } + +@inproceedings{ + kalofolias2018large, + title={Large Scale Graph Learning From Smooth Signals}, + author={Vassilis Kalofolias and Nathanaƫl Perraudin}, + booktitle={International Conference on Learning Representations}, + year={2019}, + url={https://openreview.net/forum?id=ryGkSo0qYm}, + } + +@inproceedings{kalofolias2016learn, + title={How to learn a graph from smooth signals}, + author={Kalofolias, Vassilis}, + booktitle={Artificial Intelligence and Statistics}, + pages={920--929}, + year={2016} +} diff --git a/doc/tutorials/graph_learning.py b/doc/tutorials/graph_learning.py new file mode 100755 index 00000000..a0f28b97 --- /dev/null +++ b/doc/tutorials/graph_learning.py @@ -0,0 +1,139 @@ +import numpy as np +import pygsp +from matplotlib import pyplot as plt + +#%% +n_signals = 4 +def f1(x, y): return (-np.sin((2-x-y)**2)/2 + np.cos(y*3))/2 +def f2(x, y): return np.cos((x+y)**2); +def f3(x, y): return ((x-.5)**2 + (y-.5)**3 + x - y)*3; +def f4(x, y): return np.sin(3*((x-.5)**2+(y-.5)**2)); + + + +#%% +n_nodes = 300 +coords = np.random.uniform(size=(n_nodes, 2)) + +# the vectors of smooth signals will be stored as columns of X +X = np.zeros((n_nodes, n_signals)) +for i, f in enumerate((f1, f2, f3, f4)): + X[:, i] = f(coords[:, 0], coords[:, 1]) + + + +#%% +plt.figure(figsize=(16, 10)) +for i, f in enumerate((f1, f2, f3, f4)): + plt.subplot(2, 2, i+1); + plt.scatter(coords[:, 0], coords[:, 1], 500, X[:, i], '.'); + plt.title('smooth signal {}'.format(i+1)); + plt.axis('off'); + plt.colorbar(); + + +#%% + +k = 5 +param_opt = {'maxit':2000} + +G1 = pygsp.graphs.LearnedFromSmoothSignals(X[:, 0], coords=coords, k=k, param_opt=param_opt) +G2 = pygsp.graphs.LearnedFromSmoothSignals(X[:, 1], coords=coords, k=k, param_opt=param_opt) +G3 = pygsp.graphs.LearnedFromSmoothSignals(X[:, 2], coords=coords, k=k, param_opt=param_opt) +G4 = pygsp.graphs.LearnedFromSmoothSignals(X[:, 3], coords=coords, k=k, param_opt=param_opt) + +#%% + + +# G1.set_coordinates() +plt.figure(figsize=(18, 16)) +for i, G in enumerate((G1, G2, G3, G4)): + _, _, weights = G.get_edge_list() + ax = plt.subplot(2, 2, i+1); + G.plot(vertex_size=50, edge_width=weights, ax=ax) + plt.xticks([]) + plt.yticks([]) + plt.title('n_vertices = {}, n_edges = {}, k = {:.1f}'.format(G.n_vertices, G.n_edges, G.n_edges * 2 / G.n_vertices)) + + + +#%% + +G_all = pygsp.graphs.LearnedFromSmoothSignals(X, coords=coords, k=k, param_opt=param_opt) +plt.figure(figsize=(8, 6)) +ax = plt.axes() +G_all.plot(vertex_size=0, ax=ax) +plt.xticks([]), plt.yticks([]) +plt.title('n_vertices = {}, n_edges = {}, k = {:.1f}'.format(G_all.n_vertices, G_all.n_edges, G_all.n_edges * 2 / G_all.n_vertices)) + + + +#%% + +G_coords = pygsp.graphs.LearnedFromSmoothSignals(coords, coords=coords, k=k, param_opt=param_opt) +plt.figure(figsize=(8, 6)) +ax = plt.axes() +G_coords.plot(vertex_size=0, ax=ax, edges=True) + + + + +#%% + + +import scipy +Z = scipy.spatial.distance_matrix(X, X)**2 +plt.imshow(Z) +print(np.count_nonzero(Z)) +edge_mask = (Z < 1 / np.sqrt(n_nodes)).astype(np.int) +np.fill_diagonal(edge_mask, 0) +print(np.count_nonzero(edge_mask)) +edge_mask[:3,:3] + + + +#%% + +G_all = 
pygsp.graphs.LearnedFromSmoothSignals(X, coords=coords, k=k, + param_opt=param_opt, + sparse=True, edge_mask=edge_mask) +plt.figure(figsize=(8, 6)) +ax = plt.axes() +G_all.plot(vertex_size=0, ax=ax) +plt.xticks([]), plt.yticks([]) +plt.title('n_vertices = {}, n_edges = {}, k = {:.1f}'.format(G_all.n_vertices, G_all.n_edges, G_all.n_edges * 2 / G_all.n_vertices)) + + + + +#%% + + + + + + +#%% + + + + + + +#%% + + + + + + +#%% + + + + + + +#%% + + diff --git a/doc/tutorials/optimization.rst b/doc/tutorials/optimization.rst index 7127a9c5..b3740d85 100644 --- a/doc/tutorials/optimization.rst +++ b/doc/tutorials/optimization.rst @@ -85,7 +85,7 @@ We start with the graph TV regularization. We will use the :class:`pyunlocbox.so >>> prob1 = pyunlocbox.solvers.solve([d, r, f], solver=solver, ... x0=x0, rtol=0, maxit=1000) Solution found after 1000 iterations: - objective function f(sol) = 2.250584e+02 + objective function f(sol) = 2.256055e+02 stopping criterion: MAXIT >>> >>> fig, ax = G.plot(prob1['sol']) @@ -107,7 +107,7 @@ This figure shows the label signal recovered by graph total variation regulariza >>> prob2 = pyunlocbox.solvers.solve([r, f], solver=solver, ... x0=x0, rtol=0, maxit=1000) Solution found after 1000 iterations: - objective function f(sol) = 6.504290e+01 + objective function f(sol) = 4.376481e+01 stopping criterion: MAXIT >>> >>> fig, ax = G.plot(prob2['sol']) diff --git a/playground.ipynb b/playground.ipynb index b18ce9e2..33b82a0e 100644 --- a/playground.ipynb +++ b/playground.ipynb @@ -40,7 +40,7 @@ "source": [ "G = graphs.Logo()\n", "G.estimate_lmax()\n", - "g = filters.Heat(G, tau=100)" + "g = filters.Heat(G, scale=100)" ] }, { @@ -117,7 +117,25 @@ ] } ], - "metadata": {}, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.5" + } + }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/pygsp/_nearest_neighbor.py b/pygsp/_nearest_neighbor.py new file mode 100644 index 00000000..eafb6ab2 --- /dev/null +++ b/pygsp/_nearest_neighbor.py @@ -0,0 +1,289 @@ +# -*- coding: utf-8 -*- + +from __future__ import division + +import numpy as np +from scipy import sparse, spatial +from scipy.linalg import norm +from pygsp import utils + +_logger = utils.build_logger(__name__) + +def _scipy_pdist(features, metric, order, kind, k, radius, params): + if params: + raise ValueError('unexpected parameters {}'.format(params)) + metric = 'cityblock' if metric == 'manhattan' else metric + metric = 'chebyshev' if metric == 'max_dist' else metric + params = dict(metric=metric) + if metric == 'minkowski': + params['p'] = order + dist = spatial.distance.pdist(features, **params) + dist = spatial.distance.squareform(dist) + if kind == 'knn': + neighbors = np.argsort(dist)[:, :k+1] + distances = np.take_along_axis(dist, neighbors, axis=-1) + elif kind == 'radius': + distances = [] + neighbors = [] + for distance in dist: + neighbor = np.flatnonzero(distance < radius) + neighbors.append(neighbor) + distances.append(distance[neighbor]) + return neighbors, distances + + +def _scipy_kdtree(features, _, order, kind, k, radius, params): + if order is None: + raise ValueError('invalid metric for scipy-kdtree') + eps = params.pop('eps', 0) + tree = spatial.KDTree(features, **params) + 
params = dict(p=order, eps=eps) + if kind == 'knn': + params['k'] = k + 1 + elif kind == 'radius': + params['k'] = None + params['distance_upper_bound'] = radius + distances, neighbors = tree.query(features, **params) + return neighbors, distances + + +def _scipy_ckdtree(features, _, order, kind, k, radius, params): + if order is None: + raise ValueError('invalid metric for scipy-kdtree') + eps = params.pop('eps', 0) + tree = spatial.cKDTree(features, **params) + params = dict(p=order, eps=eps, n_jobs=-1) + if kind == 'knn': + params['k'] = k + 1 + elif kind == 'radius': + params['k'] = features.shape[0] # number of vertices + params['distance_upper_bound'] = radius + distances, neighbors = tree.query(features, **params) + if kind == 'knn': + return neighbors, distances + elif kind == 'radius': + dist = [] + neigh = [] + for distance, neighbor in zip(distances, neighbors): + mask = (distance != np.inf) + dist.append(distance[mask]) + neigh.append(neighbor[mask]) + return neigh, dist + + +def _flann(features, metric, order, kind, k, radius, params): + if metric == 'max_dist': + raise ValueError('flann gives wrong results for metric="max_dist".') + try: + import cyflann as cfl + except Exception as e: + raise ImportError('Cannot import cyflann. Choose another nearest ' + 'neighbors backend or try to install it with ' + 'pip (or conda) install cyflann. ' + 'Original exception: {}'.format(e)) + cfl.set_distance_type(metric, order=order) + index = cfl.FLANNIndex() + index.build_index(features, **params) + # I tried changing the algorithm and testing performance on huge matrices, + # but the default parameters seems to work best. + if kind == 'knn': + neighbors, distances = index.nn_index(features, k+1) + if metric == 'euclidean': + np.sqrt(distances, out=distances) + elif metric == 'minkowski': + np.power(distances, 1/order, out=distances) + elif kind == 'radius': + distances = [] + neighbors = [] + if metric == 'euclidean': + radius = radius**2 + elif metric == 'minkowski': + radius = radius**order + n_vertices, _ = features.shape + for vertex in range(n_vertices): + neighbor, distance = index.nn_radius(features[vertex, :], radius) + distances.append(distance) + neighbors.append(neighbor) + if metric == 'euclidean': + distances = list(map(np.sqrt, distances)) + elif metric == 'minkowski': + distances = list(map(lambda d: np.power(d, 1/order), distances)) + index.free_index() + return neighbors, distances + + +def _nmslib(features, metric, order, kind, k, _, params): + if kind == 'radius': + raise ValueError('nmslib does not support kind="radius".') + if metric == 'minkowski': + raise ValueError('nmslib does not support metric="minkowski".') + try: + import nmslib as nms + except Exception as e: + raise ImportError('Cannot import nmslib. Choose another nearest ' + 'neighbors backend or try to install it with ' + 'pip (or conda) install nmslib. 
' + 'Original exception: {}'.format(e)) + n_vertices, _ = features.shape + params_index = params.pop('index', None) + params_query = params.pop('query', None) + metric = 'l2' if metric == 'euclidean' else metric + metric = 'l1' if metric == 'manhattan' else metric + metric = 'linf' if metric == 'max_dist' else metric + index = nms.init(space=metric, **params) + index.addDataPointBatch(features) + index.createIndex(params_index) + if params_query is not None: + index.setQueryTimeParams(params_query) + results = index.knnQueryBatch(features, k=k+1) + neighbors, distances = zip(*results) + distances = np.concatenate(distances).reshape(n_vertices, k+1) + neighbors = np.concatenate(neighbors).reshape(n_vertices, k+1) + return neighbors, distances + +def nearest_neighbor(features, metric='euclidean', order=2, kind='knn', k=10, radius=None, backend='scipy-ckdtree', **kwargs): + '''Find nearest neighboors. + Inputs: + features [n, m]: + rows correspond to nodes, columns correspond to features/dimensions + k [default 10]: number of neighbors per node + + Outputs: + neighbors [n, k]: + list of nodes neighboring to node of each row + distances [n, k]: + distance of each row's node to each of its k neighbors + + + Parameters + ---------- + features : data numpy array + metric : {'euclidean', 'manhattan', 'minkowski', 'max_dist'}, optional + Metric used to compute pairwise distances. + + * ``'euclidean'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_2`. + * ``'manhattan'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_1`. + * ``'minkowski'`` generalizes the above and defines distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_p` + where :math:`p` is the ``order`` of the norm. + * ``'max_dist'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_\infty = \max(x_i - x_j)`, where + the maximum is taken over the elements of the vector. + + More metrics may be supported for some backends. + Please refer to the documentation of the chosen backend. + kind : 'knn' or 'radius' (default 'knn') + k : number of nearest neighboors if 'knn' is selected + radius : radius of the search if 'radius' is slected + + order : float, optional + The order of the Minkowski distance for ``metric='minkowski'``. + backend : string, optional + * ``'scipy-pdist'`` uses :func:`scipy.spatial.distance.pdist` to + compute pairwise distances. The method is brute force and computes + all distances. That is the slowest method. + * ``'scipy-kdtree'`` uses :class:`scipy.spatial.KDTree`. The method + builds a k-d tree to prune the number of pairwise distances it has to + compute. That is an efficient strategy for low-dimensional spaces. + * ``'scipy-ckdtree'`` uses :class:`scipy.spatial.cKDTree`. The same as + ``'scipy-kdtree'`` but with C bindings, which should be faster. + That is the default. + * ``'flann'`` uses the `Fast Library for Approximate Nearest Neighbors + (FLANN) `_. That method is an + approximation. + * ``'nmslib'`` uses the `Non-Metric Space Library (NMSLIB) + `_. That method is an + approximation. It should be the fastest in high-dimensional spaces. + + You can look at this `benchmark + `_ to get an idea of the + relative performance of those backends. It's nonetheless wise to run + some tests on your own data. 
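A minimal usage sketch of this helper, assuming the signature documented above (random features for illustration only):

import numpy as np
from pygsp._nearest_neighbor import nearest_neighbor, sparse_distance_matrix

features = np.random.RandomState(42).uniform(size=(100, 3))
# k-NN search: each row of `neighbors` starts with the vertex itself (distance 0).
neighbors, distances = nearest_neighbor(features, kind='knn', k=5)
# Assemble a symmetric sparse distance matrix from the search results.
Z = sparse_distance_matrix(neighbors, distances)
print(Z.shape)  # (100, 100)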
+ ''' + if kind=='knn': + radius = None + elif kind=='radius': + k = None + else: + raise ValueError('"kind" must be "knn" or "radius"') + + _orders = { + 'euclidean': 2, + 'manhattan': 1, + 'max_dist': np.inf, + 'minkowski': order, + } + order = _orders.pop(metric, None) + try: + function = globals()['_' + backend.replace('-', '_')] + except KeyError: + raise ValueError('Invalid backend "{}".'.format(backend)) + neighbors, distances = function(features, metric, order, + kind, k, radius, kwargs) + return neighbors, distances + + +def sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=False, kind = None, k=None): + '''Build a sparse distance matrix from nearest neighbors''' + n_edges = [len(n) - 1 for n in neighbors] # remove distance to self + if safe and kind is None: + raise ValueError('Please specify "kind" to "knn" or "radius" to use the safe mode') + + n_vertices = len(n_edges) + if safe and kind == 'radius': + n_disconnected = np.sum(np.asarray(n_edges) == 0) + if n_disconnected > 0: + _logger.warning('{} points (out of {}) have no neighboors. ' + 'Consider increasing the radius or setting ' + 'kind=knn.'.format(n_disconnected, n_vertices)) + + value = np.empty(sum(n_edges), dtype=np.float) + row = np.empty_like(value, dtype=np.int) + col = np.empty_like(value, dtype=np.int) + start = 0 + for vertex in range(n_vertices): + if safe and kind == 'knn': + assert n_edges[vertex] == k + end = start + n_edges[vertex] + value[start:end] = distances[vertex][1:] + row[start:end] = np.full(n_edges[vertex], vertex) + col[start:end] = neighbors[vertex][1:] + start = end + Z = sparse.csr_matrix((value, (row, col)), (n_vertices, n_vertices)) + if symmetrize: + # Enforce symmetry. May have been broken by k-NN. Checking symmetry + # with np.abs(Z - Z.T).sum() is as costly as the symmetrization itself. 
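# ('fill' is chosen over 'average' so that an edge found in only one direction by
#  the k-NN search keeps its full distance instead of being halved.)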
+ Z = utils.symmetrize(Z, method='fill') + return Z + + +def distances_from_edge_mask(X, edge_mask, form='vector'): + ''' + Compute pairwise distances between rows of X, only for pairs corresponding + to non-zeros of edge_mask + + Inputs: + X [n, m]: rows are nodes, columns are features + edge_mask [n, n]: sparse mask, non-zeros correspond to allowed edges + form: 'vector' (default) or 'matrix' to return z or Z + + Output: + Z [n, n]: sparse matrix with same pattern as edge_mask, containing + corresponding pairwise distances (NOT SQUARED) + + Important function to reduce computation in case edge mask is given + + TODO: merge with sparse_distance_matrix() and nearest_neighbor() + TODO: clean up and expose to the user + ''' + r, c = np.where(edge_mask) + z = norm(X[r] - X[c], axis=1) + if form == 'vector': + return z + elif form == 'matrix': + return sparse.csr_matrix((z, (r, c))) + else: + _logger.error('form can either be "vector" or "matrix"') + \ No newline at end of file diff --git a/pygsp/filters/filter.py b/pygsp/filters/filter.py index fca48b37..7e36f11c 100644 --- a/pygsp/filters/filter.py +++ b/pygsp/filters/filter.py @@ -255,7 +255,7 @@ def filter(self, s, method='chebyshev', order=30): >>> _ = G.plot(s1, ax=axes[0]) >>> _ = G.plot(s2, ax=axes[1]) >>> print('{:.5f}'.format(np.linalg.norm(s1 - s2))) - 0.26808 + 0.26995 Perfect reconstruction with Itersine, a tight frame: @@ -448,7 +448,7 @@ def estimate_frame_bounds(self, x=None): A=1.708, B=2.359 >>> A, B = g.estimate_frame_bounds(G.e) >>> print('A={:.3f}, B={:.3f}'.format(A, B)) - A=1.723, B=2.359 + A=1.839, B=2.359 The frame bounds can be seen in the plot of the filter bank as the minimum and maximum of their squared sum (the black curve): diff --git a/pygsp/graphs/__init__.py b/pygsp/graphs/__init__.py index 23f9ccfa..54bb92b5 100644 --- a/pygsp/graphs/__init__.py +++ b/pygsp/graphs/__init__.py @@ -182,6 +182,13 @@ Sensor Sphere TwoMoons + +Learning graphs from data +------------------------ + +.. autosummary:: + + LearnedGraph """ @@ -223,4 +230,5 @@ __all__ = _GRAPHS + _NNGRAPHS _utils.import_classes(_GRAPHS, 'graphs', 'graphs') +from pygsp.graphs.learned import LearnedFromSmoothSignals _utils.import_classes(_NNGRAPHS, 'graphs.nngraphs', 'graphs') diff --git a/pygsp/graphs/fourier.py b/pygsp/graphs/fourier.py index 3c215273..d699ceaa 100644 --- a/pygsp/graphs/fourier.py +++ b/pygsp/graphs/fourier.py @@ -85,7 +85,7 @@ def coherence(self): >>> graph.compute_fourier_basis() >>> minimum = 1 / np.sqrt(graph.n_vertices) >>> print('{:.2f} in [{:.2f}, 1]'.format(graph.coherence, minimum)) - 0.75 in [0.12, 1] + 0.87 in [0.12, 1] >>> >>> # Plot the most localized eigenvector. >>> import matplotlib.pyplot as plt diff --git a/pygsp/graphs/graph.py b/pygsp/graphs/graph.py index 1c17bcab..a0cea791 100644 --- a/pygsp/graphs/graph.py +++ b/pygsp/graphs/graph.py @@ -2,8 +2,8 @@ from __future__ import division -import os -from collections import Counter +# import os +# from collections import Counter import numpy as np from scipy import sparse diff --git a/pygsp/graphs/learned.py b/pygsp/graphs/learned.py new file mode 100644 index 00000000..c5f9ba4e --- /dev/null +++ b/pygsp/graphs/learned.py @@ -0,0 +1,609 @@ +import numpy as np +from scipy import sparse +from scipy.spatial.distance import squareform +from time import time +from pygsp import utils +from pygsp.utils import distanz +from pygsp._nearest_neighbor import nearest_neighbor, sparse_distance_matrix +from pygsp._nearest_neighbor import distances_from_edge_mask +from . 
import Graph # prevent circular import in Python < 3.5 + +_logger = utils.build_logger(__name__) + + +def isvector(x): + '''Test if x is a vector''' + if sparse.issparse(x): + return (np.ndim(x) == 1) or (np.ndim(x) == 2 and x.shape[1] == 1) + try: + return (np.ndim(x) == 2 and len(x.shape) == 1) + except: + return False + + +def prox_sum_log(x, gamma): + '''Solves the proximal problem: + sol = argmin_z 0.5*||x - z||_2^2 - gamma * sum(log(z)) + The solution is given by + sol = (x + sqrt(x.^2 + 4*gamma)) /2 + ''' + sol = (x + np.sqrt(x*x + 4*gamma)) /2 + return sol + + +def issymetric(W, tol=1e-7): + '''Test if a sparse matrix is symmetric''' + WT = sparse.triu(W) - sparse.triu(W.transpose()) + return np.sum(np.abs(WT)) < tol + + +def squareform_sp(w, safe=True): + '''Squareform function for sparse matrix''' + if not(sparse.issparse(w)): + return squareform(w) + if isvector(w): + ## VECTOR -> MATRIX + l = w.shape[0] + n = np.int(np.round((1 + np.sqrt(1+8*l))/2)) + # check input + if not(l == n*(n-1)//2): + raise ValueError('Bad vector size!') + + ind_vec, _, data = sparse.find(w) + + num_nz = len(ind_vec) + + # indices inside the matrix + ind_i = np.zeros((num_nz)) + ind_j = np.zeros((num_nz)) + + curr_row = 0 + offset = 0 + # length of current row of matrix, counting from after the diagonal + sl = n - 1 + for ii in range(num_nz): + ind_vec_i = ind_vec[ii] + # if we change row, the vector index is bigger by at least the + # length of the line + the offset. + while(ind_vec_i >= (sl + offset)): + offset = offset + sl + sl = sl - 1 + curr_row = curr_row + 1 + + ind_i[ii] = curr_row + ind_j[ii] = ind_vec_i - offset + (n-sl) + indx = np.concatenate((ind_i, ind_j)) + indy = np.concatenate((ind_j, ind_i)) + data = np.concatenate((data, data)) + w = sparse.csr_matrix((data, (indx, indy)), (n, n)) + return w + else: + ## MATRIX -> VECTOR + # first checks + assert(len(w.shape)==2) + m, n = w.shape + if not(m == n) or np.sum(np.abs(w.diagonal()))>0: + raise ValueError('Matrix has to be square with zero diagonal!'); + if safe: + assert(issymetric(w)) + ind_i, ind_j, s = sparse.find(w); + # keep only upper triangular part + ind_upper = ind_i < ind_j + ind_i = ind_i[ind_upper]+1 + ind_j = ind_j[ind_upper]+1 + s = s[ind_upper] + # compute new (vector) index from [i,j] (matrix) indices + new_ind = ind_j + (ind_i-1)*n - ind_i*(ind_i+1)//2-1 + w = sparse.csr_matrix((s,(new_ind, np.zeros(new_ind.shape))), shape=(n*(n-1)//2, 1)) + return w + + +def sum_squareform(n, mask=None): + '''sparse matrix that sums the squareform of a vector + + Input parameters: + n: size of matrix W + mask: if given, S only contain the columns indicated by the mask + + Output parameters: + S: matrix so that S*w = sum(W) for vector w = squareform(W) + St: the adjoint of S + + Creates sparse matrices S, St = S' so that + S*w = sum(W), where w = squareform(W) + + The mask is used for large scale computations where only a few + non-zeros in W are to be summed. It needs to be the same size as w, + n(n-1)/2 elements. + + Properties of S: + * size(S) = [n, (n(n-1)/2)] % if no mask is given. 
+ * size(S, 2) = nnz(w) % if mask is given + * norm(S)^2 = 2(n-1) + * sum(S) = 2*ones(1, n*(n-1)/2) + * sum(St) = sum(squareform(mask)) -- for full mask = (n-1)*ones(n,1) + ''' + if mask is not None: + if not(mask.shape[0] == n*(n-1)//2): + raise ValueError('mask size has to be n(n-1)/2'); + + ind_vec, _ = mask.nonzero() + + # final number of columns is the nnz(mask) + ncols = len(ind_vec) + + # indices inside the matrix + I = np.zeros((ncols)).astype(np.int) + J = np.zeros((ncols)).astype(np.int) + + curr_row = 0 + offset = 0 + # length of current row of matrix, counting from after the diagonal + sl = n - 1; + for ii in range(ncols): + ind_vec_i = ind_vec[ii] + # if we change row, the vector index is bigger by at least the + # length of the line + the offset. + while(ind_vec_i >= (sl + offset)): + offset = offset + sl + sl = sl - 1 + curr_row = curr_row + 1 + I[ii] = curr_row + J[ii] = ind_vec_i - offset + (n-sl) + else: + # number of columns is the length of w given size of W + ncols = ((n-1)*n)//2; + + I = np.zeros((ncols)).astype(np.int) + J = np.zeros((ncols)).astype(np.int) + + # offset + k = 0; + for i in range(1,n): + I[k: k + (n-i)] = np.arange(i, n).astype(np.int) + k = k + (n-i) + k = 0; + for i in range(1,n): + J[k: k + (n-i)] = i-1 + k = k + (n-i) + indx = np.concatenate((I,J)) + indy = np.concatenate((np.arange(ncols),np.arange(ncols))) + data = np.ones((2*ncols)) + S = sparse.csr_matrix((data, (indx, indy)), shape=(n,ncols), dtype=np.int) + St = sparse.csr_matrix((data, (indy, indx)), shape=(ncols, n), dtype=np.int) + return S, St + + +def lin_map(X, lims_out, lims_in=None): + '''Map linearly from a given range to another.''' + + if lims_in is None: + lims_in = [np.min(X), np.max(X)]; + + a = lims_in[0]; + b = lims_in[1]; + c = lims_out[0]; + d = lims_out[1]; + + Y = (X-a) * ((d-c)/(b-a)) + c; + return Y + + +def norm_S(S): + '''Norm of the matrix given by the function sum_squareform''' + return np.sqrt(2*np.max(np.sum(S, axis=1))) + + +def sort_k_smallest(Z, k, axis=None): + '''Sort only the smallest k elements and return a matrix with k columns + + First keep only the k smallest elements (O(n) per row) + Then sort only the k smallest elements (O(k*log(k)) per row) + The zero of the diagonal will be now in the first column (TODO: check!!) + --> so get rid of it''' + return np.sort(np.partition(Z, k, axis=axis)[:, :k], axis=1) + + +def compute_theta_bounds(Z, geom_mean=False, is_sorted=False, kmax=None): + '''Compute the values of parameter theta (controlling sparsity) + that should be expected to give each sparsity level. Return upper + and lower bounds each sparsity level k=[1, ..., n-1] neighbors/node. + + You can reduce computation by giving a maximum k needed + + Z : squared or pairwise distance matrix (zero diagonal) + OR [n,m] sized distance matrix with m smallest distances (nonzero) per + node. Not necessarily sorted, but if sorted we save computation + geom_mean: use geometric mean instead of arithmetic mean? default: False + ''' + + if isvector(Z): + ValueError('Z must be a matrix.') + assert(len(Z.shape)==2) + + n, m = Z.shape; + + assert(n >= m) + + if kmax is None: + kmax = m + + if n == m: + # square distance matrix contains zero diagonals and is not expected to + # be sorted! 
+ if kmax == m: + Z_sorted = np.sort(Z, axis=1)[:, 1:] + else: + Z_sorted = sort_k_smallest(Z, kmax+1, axis=1)[:, 1:] + m -= 1 + else: + # rectangular distance matrix without the zeros + if is_sorted: + assert(np.all(Z[:,0]==0)) + else: + if kmax == m: + Z_sorted = np.sort(Z, axis=1) + else: + # only sort the smallest k elements! + Z_sorted = sort_k_smallest(Z, kmax, axis=1) + + B_k = np.cumsum(Z_sorted, axis=1) # cummulative sum for each row + K_mat = np.tile(np.arange(1,m+1), (n, 1)) + ## Theoretical intervals of theta for each desired sparsity level: + if geom_mean: + # try geometric mean instead of arithmetic: + theta_u = np.exp(np.mean(np.log(1/(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted+1e-7)+1e-15)), axis=0)) + else: + theta_u = np.mean(1./(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted+1e-7)+1e-15), axis=0) + theta_l = np.zeros(theta_u.shape) + theta_l[:-1] = theta_u[1:] + return theta_l, theta_u, Z_sorted + +def gsp_compute_graph_learning_theta(Z, k, geom_mean=False, is_sorted=False): + + ''' + Z : squared or pairwise distance matrix (zero diagonal) + OR [n,m] sized distance matrix with m smallest distances (nonzero) + per node. Not necessarily sorted, but if sorted we save computation + ''' + + theta_min, theta_max, _ = compute_theta_bounds(Z, geom_mean, is_sorted) + theta_min = theta_min[k-1]; + theta_max = theta_max[k-1]; + + if k > 1: + theta = np.sqrt(theta_min * theta_max); + else: + theta = theta_min * 1.1; + return theta, theta_min, theta_max + + +def learn_graph_log_degree(Z, + a = 1, + b = 1, + c = 0, + verbosity = 1, + maxit = 1000, + tol = 1e-5, + step_size = .5, + max_w = np.inf, + edge_mask = None, + w_0 = 0, + rel_edge=1e-5): + r"""Learn a graph from distances + + This function computes a weighted + adjacency matrix $W$ from squared pairwise distances in $Z$, using the + smoothness assumption that $\text{trace}(X^TLX)$ is small, where $X$ is + the data (columns) changing smoothly from node to node on the graph and + $L = D-W$ is the combinatorial graph Laplacian. See :cite:`kalofolias2018large` + and :cite:`kalofolias2016learn` for the theory behind the algorithm. + + Alternatively, Z can contain other types of distances and use the + smoothness assumption that sum(sum(W * Z)) is small. + + The minimization problem solved is + + minimize_W sum(sum(W .* Z)) - a * sum(log(sum(W))) + b * ||W||_F^2/2 + c * ||W-W_0||_F^2/2 + + The algorithm used is forward-backward-forward (FBF) based primal dual + optimization. + + Parameters + ---------- + Z : Distance matrices [Nnodes x Nnodes]. It can be sparse + a : Weight of the connectivity term (prevents all nodes to be disconnected) + b : Weight of the sparsity term + c : Weight of the L2 prior (Default 0) + verbosity : How much should display the algorithm - 0: nothing, + 1: summary at convergence, 2: each steps (Default 1) + maxit : maximum number of iterations (Default: 1000) + tol : tolerance to stop iterating + step_size : Step size from the interval (0,1). Default: 0.5 + max_w : Maximum weight allowed for each edge (or inf) + edge_mask : Mask indicating the non zero edges (for the scaling version) + w_0 : Vector for adding prior c/2*||w - w_0||^2 + rel_edge : Remove all edges bellow this tolerance after convergence of the algorithm + + References + ---------- + See :cite:`kalofolias2018large` and :cite:`kalofolias2016learn` for more information. 
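Written in standard notation, the minimization above reads

.. math:: \min_{W \geq 0}\ \sum_{i,j} W_{ij} Z_{ij}\ -\ a \sum_i \log\Big(\sum_j W_{ij}\Big)\ +\ \frac{b}{2} \|W\|_F^2\ +\ \frac{c}{2} \|W - W_0\|_F^2,

where the log barrier on the node degrees :math:`\sum_j W_{ij}` prevents isolated vertices and the Frobenius terms control the spread of the weights.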
+ + """ + + if isvector(Z): + z = Z; + else: + z = squareform_sp(Z) + + l = z.shape[0] # number of edges + # n(n-1)/2 = l => n = (1 + sqrt(1+8*l))/ 2 + n = int(np.round((1 + np.sqrt(1+8*l))/ 2)) # number of nodes + + if not(w_0==0): + if c==0: + raise ValueError('When w_0 is specified, c should not be 0'); + if not isvector(w_0): + w_0 = squareform_sp(w_0) + else: + w_0 = 0; + + # if sparsity pattern is fixed we optimize with respect to a smaller number + # of variables, all included in w + if edge_mask is not None: + if not(isvector(edge_mask)): + edge_mask = squareform_sp(edge_mask) + # use only the non-zero elements to optimize + ind, _ = edge_mask.nonzero() + + z = z[ind].data + if not(np.isscalar(w_0)): + w_0 = w_0[ind].data + + w = np.zeros(z.shape); + + ## Needed operators + # S*w = sum(W) + + S, St = sum_squareform(n, mask=edge_mask) + + # S: edges -> nodes + K_op = lambda w : S.dot(w) + + # S': nodes -> edges + Kt_op = lambda z: St.dot(z) + + norm_K = norm_S(S) + + + ## Learn the graph + # min_{W>=0} tr(X'*L*X) - gc * sum(log(sum(W))) + gp * norm(W-W0,'fro')^2, where L = diag(sum(W))-W + # min_W I{W>=0} + W(:)'*Dx(:) - gc * sum(log(sum(W))) + gp * norm(W-W0,'fro')^2 + # min_W f(W) + g(L_op(W)) + h(W) + + # put proximal of trace plus positivity together + feval = lambda w: 2*np.sum(w*z) # half should be counted + fprox = lambda w, gamma: np.minimum(max_w, np.maximum(0, w - 2*gamma*z)) # weighted soft thresholding op + + geval = lambda v: -a * np.sum(np.log(v+1e-15)) + # gprox = lambda v, gamma: prox_sum_log(v, gamma*a) + # proximal of conjugate of g: v-gamma*gprox(v/gamma, 1/gamma) + g_star_prox = lambda v, gamma: v - gamma*a * prox_sum_log(v/(gamma*a), 1/(gamma*a)) + + if w_0 == 0: + # "if" not needed, for c = 0 both are the same but gains speed + heval = lambda w: b * np.sum(w*w) + hgrad = lambda w: 2 * b * w + hbeta = 2 * b; + else: + heval = lambda w: b * np.sum(w*w) + c * np.sum((w - w_0)**2) + hgrad = lambda w: 2 * ((b+c) * w - c * w_0) + hbeta = 2 * (b+c) + + + ## Custom FBF based primal dual (see [1] = [Komodakis, Pesquet]) + # parameters mu, epsilon for convergence (see [1]) + mu = hbeta + norm_K; #TODO: is it squared or not?? 
+ epsilon = lin_map(0.0, [0, 1/(1+mu)], [0,1]); # in (0, 1/(1+mu) ) + + # INITIALIZATION + # primal variable ALREADY INITIALIZED + # dual variable + v_n = K_op(w) + + stat = dict() + stat['time'] = np.nan + if verbosity > 1: + stat['f_eval'] = np.empty((maxit)) + stat['g_eval'] = np.empty((maxit)) + stat['h_eval'] = np.empty((maxit)) + stat['fgh_eval'] = np.empty((maxit)) + stat['pos_violation'] = np.empty((maxit)) + + if verbosity > 1: + print('Relative change of primal, dual variables, and objective fun\n'); + + tstart = time() + gn = lin_map(step_size, [epsilon, (1-epsilon)/mu], [0,1]) # in [epsilon, (1-epsilon)/mu] + for i in range(maxit): + Y_n = w - gn * (hgrad(w) + Kt_op(v_n)) + y_n = v_n + gn * (K_op(w)) + P_n = fprox(Y_n, gn) + p_n = g_star_prox(y_n, gn) # = y_n - gn*g_prox(y_n/gn, 1/gn) + Q_n = P_n - gn * (hgrad(P_n) + Kt_op(p_n)) + q_n = p_n + gn * (K_op(P_n)) + + if verbosity > 1: + stat['f_eval'][i] = feval(w) + stat['g_eval'][i] = geval(K_op(w)) + stat['h_eval'][i] = heval(w) + stat['fgh_eval'][i] = stat['f_eval'][i] + stat['g_eval'][i] + stat['h_eval'][i] + stat['pos_violation'][i] = -np.sum(np.minimum(0,w)) + + rel_norm_primal = np.linalg.norm(- Y_n + Q_n)/(np.linalg.norm(w)+1e-15) + rel_norm_dual = np.linalg.norm(- y_n + q_n)/(np.linalg.norm(v_n)+1e-15) + + if verbosity > 3: + print('iter {:4d}: {:6.4e} {:6.4e} {:6.3e}'.format(i, rel_norm_primal, rel_norm_dual, stat['fgh_eval'][i])) + elif verbosity > 2: + print('iter {:4d}: {%6.4e} {:6.4e} {:6.3e}'.format(i, rel_norm_primal, rel_norm_dual, stat['fgh_eval'][i])) + elif verbosity > 1: + print('iter {:4d}: {:6.4e} {:6.4e}'.format(i, rel_norm_primal, rel_norm_dual)) + + w = w - Y_n + Q_n + v_n = v_n - y_n + q_n + + if (rel_norm_primal < tol) and (rel_norm_dual < tol): + break + + stat['time'] = time()-tstart + if verbosity > 0: + print('# iters: {:4d}. Rel primal: {:6.4e} Rel dual: {:6.4e} OBJ {:6.3e}'.format( + i+1, rel_norm_primal, rel_norm_dual, feval(w) + geval(K_op(w)) + heval(w))) + print('Time needed is {:f} seconds'.format(stat['time'])) + + # Force positivity on the solution + + w[w0 + w = sparse.csr_matrix((w[indw], (ind[indw], np.zeros((np.sum(indw))))), (l, 1)) + + if isvector(Z): + W = w; + else: + W = squareform_sp(w); + + return W, stat + + + +class LearnedFromSmoothSignals(Graph): + r"""Learned graph from smooth signals. + + Return a graph learned with the function of :func:`learn_graph_log_degree`. + + This function computes the squared pairwise distances $Z$ between the vectors + in $X$. Then the adjacency matrix $W$ is estimated from $Z$, using the + smoothness assumption that $\text{trace}(X^TLX)$ is small, where $X$ is + the data (columns) changing smoothly from node to node on the graph and + $L = D-W$ is the combinatorial graph Laplacian. See :cite:`kalofolias2018large` + and :cite:`kalofolias2016learn` for the theory behind the algorithm. + + Alternatively, Z can contain other types of distances and use the + smoothness assumption that sum(sum(W * Z)) is small. + + The minimization problem solved is + + minimize_W sum(sum(W .* Z)) - a * sum(log(sum(W))) + b * ||W||_F^2/2 + c * ||W-W_0||_F^2/2 + + In order to scale, this function can automatically: + 1. compute the optimal values of (a, b) and + 2. use a resticted support to reduce the computational and the memory cost. + By default, these option are enabled and only the average number of neighboors k should be set. + + Alternatively, the values of a, b can be set. 
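A sketch of why a single parameter suffices for the automatic setup (cf. :cite:`kalofolias2018large`): denoting by :math:`W^\star(Z; a, b)` the minimizer of the problem above, a change of variables gives

.. math:: W^\star(Z; a, b) = \sqrt{a/b}\; W^\star\!\left(Z/\sqrt{ab};\, 1, 1\right),

so, up to a uniform rescaling of the weights, the solution depends on the single parameter :math:`\theta = 1/\sqrt{ab}` multiplying the distances. The automatic setup therefore picks :math:`\theta` from the requested average number of neighbors ``k`` (see :func:`gsp_compute_graph_learning_theta`) and solves the problem with :math:`a = b = 1`.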
+ + Parameters + ---------- + X : Data [Nnodes x Nsignals] + a : Weight of the connectivity term (leave this to None for automatic setup) + b : Weight of the sparsity term (leave this to None for automatic setup) + k : desired average number of neigboors + kk: desired number of neiboors for the sparse support (Default 3k) + sparse: Use a sparse support. Set this to True to scale. (Default False for n<1000, True otherwise) + rel_edge : Remove all edges bellow this tolerance after convergence of the algorithm + edge_mask: A mask of the allowed edge pattern, size [Nnodes x Nnodes]. Only edges with >0 will be learned + param_nn : Parameters for the nearest neighboor search (dictionary). See :func:`nearest_neighbor`. + param_opt: Parameters for the optimization algorithm. See :func:`learn_graph_log_degree`. + + Examples + -------- + >>> import numpy as np + >>> import pygsp as pg + >>> from matplotlib import pyplot as plt + >>> # A) Create a bunch of smooth signals + >>> n = 100 # Number of nodes + >>> d = 50 # Number of signals + >>> k = 6 # Average number of neighboors + >>> # A1) Create a graph + >>> coords = np.random.RandomState(0).uniform(size=(n, 2)) + >>> G = pg.graphs.NNGraph(coords,k=10, kernel='gaussian') + >>> + >>> # A2) Create a lowpass filter + >>> G.estimate_lmax() + >>> g = pg.filters.Filter(G,lambda x:1/(1+5*x)) + >>> + >>> # A3) Create signals by filtering random noise + >>> S = np.random.randn(n,d) + >>> X = np.squeeze(g.filter(S)) + >>> + >>> # B) Learn the graph + >>> param_opt = {'verbosity':0} + >>> Glearned = pg.graphs.LearnedFromSmoothSignals(X, k=k, param_opt=param_opt) + >>> # plot the learned graph + >>> Glearned.coords = coords + >>> + >>> # C) Plot the graph and one signal + >>> fig, (ax1,ax2) = plt.subplots(1,2) + >>> _ = G.plot_signal(X[:,1], ax=ax1,title='Signal generating graph') + >>> _ = Glearned.plot_signal(X[:,1], ax=ax2,title='Learned graph') + >>> # The two graphs are not expected to be the same! + + References + ---------- + See :cite:`kalofolias2018large` and :cite:`kalofolias2016learn` for more information. + """ + + # TODO: allow Z to be given by user if pre-computed + def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=None, rel_edge=1e-5, edge_mask=None, param_nn={}, param_opt={}, **kwargs): + if sparse is None: + if X.shape[0] <= 1000: + sparse = False + else: + sparse = True + _logger.info( + '''For large graphs (1000 nodes+) graph learning might be considerably slow (O(n^2) + complexity). Consider using a sparse set of allowed edges to learn. Automatic + fallback to approximate NN. To override this decision specify sparse=False. To + suppress this info message, specify sparse=True.''') + + if sparse: + if edge_mask is None: + if kk is None: + kk = 3*k + neighbors, distances = nearest_neighbor(X, k=kk, **param_nn) + # the original method needs squared distances!! + Z = sparse_distance_matrix(neighbors, distances**2) + edge_mask = Z>0 + Zp = distances[:, 1:]**2 + else: + '''If edge mask is given, the pairwise distance should be only + computed for the edges indicated by the mask''' + # TODO: form should not be matrix here if + # inside learn_graph_log_degree it becomes eventually vector + # TODO: allow different distances than euclidean!! + Z = distances_from_edge_mask(X, edge_mask, form='matrix')**2 + if a is None or b is None: + # TODO: implement automatic a and b selection here! 
+ _logger.error( + '''If you set manually your own edge_mask, you also + have to set up a and b manually''' + ) + # raise NotImplementedError( + raise TypeError( + '''If you set manually an edge_mask, you also have to + set a and b manually''') + else: + # the original method needs squared distances!! + Z = distanz(X.transpose())**2 + Zp = Z + edge_mask = None + if a is None and b is None: + theta, theta_min, theta_max = gsp_compute_graph_learning_theta(Zp, k) + W, stat = learn_graph_log_degree(Z*theta, edge_mask=edge_mask, rel_edge=rel_edge, **param_opt) + else: + W, stat = learn_graph_log_degree(Z, a=a, b=b, edge_mask=edge_mask, rel_edge=rel_edge, **param_opt) + super(LearnedFromSmoothSignals, self).__init__(W, **kwargs) + self._stat = stat diff --git a/pygsp/graphs/nngraphs/bunny.py b/pygsp/graphs/nngraphs/bunny.py index d9dac407..cc269c10 100644 --- a/pygsp/graphs/nngraphs/bunny.py +++ b/pygsp/graphs/nngraphs/bunny.py @@ -34,7 +34,5 @@ def __init__(self, **kwargs): 'distance': 8, } - super(Bunny, self).__init__(Xin=data['bunny'], - epsilon=0.02, NNtype='radius', - center=False, rescale=False, + super(Bunny, self).__init__(data['bunny'], kind='radius', radius=0.02, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/nngraphs/cube.py b/pygsp/graphs/nngraphs/cube.py index 820e401c..b9cb1d60 100644 --- a/pygsp/graphs/nngraphs/cube.py +++ b/pygsp/graphs/nngraphs/cube.py @@ -10,12 +10,12 @@ class Cube(NNGraph): Parameters ---------- - radius : float - Edge lenght (default = 1) nb_pts : int Number of vertices (default = 300) nb_dim : int Dimension (default = 3) + length : float + Edge length (default = 1) sampling : string Variance of the distance kernel (default = 'random') (Can now only be 'random') @@ -35,20 +35,23 @@ class Cube(NNGraph): """ def __init__(self, - radius=1, nb_pts=300, nb_dim=3, + length=1, sampling='random', seed=None, **kwargs): - self.radius = radius self.nb_pts = nb_pts self.nb_dim = nb_dim + self.length = length self.sampling = sampling self.seed = seed rs = np.random.RandomState(seed) + if length != 1: + raise NotImplementedError('Only length=1 is implemented.') + if self.nb_dim > 3: raise NotImplementedError("Dimension > 3 not supported yet!") @@ -89,12 +92,10 @@ def __init__(self, 'distance': 9, } - super(Cube, self).__init__(Xin=pts, k=10, - center=False, rescale=False, - plotting=plotting, **kwargs) + super(Cube, self).__init__(pts, k=10, plotting=plotting, **kwargs) def _get_extra_repr(self): - attrs = {'radius': '{:.2f}'.format(self.radius), + attrs = {'length': '{:.2e}'.format(self.length), 'nb_pts': self.nb_pts, 'nb_dim': self.nb_dim, 'sampling': self.sampling, diff --git a/pygsp/graphs/nngraphs/imgpatches.py b/pygsp/graphs/nngraphs/imgpatches.py index 73cb6abd..df681a3b 100644 --- a/pygsp/graphs/nngraphs/imgpatches.py +++ b/pygsp/graphs/nngraphs/imgpatches.py @@ -10,7 +10,8 @@ class ImgPatches(NNGraph): Extract a feature vector in the form of a patch for every pixel of an image, then construct a nearest-neighbor graph between these feature - vectors. The feature matrix, i.e. the patches, can be found in :attr:`Xin`. + vectors. The feature matrix, i.e., the patches, can be found in + :attr:`features`. 
Parameters ---------- @@ -39,9 +40,10 @@ class ImgPatches(NNGraph): >>> from skimage import data, img_as_float >>> img = img_as_float(data.camera()[::64, ::64]) >>> G = graphs.ImgPatches(img, patch_shape=(3, 3)) - >>> print('{} nodes ({} x {} pixels)'.format(G.Xin.shape[0], *img.shape)) + >>> N, d = G.patches.shape + >>> print('{} nodes ({} x {} pixels)'.format(N, *img.shape)) 64 nodes (8 x 8 pixels) - >>> print('{} features per node'.format(G.Xin.shape[1])) + >>> print('{} features per node'.format(d)) 9 features per node >>> G.set_coordinates(kind='spring', seed=42) >>> fig, axes = plt.subplots(1, 2) @@ -87,16 +89,16 @@ def __init__(self, img, patch_shape=(3, 3), **kwargs): # Alternative: sklearn.feature_extraction.image.extract_patches_2d. # sklearn has much less dependencies than skimage. try: - import skimage + from skimage.util import view_as_windows except Exception as e: raise ImportError('Cannot import skimage, which is needed to ' 'extract patches. Try to install it with ' 'pip (or conda) install scikit-image. ' 'Original exception: {}'.format(e)) - patches = skimage.util.view_as_windows(img, window_shape=window_shape) - patches = patches.reshape((h * w, r * c * d)) + self.patches = view_as_windows(img, window_shape) + self.patches = self.patches.reshape((h * w, r * c * d)) - super(ImgPatches, self).__init__(patches, **kwargs) + super(ImgPatches, self).__init__(self.patches, **kwargs) def _get_extra_repr(self): attrs = dict(patch_shape=self.patch_shape) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 252a5416..3f56734e 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -1,199 +1,401 @@ # -*- coding: utf-8 -*- -import traceback +from __future__ import division + +from functools import partial import numpy as np from scipy import sparse, spatial from pygsp import utils from pygsp.graphs import Graph # prevent circular import in Python < 3.5 +from pygsp._nearest_neighbor import nearest_neighbor, sparse_distance_matrix + _logger = utils.build_logger(__name__) -def _import_pfl(): - try: - import pyflann as pfl - except Exception as e: - raise ImportError('Cannot import pyflann. Choose another nearest ' - 'neighbors method or try to install it with ' - 'pip (or conda) install pyflann (or pyflann3). ' - 'Original exception: {}'.format(e)) - return pfl +class NNGraph(Graph): + r"""Nearest-neighbor graph. + + The nearest-neighbor graph is built from a set of features, where the edge + weight between vertices :math:`v_i` and :math:`v_j` is given by + .. math:: A(i,j) = k \left( \frac{d(v_i, v_j)}{\sigma} \right), -class NNGraph(Graph): - r"""Nearest-neighbor graph from given point cloud. + where :math:`d(v_i, v_j)` is a distance measure between some representation + (the features) of :math:`v_i` and :math:`v_j`, :math:`k` is a kernel + function that transforms a distance in :math:`[0, \infty]` to a similarity + measure generally in :math:`[0, 1]`, and :math:`\sigma` is the kernel width. + + For example, the features might be the 3D coordinates of points in a point + cloud. Then, if ``metric='euclidean'`` and ``kernel='gaussian'`` (the + defaults), :math:`A(i,j) = \exp(-\log(2) \| x_i - x_j \|_2^2 / \sigma^2)`, + where :math:`x_i` is the 3D position of vertex :math:`v_i`. + + The similarity matrix :math:`A` is sparsified by either keeping the ``k`` + closest vertices for each vertex (if ``type='knn'``), or by setting to zero + the similarity when the distance is greater than ``radius`` (if ``type='radius'``). 
Parameters ---------- - Xin : ndarray - Input points, Should be an `N`-by-`d` matrix, where `N` is the number - of nodes in the graph and `d` is the dimension of the feature space. - NNtype : string, optional - Type of nearest neighbor graph to create. The options are 'knn' for - k-Nearest Neighbors or 'radius' for epsilon-Nearest Neighbors (default - is 'knn'). - use_flann : bool, optional - Use Fast Library for Approximate Nearest Neighbors (FLANN) or not. - (default is False) - center : bool, optional - Center the data so that it has zero mean (default is True) - rescale : bool, optional - Rescale the data so that it lies in a l2-sphere (default is True) - k : int, optional - Number of neighbors for knn (default is 10) - sigma : float, optional - Width of the similarity kernel. - By default, it is set to the average of the nearest neighbor distance. - epsilon : float, optional - Radius for the epsilon-neighborhood search (default is 0.01) - plotting : dict, optional - Dictionary of plotting parameters. See :obj:`pygsp.plotting`. - (default is {}) - symmetrize_type : string, optional - Type of symmetrization to use for the adjacency matrix. See - :func:`pygsp.utils.symmetrization` for the options. - (default is 'average') - dist_type : string, optional - Type of distance to compute. See - :func:`pyflann.index.set_distance_type` for possible options. - (default is 'euclidean') + features : ndarray + An `N`-by-`d` matrix, where `N` is the number of nodes in the graph and + `d` is the number of features. + standardize : bool, optional + Whether to rescale the features so that each feature has a mean of 0 + and standard deviation of 1 (unit variance). + metric : {'euclidean', 'manhattan', 'minkowski', 'max_dist'}, optional + Metric used to compute pairwise distances. + + * ``'euclidean'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_2`. + * ``'manhattan'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_1`. + * ``'minkowski'`` generalizes the above and defines distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_p` + where :math:`p` is the ``order`` of the norm. + * ``'max_dist'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_\infty = \max(x_i - x_j)`, where + the maximum is taken over the elements of the vector. + + More metrics may be supported for some backends. + Please refer to the documentation of the chosen backend. order : float, optional - Only used if dist_type is 'minkowski'; represents the order of the - Minkowski distance. (default is 0) + The order of the Minkowski distance for ``metric='minkowski'``. + kind : {'knn', 'radius'}, optional + Kind of nearest neighbor graph to create. Either ``'knn'`` for + k-nearest neighbors or ``'radius'`` for epsilon-nearest neighbors. + k : int, optional + Number of neighbors considered when building a k-NN graph with + ``type='knn'``. + radius : float or {'estimate', 'estimate-knn'}, optional + Radius of the ball when building a radius graph with ``type='radius'``. + It is hard to set an optimal radius. If too small, some vertices won't + be connected to any other vertex. If too high, vertices will be + connected to many other vertices and the graph won't be sparse (high + average degree). If no good radius is known a priori, we can estimate + one. ``'estimate'`` sets the radius as the expected average distance + between vertices for a uniform sampling of the ambient space. + ``'estimate-knn'`` first builds a knn graph and sets the radius to the + average distance. 
``'estimate-knn'`` usually gives a better estimation + but is more costly. ``'estimate'`` can be better in low dimension. + kernel : string or function + The function :math:`k` that transforms a distance to a similarity. + The following kernels are pre-defined. + + * ``'gaussian'`` defines the Gaussian, also known as the radial basis + function (RBF), kernel :math:`k(d) = \exp(-\log(2) d^2)`. + * ``'exponential'`` defines the kernel :math:`k(d) = \exp(-\log(2) d)`. + * ``'rectangular'`` returns 1 if :math:`d < 1` and 0 otherwise. + * ``'triangular'`` defines the kernel :math:`k(d) = \max(1 - d/2, 0)`. + * Other kernels are ``'tricube'``, ``'triweight'``, ``'quartic'``, + ``'epanechnikov'``, ``'logistic'``, and ``'sigmoid'``. + See `Wikipedia `_. + + Another option is to pass a function that takes a vector of pairwise + distances and returns the similarities. All the predefined kernels + return a similarity of 0.5 when the distance is one. + An example of custom kernel is ``kernel=lambda d: d.min() / d``. + kernel_width : float, optional + Control the width, also known as the bandwidth, :math:`\sigma` of the + kernel. It scales the distances as ``distances / kernel_width`` before + calling the kernel function. + By default, it is set to the average of all computed distances for + ``kind='knn'`` and to half the radius for ``kind='radius'``. + backend : string, optional + * ``'scipy-pdist'`` uses :func:`scipy.spatial.distance.pdist` to + compute pairwise distances. The method is brute force and computes + all distances. That is the slowest method. + * ``'scipy-kdtree'`` uses :class:`scipy.spatial.KDTree`. The method + builds a k-d tree to prune the number of pairwise distances it has to + compute. That is an efficient strategy for low-dimensional spaces. + * ``'scipy-ckdtree'`` uses :class:`scipy.spatial.cKDTree`. The same as + ``'scipy-kdtree'`` but with C bindings, which should be faster. + That is the default. + * ``'flann'`` uses the `Fast Library for Approximate Nearest Neighbors + (FLANN) `_. That method is an + approximation. + * ``'nmslib'`` uses the `Non-Metric Space Library (NMSLIB) + `_. That method is an + approximation. It should be the fastest in high-dimensional spaces. + + You can look at this `benchmark + `_ to get an idea of the + relative performance of those backends. It's nonetheless wise to run + some tests on your own data. + kwargs : dict + Parameters to be passed to the :class:`Graph` constructor or the + backend library. Examples -------- + + Construction of a graph from a set of features. + >>> import matplotlib.pyplot as plt - >>> X = np.random.RandomState(42).uniform(size=(30, 2)) - >>> G = graphs.NNGraph(X) + >>> rs = np.random.RandomState(42) + >>> features = rs.uniform(size=(30, 2)) + >>> G = graphs.NNGraph(features) >>> fig, axes = plt.subplots(1, 2) >>> _ = axes[0].spy(G.W, markersize=5) >>> _ = G.plot(ax=axes[1]) - """ + Radius versus knn graph. + + >>> features = rs.uniform(size=(100, 3)) + >>> fig, ax = plt.subplots() + >>> G = graphs.NNGraph(features, kind='radius', radius=0.2964) + >>> label = 'radius graph ({} edges)'.format(G.n_edges) + >>> _ = ax.hist(G.W.data, bins=20, label=label, alpha=0.5) + >>> G = graphs.NNGraph(features, kind='knn', k=6) + >>> label = 'knn graph ({} edges)'.format(G.n_edges) + >>> _ = ax.hist(G.W.data, bins=20, label=label, alpha=0.5) + >>> _ = ax.legend() + >>> _ = ax.set_title('edge weights') + + Control of the sparsity of knn and radius graphs. 
+ + >>> features = rs.uniform(size=(100, 3)) + >>> n_edges = dict(knn=[], radius=[]) + >>> n_neighbors = np.arange(1, 100, 5) + >>> radiuses = np.arange(0.05, 1.5, 0.05) + >>> for k in n_neighbors: + ... G = graphs.NNGraph(features, kind='knn', k=k) + ... n_edges['knn'].append(G.n_edges) + >>> for radius in radiuses: + ... G = graphs.NNGraph(features, kind='radius', radius=radius) + ... n_edges['radius'].append(G.n_edges) + >>> fig, axes = plt.subplots(1, 2, sharey=True) + >>> _ = axes[0].plot(n_neighbors, n_edges['knn']) + >>> _ = axes[1].plot(radiuses, n_edges['radius']) + >>> _ = axes[0].set_ylabel('number of edges') + >>> _ = axes[0].set_xlabel('number of neighbors (knn graph)') + >>> _ = axes[1].set_xlabel('radius (radius graph)') + >>> _ = fig.suptitle('Sparsity') + + Choice of metric and the curse of dimensionality. - def __init__(self, Xin, NNtype='knn', use_flann=False, center=True, - rescale=True, k=10, sigma=None, epsilon=0.01, - plotting={}, symmetrize_type='average', dist_type='euclidean', - order=0, **kwargs): + >>> fig, axes = plt.subplots(1, 2) + >>> for dim, ax in zip([3, 30], axes): + ... features = rs.uniform(size=(100, dim)) + ... for metric in ['euclidean', 'manhattan', 'max_dist', 'cosine']: + ... G = graphs.NNGraph(features, metric=metric, + ... backend='scipy-pdist') + ... _ = ax.hist(G.W.data, bins=20, label=metric, alpha=0.5) + ... _ = ax.legend() + ... _ = ax.set_title('edge weights, {} dimensions'.format(dim)) - self.Xin = Xin - self.NNtype = NNtype - self.use_flann = use_flann - self.center = center - self.rescale = rescale - self.k = k - self.sigma = sigma - self.epsilon = epsilon - self.symmetrize_type = symmetrize_type - self.dist_type = dist_type - self.order = order + Choice of kernel. - N, d = np.shape(self.Xin) - Xout = self.Xin - - if k >= N: - raise ValueError('The number of neighbors (k={}) must be smaller ' - 'than the number of nodes ({}).'.format(k, N)) - - if self.center: - Xout = self.Xin - np.kron(np.ones((N, 1)), - np.mean(self.Xin, axis=0)) - - if self.rescale: - bounding_radius = 0.5 * np.linalg.norm(np.amax(Xout, axis=0) - - np.amin(Xout, axis=0), 2) - scale = np.power(N, 1. / float(min(d, 3))) / 10. - Xout *= scale / bounding_radius - - # Translate distance type string to corresponding Minkowski order. - dist_translation = {"euclidean": 2, - "manhattan": 1, - "max_dist": np.inf, - "minkowski": order - } - - if self.NNtype == 'knn': - spi = np.zeros((N * k)) - spj = np.zeros((N * k)) - spv = np.zeros((N * k)) - - if self.use_flann: - pfl = _import_pfl() - pfl.set_distance_type(dist_type, order=order) - flann = pfl.FLANN() - - # Default FLANN parameters (I tried changing the algorithm and - # testing performance on huge matrices, but the default one - # seems to work best). - NN, D = flann.nn(Xout, Xout, num_neighbors=(k + 1), - algorithm='kdtree') - - else: - kdt = spatial.KDTree(Xout) - D, NN = kdt.query(Xout, k=(k + 1), - p=dist_translation[dist_type]) - - if self.sigma is None: - self.sigma = np.mean(D[:, 1:]) # Discard distance to self. - - for i in range(N): - spi[i * k:(i + 1) * k] = np.kron(np.ones((k)), i) - spj[i * k:(i + 1) * k] = NN[i, 1:] - spv[i * k:(i + 1) * k] = np.exp(-np.power(D[i, 1:], 2) / - float(self.sigma)) - - elif self.NNtype == 'radius': - - kdt = spatial.KDTree(Xout) - D, NN = kdt.query(Xout, k=None, distance_upper_bound=epsilon, - p=dist_translation[dist_type]) - if self.sigma is None: - # Discard distance to self. 
- self.sigma = np.mean([np.mean(d[1:]) for d in D]) - count = 0 - for i in range(N): - count = count + len(NN[i]) - - spi = np.zeros((count)) - spj = np.zeros((count)) - spv = np.zeros((count)) - - start = 0 - for i in range(N): - leng = len(NN[i]) - 1 - spi[start:start + leng] = np.kron(np.ones((leng)), i) - spj[start:start + leng] = NN[i][1:] - spv[start:start + leng] = np.exp(-np.power(D[i][1:], 2) / - float(self.sigma)) - start = start + leng + >>> fig, axes = plt.subplots(1, 2) + >>> width = 0.3 + >>> distances = np.linspace(0, 1, 200) + >>> for name, kernel in graphs.NNGraph._kernels.items(): + ... _ = axes[0].plot(distances, kernel(distances / width), label=name) + >>> _ = axes[0].set_xlabel('distance [0, inf]') + >>> _ = axes[0].set_ylabel('similarity [0, 1]') + >>> _ = axes[0].legend(loc='upper right') + >>> features = rs.uniform(size=(100, 3)) + >>> for kernel in ['gaussian', 'triangular', 'tricube', 'exponential']: + ... G = graphs.NNGraph(features, kernel=kernel) + ... _ = axes[1].hist(G.W.data, bins=20, label=kernel, alpha=0.5) + >>> _ = axes[1].legend() + >>> _ = axes[1].set_title('edge weights') + + Choice of kernel width. + + >>> fig, axes = plt.subplots() + >>> for width in [.2, .3, .4, .6, .8, None]: + ... G = graphs.NNGraph(features, kernel_width=width) + ... label = 'width = {:.2f}'.format(G.kernel_width) + ... _ = axes.hist(G.W.data, bins=20, label=label, alpha=0.5) + >>> _ = axes.legend(loc='upper left') + >>> _ = axes.set_title('edge weights') + + Choice of backend. Compare on your data! + + >>> import time + >>> sizes = [300, 1000, 3000] + >>> dims = [3, 100] + >>> backends = ['scipy-pdist', 'scipy-kdtree', 'scipy-ckdtree', 'flann', + ... 'nmslib'] + >>> times = np.full((len(sizes), len(dims), len(backends)), np.nan) + >>> for i, size in enumerate(sizes): + ... for j, dim in enumerate(dims): + ... for k, backend in enumerate(backends): + ... if (size * dim) > 1e4 and backend == 'scipy-kdtree': + ... continue # too slow + ... features = rs.uniform(size=(size, dim)) + ... start = time.time() + ... _ = graphs.NNGraph(features, backend=backend) + ... times[i][j][k] = time.time() - start + >>> fig, axes = plt.subplots(1, 2, sharey=True) + >>> for j, (dim, ax) in enumerate(zip(dims, axes)): + ... for k, backend in enumerate(backends): + ... _ = ax.loglog(sizes, times[:, j, k], '.-', label=backend) + ... _ = ax.set_title('{} dimensions'.format(dim)) + ... _ = ax.set_xlabel('number of vertices') + >>> _ = axes[0].set_ylabel('execution time [s]') + >>> _ = axes[1].legend(loc='upper left') + """ + + def __init__(self, features, standardize=False, + metric='euclidean', order=3, + kind='knn', k=10, radius='estimate-knn', + kernel='gaussian', kernel_width=None, + backend='scipy-ckdtree', + **kwargs): + + # features is stored in coords, potentially standardized + self.standardize = standardize + self.metric = metric + self.kind = kind + self.kernel = kernel + self.k = k + self.backend = backend + + features = np.asanyarray(features) + if features.ndim != 2: + raise ValueError('features should be #vertices x dimensionality') + n_vertices, dimensionality = features.shape + + params_graph = dict() + for key in ['lap_type', 'plotting']: + try: + params_graph[key] = kwargs.pop(key) + except KeyError: + pass + + if standardize: + # Don't alter the original data (users would be surprised). + features = features - np.mean(features, axis=0) + features /= np.std(features, axis=0) + + # Order consistent with metric (used by kdtree and ckdtree). 
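# euclidean -> p=2, manhattan -> p=1, max_dist -> p=inf; for 'minkowski' the
# user-supplied `order` is kept. Metrics not listed below map to None (they are
# only meaningful for some backends).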
+ _orders = { + 'euclidean': 2, + 'manhattan': 1, + 'max_dist': np.inf, + 'minkowski': order, + } + order = _orders.pop(metric, None) + + if kind == 'knn': + if not 1 <= k < n_vertices: + raise ValueError('The number of neighbors (k={}) must be ' + 'greater than 0 and smaller than the number ' + 'of vertices ({}).'.format(k, n_vertices)) + radius = None + elif kind == 'radius': + if radius == 'estimate': + maximums = np.amax(features, axis=0) + minimums = np.amin(features, axis=0) + distance_max = np.linalg.norm(maximums - minimums, order) + radius = distance_max / np.power(n_vertices, 1/dimensionality) + elif radius == 'estimate-knn': + graph = NNGraph(features, standardize=standardize, + metric=metric, order=order, kind='knn', k=k, + kernel_width=None, backend=backend, **kwargs) + radius = graph.kernel_width + elif radius <= 0: + raise ValueError('The radius must be greater than 0.') + self.k = None else: - raise ValueError('Unknown NNtype {}'.format(self.NNtype)) + raise ValueError('Invalid kind "{}".'.format(kind)) + + neighbors, distances = nearest_neighbor(features, metric=metric, order=order, + kind=kind, k=k, radius=radius, backend=backend, **kwargs) + W = sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=True, kind = kind, k=k) - W = sparse.csc_matrix((spv, (spi, spj)), shape=(N, N)) + if kernel_width is None: + if kind == 'knn': + kernel_width = np.mean(W.data) if W.nnz > 0 else np.nan + elif kind == 'radius': + kernel_width = radius / 2 - # Sanity check - if np.shape(W)[0] != np.shape(W)[1]: - raise ValueError('Weight matrix W is not square') + if not callable(kernel): + try: + kernel = self._kernels[kernel] + except KeyError: + raise ValueError('Unknown kernel {}.'.format(kernel)) - # Enforce symmetry. Note that checking symmetry with - # np.abs(W - W.T).sum() is as costly as the symmetrization itself. - W = utils.symmetrize(W, method=symmetrize_type) + assert np.all(W.data >= 0), 'Distance must be in [0, inf].' 
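+        # Map rescaled distances to edge weights. For example, the default
+        # 'gaussian' kernel gives exp(log(0.5) * (d / kernel_width)**2):
+        # weight 1 at d = 0 and 0.5 at d = kernel_width. All built-in kernels
+        # except 'rectangular' evaluate to 0.5 at a rescaled distance of 1.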
+ W.data = kernel(W.data / kernel_width) + if not np.all((W.data >= 0) & (W.data <= 1)): + _logger.warning('Kernel returned similarity not in [0, 1].') + + self.order = order + self.radius = radius + self.kernel_width = kernel_width - super(NNGraph, self).__init__(W, plotting=plotting, - coords=Xout, **kwargs) + super(NNGraph, self).__init__(W, coords=features, **params_graph) def _get_extra_repr(self): - return {'NNtype': self.NNtype, - 'use_flann': self.use_flann, - 'center': self.center, - 'rescale': self.rescale, - 'k': self.k, - 'sigma': '{:.2f}'.format(self.sigma), - 'epsilon': '{:.2f}'.format(self.epsilon), - 'symmetrize_type': self.symmetrize_type, - 'dist_type': self.dist_type, - 'order': self.order} + attrs = { + 'standardize': self.standardize, + 'metric': self.metric, + 'order': self.order, + 'kind': self.kind, + } + if self.k is not None: + attrs['k'] = self.k + if self.radius is not None: + attrs['radius'] = '{:.2e}'.format(self.radius) + attrs.update({ + 'kernel': '{}'.format(self.kernel), + 'kernel_width': '{:.2e}'.format(self.kernel_width), + 'backend': self.backend, + }) + return attrs + + @staticmethod + def _kernel_rectangular(distance): + return (distance < 1).astype(np.float) + + @staticmethod + def _kernel_triangular(distance, value_at_one=0.5): + distance = value_at_one * distance + return np.maximum(1 - distance, 0) + + @staticmethod + def _kernel_exponential(distance, power=1, value_at_one=0.5): + cst = np.log(value_at_one) + return np.exp(cst * distance**power) + + @staticmethod + def _kernel_powers(distance, pow1, pow2, value_at_one=0.5): + cst = (1 - value_at_one**(1/pow2))**(1/pow1) + distance = np.clip(cst * distance, 0, 1) + return (1 - distance**pow1)**pow2 + + @staticmethod + def _kernel_logistic(distance, value_at_one=0.5): + cst = 4 / value_at_one - 2 + cst = np.log(0.5 * (cst + np.sqrt(cst**2 - 4))) + distance = cst * distance + return 4 / (np.exp(distance) + 2 + np.exp(-distance)) + + @staticmethod + def _kernel_sigmoid(distance, value_at_one=0.5): + cst = 2 / value_at_one + cst = np.log(0.5 * (cst + np.sqrt(cst**2 - 4))) + distance = cst * distance + return 2 / (np.exp(distance) + np.exp(-distance)) + + _kernels = { + 'rectangular': _kernel_rectangular.__func__, + 'triangular': _kernel_triangular.__func__, + + 'exponential': _kernel_exponential.__func__, + 'gaussian': partial(_kernel_exponential.__func__, power=2), + + 'tricube': partial(_kernel_powers.__func__, pow1=3, pow2=3), + 'triweight': partial(_kernel_powers.__func__, pow1=2, pow2=3), + 'quartic': partial(_kernel_powers.__func__, pow1=2, pow2=2), + 'epanechnikov': partial(_kernel_powers.__func__, pow1=2, pow2=1), + + 'logistic': _kernel_logistic.__func__, + 'sigmoid': _kernel_sigmoid.__func__, + } diff --git a/pygsp/graphs/nngraphs/sensor.py b/pygsp/graphs/nngraphs/sensor.py index ac623a50..09764b02 100644 --- a/pygsp/graphs/nngraphs/sensor.py +++ b/pygsp/graphs/nngraphs/sensor.py @@ -75,9 +75,7 @@ def __init__(self, N=64, k=6, distributed=False, seed=None, **kwargs): coords = rs.uniform(0, 1, (N, 2)) - super(Sensor, self).__init__(Xin=coords, k=k, - rescale=False, center=False, - plotting=plotting, **kwargs) + super(Sensor, self).__init__(coords, k=k, plotting=plotting, **kwargs) def _get_extra_repr(self): return {'k': self.k, diff --git a/pygsp/graphs/nngraphs/sphere.py b/pygsp/graphs/nngraphs/sphere.py index e375b5b4..1262b922 100644 --- a/pygsp/graphs/nngraphs/sphere.py +++ b/pygsp/graphs/nngraphs/sphere.py @@ -10,12 +10,12 @@ class Sphere(NNGraph): Parameters ---------- - radius : float - 
Radius of the sphere (default = 1)
     nb_pts : int
         Number of vertices (default = 300)
     nb_dim : int
         Dimension (default = 3)
+    diameter : float
+        Diameter of the sphere (default = 2)
     sampling : string
         Variance of the distance kernel (default = 'random')
         (Can now only be 'random')
@@ -35,14 +35,14 @@ class Sphere(NNGraph):
     """

     def __init__(self,
-                 radius=1,
                  nb_pts=300,
                  nb_dim=3,
+                 diameter=2,
                  sampling='random',
                  seed=None,
                  **kwargs):

-        self.radius = radius
+        self.diameter = diameter
         self.nb_pts = nb_pts
         self.nb_dim = nb_dim
         self.sampling = sampling
@@ -55,6 +55,7 @@ def __init__(self,

             for i in range(self.nb_pts):
                 pts[i] /= np.linalg.norm(pts[i])
+                pts[i] *= (diameter / 2)

         else:

@@ -64,12 +65,10 @@ def __init__(self,
             'vertex_size': 80,
         }

-        super(Sphere, self).__init__(Xin=pts, k=10,
-                                     center=False, rescale=False,
-                                     plotting=plotting, **kwargs)
+        super(Sphere, self).__init__(pts, k=10, plotting=plotting, **kwargs)

     def _get_extra_repr(self):
-        attrs = {'radius': '{:.2f}'.format(self.radius),
+        attrs = {'diameter': '{:.2e}'.format(self.diameter),
                  'nb_pts': self.nb_pts,
                  'nb_dim': self.nb_dim,
                  'sampling': self.sampling,
diff --git a/pygsp/graphs/nngraphs/twomoons.py b/pygsp/graphs/nngraphs/twomoons.py
index af87d0d8..1d7cdde1 100644
--- a/pygsp/graphs/nngraphs/twomoons.py
+++ b/pygsp/graphs/nngraphs/twomoons.py
@@ -16,7 +16,7 @@ class TwoMoons(NNGraph):
         two_moons graph or a synthesized one (default is 'standard').
         'standard' : Create a two_moons graph from a based graph.
         'synthesized' : Create a synthesized two_moon
-    sigmag : float
+    kernel_width : float
         Variance of the distance kernel (default = 0.05)
     dim : int
         The dimensionality of the points (default = 2).
@@ -24,7 +24,7 @@ class TwoMoons(NNGraph):
     N : int
         Number of vertices (default = 2000)
         Only valid for moontype == 'synthesized'.
-    sigmad : float
+    variance : float
         Variance of the data
         (do not set it too high or you won't see anything) (default = 0.05)
         Only valid for moontype == 'synthesized'.
@@ -44,11 +44,11 @@ class TwoMoons(NNGraph): """ - def _create_arc_moon(self, N, sigmad, distance, number, seed): + def _create_arc_moon(self, N, variance, distance, number, seed): rs = np.random.RandomState(seed) phi = rs.rand(N, 1) * np.pi r = 1 - rb = sigmad * rs.normal(size=(N, 1)) + rb = variance * rs.normal(size=(N, 1)) ab = rs.rand(N, 1) * 2 * np.pi b = rb * np.exp(1j * ab) bx = np.real(b) @@ -63,29 +63,29 @@ def _create_arc_moon(self, N, sigmad, distance, number, seed): return np.concatenate((moonx, moony), axis=1) - def __init__(self, moontype='standard', dim=2, sigmag=0.05, - N=400, sigmad=0.07, distance=0.5, seed=None, **kwargs): + def __init__(self, moontype='standard', dim=2, kernel_width=0.05, + N=400, variance=0.07, distance=0.5, seed=None, **kwargs): self.moontype = moontype self.dim = dim - self.sigmag = sigmag - self.sigmad = sigmad + self.kernel_width = kernel_width + self.variance = variance self.distance = distance self.seed = seed if moontype == 'standard': N1, N2 = 1000, 1000 data = utils.loadmat('pointclouds/two_moons') - Xin = data['features'][:dim].T + coords = data['features'][:dim].T elif moontype == 'synthesized': N1 = N // 2 N2 = N - N1 - coords1 = self._create_arc_moon(N1, sigmad, distance, 1, seed) - coords2 = self._create_arc_moon(N2, sigmad, distance, 2, seed) + coords1 = self._create_arc_moon(N1, variance, distance, 1, seed) + coords2 = self._create_arc_moon(N2, variance, distance, 2, seed) - Xin = np.concatenate((coords1, coords2)) + coords = np.concatenate((coords1, coords2)) else: raise ValueError('Unknown moontype {}'.format(moontype)) @@ -96,15 +96,14 @@ def __init__(self, moontype='standard', dim=2, sigmag=0.05, 'vertex_size': 30, } - super(TwoMoons, self).__init__(Xin=Xin, sigma=sigmag, k=5, - center=False, rescale=False, + super(TwoMoons, self).__init__(coords, kernel_width=kernel_width, k=5, plotting=plotting, **kwargs) def _get_extra_repr(self): attrs = {'moontype': self.moontype, 'dim': self.dim, - 'sigmag': '{:.2f}'.format(self.sigmag), - 'sigmad': '{:.2f}'.format(self.sigmad), + 'kernel_width': '{:.2f}'.format(self.kernel_width), + 'variance': '{:.2f}'.format(self.variance), 'distance': '{:.2f}'.format(self.distance), 'seed': self.seed} attrs.update(super(TwoMoons, self)._get_extra_repr()) diff --git a/pygsp/reduction.py b/pygsp/reduction.py index 50a8ae67..184d4b35 100644 --- a/pygsp/reduction.py +++ b/pygsp/reduction.py @@ -56,7 +56,7 @@ def graph_sparsify(M, epsilon, maxiter=10): Examples -------- >>> from pygsp import reduction - >>> G = graphs.Sensor(256, k=20, distributed=True) + >>> G = graphs.Sensor(256, k=20, distributed=True, seed=0) >>> epsilon = 0.4 >>> G2 = reduction.graph_sparsify(G, epsilon) diff --git a/pygsp/tests/test_all.py b/pygsp/tests/test_all.py index 2d52a185..7b8aaef8 100755 --- a/pygsp/tests/test_all.py +++ b/pygsp/tests/test_all.py @@ -14,9 +14,13 @@ from pygsp.tests import test_docstrings from pygsp.tests import test_plotting from pygsp.tests import test_learning +from pygsp.tests import test_nearest_neighbor +from pygsp.tests import test_learn_graph suites = [] +suites.append(test_learn_graph.suite) +suites.append(test_nearest_neighbor.suite) suites.append(test_graphs.suite) suites.append(test_filters.suite) suites.append(test_utils.suite) diff --git a/pygsp/tests/test_docstrings.py b/pygsp/tests/test_docstrings.py index d8f7ff45..279b65f2 100644 --- a/pygsp/tests/test_docstrings.py +++ b/pygsp/tests/test_docstrings.py @@ -41,3 +41,4 @@ def setup(doctest): suite_tutorials = test_docstrings('.', '.rst') suite = 
unittest.TestSuite([suite_reference, suite_tutorials])
+
diff --git a/pygsp/tests/test_filters.py b/pygsp/tests/test_filters.py
index 0bf066f5..d51243b5 100644
--- a/pygsp/tests/test_filters.py
+++ b/pygsp/tests/test_filters.py
@@ -149,7 +149,7 @@ def test_frame(self):
         def get_frame(freq_response):
             return self._G.U.dot(np.diag(freq_response).dot(self._G.U.T))
         gL = np.concatenate([get_frame(gl) for gl in g.evaluate(self._G.e)])
-        np.testing.assert_allclose(gL1, gL)
+        np.testing.assert_allclose(gL1, gL, atol=1e-10)
         np.testing.assert_allclose(gL2, gL, atol=1e-10)

     def test_complement(self, frame_bound=2.5):
@@ -350,3 +350,4 @@ def test_approximations(self):

 suite = unittest.TestLoader().loadTestsFromTestCase(TestCase)
+
diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py
index d1e1c1af..040b87c3 100644
--- a/pygsp/tests/test_graphs.py
+++ b/pygsp/tests/test_graphs.py
@@ -9,6 +9,7 @@
 import os
 import random
+import sys
 import unittest

@@ -16,11 +17,15 @@
 import scipy.linalg
 from scipy import sparse
 import networkx as nx
-import graph_tool as gt
-import graph_tool.generation
 from skimage import data, img_as_float

 from pygsp import graphs
+from pygsp import filters
+
+import graph_tool as gt
+import graph_tool.generation
+
+

 class TestCase(unittest.TestCase):

@@ -448,6 +453,96 @@ def test_set_coordinates(self):
         G.set_coordinates('community2D')
         self.assertRaises(ValueError, G.set_coordinates, 'invalid')
+
+    def test_nngraph(self, n_vertices=24):
+        """Test all the combinations of metric, kind, backend."""
+        Graph = graphs.NNGraph
+        data = np.random.RandomState(42).uniform(size=(n_vertices, 3))
+        metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski']
+        # Not testing 'flann' and 'nmslib' as they are tested in test_nearest_neighbor.
+        backends = ['scipy-kdtree', 'scipy-ckdtree']
+
+        for metric in metrics:
+            for kind in ['knn', 'radius']:
+                params = dict(features=data, metric=metric, kind=kind, k=6)
+                ref = Graph(backend='scipy-pdist', **params)
+                for backend in backends:
+                    graph = Graph(backend=backend, **params)
+                    np.testing.assert_allclose(graph.W.toarray(),
+                                               ref.W.toarray(), rtol=1e-5)
+
+        # Distances between points on a circle.
+        angles = [0, 2 * np.pi / n_vertices]
+        points = np.stack([np.cos(angles), np.sin(angles)], axis=1)
+        distance = np.linalg.norm(points[0] - points[1])
+        weight = np.exp(-np.log(2) * distance**2)
+        column = np.zeros(n_vertices)
+        column[1] = weight
+        column[-1] = weight
+        adjacency = scipy.linalg.circulant(column)
+        data = graphs.Ring(n_vertices).coords
+        for kind in ['knn', 'radius']:
+            for backend in backends + ['scipy-pdist']:
+                graph = Graph(data, kind=kind, k=2, radius=1.01*distance,
+                              kernel_width=1, backend=backend)
+                np.testing.assert_allclose(graph.W.toarray(), adjacency)
+
+        graph = Graph(data, kind='radius', radius='estimate')
+        np.testing.assert_allclose(graph.radius, np.sqrt(8 / n_vertices))
+        graph = Graph(data, kind='radius', k=2, radius='estimate-knn')
+        np.testing.assert_allclose(graph.radius, distance)
+
+        graph = Graph(data, standardize=True)
+        np.testing.assert_allclose(np.mean(graph.coords, axis=0), 0, atol=1e-7)
+        np.testing.assert_allclose(np.std(graph.coords, axis=0), 1)
+
+        # Invalid parameters.
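+        # features must be a 2-D array; metric, kind, kernel, and backend
+        # must be known; k must be in [1, n_vertices) and radius positive.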
+ self.assertRaises(ValueError, Graph, np.ones(n_vertices)) + self.assertRaises(ValueError, Graph, np.ones((n_vertices, 3, 4))) + self.assertRaises(ValueError, Graph, data, metric='invalid') + self.assertRaises(ValueError, Graph, data, kind='invalid') + self.assertRaises(ValueError, Graph, data, kernel='invalid') + self.assertRaises(ValueError, Graph, data, backend='invalid') + self.assertRaises(ValueError, Graph, data, kind='knn', k=0) + self.assertRaises(ValueError, Graph, data, kind='knn', k=n_vertices) + self.assertRaises(ValueError, Graph, data, kind='radius', radius=0) + + # Empty graph. + if sys.version_info > (3, 4): # no assertLogs in python 2.7 + for backend in backends + ['scipy-pdist']: + if backend == 'nmslib': + continue # nmslib doesn't support radius + with self.assertLogs(level='WARNING'): + graph = Graph(data, kind='radius', radius=1e-9, + backend=backend) + self.assertEqual(graph.n_edges, 0) + + # Backend parameters. + Graph(data, lap_type='normalized') + Graph(data, plotting=dict(vertex_size=10)) + Graph(data, backend='flann', algorithm='kmeans') + Graph(data, backend='nmslib', method='vptree') + Graph(data, backend='nmslib', index=dict(post=2)) + Graph(data, backend='nmslib', query=dict(efSearch=100)) + for backend in ['scipy-kdtree', 'scipy-ckdtree']: + Graph(data, backend=backend, eps=1e-2) + Graph(data, backend=backend, leafsize=9) + self.assertRaises(ValueError, Graph, data, backend='scipy-pdist', a=0) + + # Kernels. + for name, kernel in graphs.NNGraph._kernels.items(): + similarity = 0 if name == 'rectangular' else 0.5 + np.testing.assert_allclose(kernel(np.ones(10)), similarity) + np.testing.assert_allclose(kernel(np.zeros(10)), 1) + Graph(data, kernel=lambda d: d.min()/d) + if sys.version_info > (3, 4): # no assertLogs in python 2.7 + with self.assertLogs(level='WARNING'): + Graph(data, kernel=lambda d: 1/d) + + # Attributes. + self.assertEqual(Graph(data, kind='knn').radius, None) + self.assertEqual(Graph(data, kind='radius').k, None) + def test_line_graph(self): adjacency = [ [0, 1, 1, 3], @@ -492,24 +587,6 @@ def test_subgraph(self, n_vertices=100): self.assertIs(graph.lap_type, self._G.lap_type) self.assertEqual(graph.plotting, self._G.plotting) - def test_nngraph(self, n_vertices=30): - rs = np.random.RandomState(42) - Xin = rs.normal(size=(n_vertices, 3)) - dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - - for dist_type in dist_types: - - # Only p-norms with 1<=p<=infinity permitted. - if dist_type != 'minkowski': - graphs.NNGraph(Xin, NNtype='radius', dist_type=dist_type) - graphs.NNGraph(Xin, NNtype='knn', dist_type=dist_type) - - # Distance type unsupported in the C bindings, - # use the C++ bindings instead. 
- if dist_type != 'max_dist': - graphs.NNGraph(Xin, use_flann=True, NNtype='knn', - dist_type=dist_type) - def test_bunny(self): graphs.Bunny() @@ -635,6 +712,35 @@ def test_grid2d_diagonals(self): self.assertEqual(G.W[9, 16], 1.0) self.assertEqual(G.W[20, 27], 1.0) + def test_learn_graph(self): + from pygsp.graphs import LearnedFromSmoothSignals + from pygsp.utils import distanz + from pygsp.graphs.learned import gsp_compute_graph_learning_theta + + # Create a bunch of signals + n=100 + d = 400 + k = 6 + coords = np.random.RandomState(0).uniform(size=(n, 2)) + G = graphs.NNGraph(coords,k=10, kernel='gaussian') + G.compute_fourier_basis() + g = filters.Filter(G,lambda x:1/(1+5*x)) + S = np.random.randn(n,d) + X = np.squeeze(g.filter(S)) + + # Test if large scale and small scale are close + G_learned1 = LearnedFromSmoothSignals(X, k=k, sparse=False) + G_learned2 = LearnedFromSmoothSignals(X,k=k, sparse=True) + assert(np.sum(np.abs(G_learned2.W.todense()-G_learned1.W.todense()))/np.sum(np.abs(G_learned1.W.todense()))<1e-4) + + # Test automatic parameters tuning + Z = distanz(X.transpose()) + theta, theta_min, theta_max = gsp_compute_graph_learning_theta(Z, k) + a = 1/theta + b = a + G_learned3 = LearnedFromSmoothSignals(X,a=a,b=b, sparse=False) + assert(np.sum(np.abs(G_learned3.W.todense()-G_learned1.W.todense()))/np.sum(np.abs(G_learned1.W.todense()))<1e-3) + suite_graphs = unittest.TestLoader().loadTestsFromTestCase(TestCase) @@ -897,3 +1003,4 @@ def test_import_errors(self): suite_import_export = unittest.TestLoader().loadTestsFromTestCase(TestImportExport) suite = unittest.TestSuite([suite_graphs, suite_import_export]) + diff --git a/pygsp/tests/test_learn_graph.py b/pygsp/tests/test_learn_graph.py new file mode 100644 index 00000000..6558d410 --- /dev/null +++ b/pygsp/tests/test_learn_graph.py @@ -0,0 +1,112 @@ +from __future__ import division + +import unittest +import numpy as np +from pygsp._nearest_neighbor import nearest_neighbor as nn +from pygsp._nearest_neighbor import sparse_distance_matrix +from scipy import sparse +import pygsp as pg +from pygsp.utils import distanz +from pygsp.graphs.learned import * + +class TestCase(unittest.TestCase): + def test_prox_sum_log(self): + np.testing.assert_array_less(0,prox_sum_log(np.random.randn((10)),0.1)) + def test_isvector(self): + assert(isvector(4)==False) + assert(isvector([4,4])==False) + assert(isvector(np.array([4,4,4]))==True) + assert(isvector(np.zeros([3,4]))==False) + assert(isvector(np.zeros([3]))==True) + assert(isvector(np.zeros([3,0]))==False) + assert(isvector(np.zeros([3,1]))==False) + assert(isvector(sparse.csr_matrix((10,1)))==True) + + def test_issymetric(self): + data = np.random.RandomState(42).uniform(size=(100, 3)) + # neighbors, distances = nn(data, backend='flann') + neighbors, distances = nn(data) + W = sparse_distance_matrix(neighbors, distances) + assert(issymetric(W)) + W3 = W.copy() + W3[:,0] = -W3[:,0] + assert(issymetric(W3)==False) + + def test_squareform_sp(self): + data = np.random.RandomState(42).uniform(size=(100, 3)) + # neighbors, distances = nn(data, backend='flann') + neighbors, distances = nn(data) + W = sparse_distance_matrix(neighbors, distances) + w = squareform_sp(W) + W2 = squareform_sp(w) + np.testing.assert_array_almost_equal(W.todense(), W2.todense()) + + def test_sum_squareform(self): + nt =10 + # a) Create a random symetric matrix + A = np.random.rand(nt,nt) + A = A +A.transpose() + A = A - np.diag(np.diag(A)) + # b) Make the test + res1 = np.sum(A, axis=1) + a = squareform_sp(A) + S,St 
= sum_squareform(nt) + res2 = S.dot(a) + np.testing.assert_allclose(res1,res2) + np.testing.assert_array_equal(S.transpose().todense(), St.todense()) + + def test_sum_squareform_sparse(self): + data = np.random.uniform(size=(100, 3)) + # neighbors, distances = nn(data, backend='flann') + neighbors, distances = nn(data) + W = sparse_distance_matrix(neighbors, distances) + nt = W.shape[0] + # b) Make the test + res1 = np.squeeze(np.asarray(np.sum(W.todense(), axis=1))) + w = squareform_sp(W) + S,St = sum_squareform(nt, w) + wfull = w.data + res2 = S.dot(wfull) + np.testing.assert_allclose(res1,res2) + np.testing.assert_array_equal(S.transpose().todense(), St.todense()) + + def test_linmap(self): + np.testing.assert_array_almost_equal(lin_map(np.arange(10)/10,[0,10],[0,1]),np.arange(10)) + np.testing.assert_array_almost_equal(lin_map(np.arange(11)/10,[0,10]),np.arange(11)) + + def test_norm_S(self): + edge_mask = None + for n in range(15): + S, St = sum_squareform(10, mask=edge_mask) + res1 = np.linalg.norm(S.todense(),2) + res2 = norm_S(S) + np.testing.assert_allclose(res1,res2) + + def test_learn_graph_log(self): + # Create a bunch of signals + n=100 + d = 400 + G = pg.graphs.Sensor(N=n,k=6, seed=0) + G.compute_fourier_basis() + # g = pg.filters.Heat(G, scale=5) + g = pg.filters.Filter(G,lambda x:1/(1+5*x)) + S = np.random.randn(n,d) + X = np.squeeze(g.filter(S)) + + Z = distanz(X.transpose()) + k=6 + theta, theta_min, theta_max = gsp_compute_graph_learning_theta(Z, k) + learned_W, _ = learn_graph_log_degree(theta*Z, verbosity=0) + + neighbors, distances = nn(X, k=3*k, kind='knn') + np.testing.assert_equal(distances[:,0],0) + dmat = distances[:,1:] + theta2, theta_min2, theta_max2 = gsp_compute_graph_learning_theta(dmat, k) + np.testing.assert_allclose(theta, theta2) + np.testing.assert_allclose(theta_min, theta_min2) + np.testing.assert_allclose(theta_max, theta_max2) + W = sparse_distance_matrix(neighbors, distances) + learned_W2, _ = learn_graph_log_degree(W*theta2, edge_mask=W>0, verbosity=0) + assert(np.sum(np.abs(learned_W2.todense()-learned_W))/np.sum(np.abs(learned_W))<3e-3) + +suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) \ No newline at end of file diff --git a/pygsp/tests/test_nearest_neighbor.py b/pygsp/tests/test_nearest_neighbor.py new file mode 100644 index 00000000..c11d38b7 --- /dev/null +++ b/pygsp/tests/test_nearest_neighbor.py @@ -0,0 +1,79 @@ +import unittest +import numpy as np +from pygsp._nearest_neighbor import nearest_neighbor, sparse_distance_matrix + +class TestCase(unittest.TestCase): + def test_nngraph(self, n_vertices=100): + data = np.random.RandomState(42).uniform(size=(n_vertices, 3)) + metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann', 'nmslib'] + + for metric in metrics: + for kind in ['knn', 'radius']: + for backend in backends: + params = dict(features=data, metric=metric, kind=kind, radius=0.25, k=8) + + ref_nn, ref_d = nearest_neighbor(backend='scipy-pdist', **params) + # Unsupported combinations. 
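+                    # flann does not implement the 'max_dist' metric, and
+                    # nmslib supports neither the 'minkowski' metric nor
+                    # radius queries.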
+ if backend == 'flann' and metric == 'max_dist': + self.assertRaises(ValueError, nearest_neighbor, data, + metric=metric, backend=backend) + elif backend == 'nmslib' and metric == 'minkowski': + self.assertRaises(ValueError, nearest_neighbor, data, + metric=metric, backend=backend) + elif backend == 'nmslib' and kind == 'radius': + self.assertRaises(ValueError, nearest_neighbor, data, + kind=kind, backend=backend) + else: + params['backend'] = backend + if backend == 'flann': + other_nn, other_d = nearest_neighbor(random_seed=44, target_precision=1, **params) + else: + other_nn, other_d = nearest_neighbor(**params) + print(kind, backend) + if backend == 'flann': + for a,b in zip(ref_nn, other_nn): + assert(len(set(a)-set(b))<=2) + + for a,b in zip(ref_d, other_d): + np.testing.assert_allclose(np.mean(a).astype(np.float32),np.mean(b), atol=2e-2) + else: + for a,b,c,d in zip(ref_nn, other_nn, ref_d, other_d): + e = set(a)-set(b) + assert(len(e)<=1) + if len(e)==0: + np.testing.assert_allclose(np.sort(c),np.sort(d), rtol=1e-5) + + def test_sparse_distance_matrix(self): + data = np.random.RandomState(42).uniform(size=(200, 3)) + neighbors, distances = nearest_neighbor(data) + W = sparse_distance_matrix(neighbors, distances, symmetrize=True) + # Assert symetry + np.testing.assert_allclose(W.todense(), W.T.todense()) + # positivity + np.testing.assert_array_equal(W.todense()>=0, True) + # 0 diag + np.testing.assert_array_equal(np.diag(W.todense())==0, True) + + # Assert that it is not symmetric anymore + W = sparse_distance_matrix(neighbors, distances, symmetrize=False) + assert(np.sum(np.abs(W.todense()-W.T.todense()))>0.1) + # positivity + np.testing.assert_array_equal(W.todense()>=0, True) + # 0 diag + np.testing.assert_array_equal(np.diag(W.todense())==0, True) + # everything is used once + np.testing.assert_allclose(np.sum(W.todense()), np.sum(distances)) + + # simple test with a kernel + W = sparse_distance_matrix(neighbors, 1/(1+distances), symmetrize=True) + # Assert symetry + np.testing.assert_allclose(W.todense(), W.T.todense()) + # positivity + np.testing.assert_array_equal(W.todense()>=0, True) + # 0 diag + np.testing.assert_array_equal(np.diag(W.todense())==0, True) + + +suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) + diff --git a/pygsp/tests/test_plotting.py b/pygsp/tests/test_plotting.py index 822d3127..70b56464 100644 --- a/pygsp/tests/test_plotting.py +++ b/pygsp/tests/test_plotting.py @@ -39,6 +39,7 @@ def test_plot_graphs(self): 'FullConnected', 'RandomRegular', 'StochasticBlockModel', + 'LearnGraph', } # Coordinates are not in 2D or 3D. @@ -50,8 +51,8 @@ def test_plot_graphs(self): # Classes who require parameters. 
if classname == 'NNGraph': - Xin = np.arange(90).reshape(30, 3) - Gs.append(Graph(Xin)) + features = np.random.RandomState(42).normal(size=(30, 3)) + Gs.append(Graph(features)) elif classname in ['ImgPatches', 'Grid2dImgPatches']: Gs.append(Graph(img=self._img, patch_shape=(3, 3))) elif classname == 'LineGraph': @@ -169,4 +170,4 @@ def test_unknown_backend(self): self.assertRaises(ValueError, G.plot, backend='abc') -suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) +suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) \ No newline at end of file diff --git a/pygsp/utils.py b/pygsp/utils.py index 94474eb1..4dd0f6ef 100644 --- a/pygsp/utils.py +++ b/pygsp/utils.py @@ -26,12 +26,12 @@ def build_logger(name): formatter = logging.Formatter( "%(asctime)s:[%(levelname)s](%(name)s.%(funcName)s): %(message)s") - steam_handler = logging.StreamHandler() - steam_handler.setLevel(logging.DEBUG) - steam_handler.setFormatter(formatter) + stream_handler = logging.StreamHandler() + stream_handler.setLevel(logging.DEBUG) + stream_handler.setFormatter(formatter) logger.setLevel(logging.DEBUG) - logger.addHandler(steam_handler) + logger.addHandler(stream_handler) return logger @@ -91,7 +91,7 @@ def loadmat(path): def distanz(x, y=None): r""" - Calculate the distance between two colon vectors. + Calculate the distance between two vectors. Parameters ---------- @@ -115,19 +115,16 @@ def distanz(x, y=None): [2., 1., 0.]]) """ - try: - x.shape[1] - except IndexError: - x = x.reshape(1, x.shape[0]) + if len(x.shape)<2: + x = np.expand_dims(x, axis=0) if y is None: y = x - + diag_zero = True else: - try: - y.shape[1] - except IndexError: - y = y.reshape(1, y.shape[0]) + diag_zero = False + if len(y.shape)<2: + y = np.expand_dims(y, axis=0) rx, cx = x.shape ry, cy = y.shape @@ -142,7 +139,10 @@ def distanz(x, y=None): d = abs(np.kron(np.ones((cy, 1)), xx).T + np.kron(np.ones((cx, 1)), yy) - 2 * xy) - + + if diag_zero: + # Remove diagonal errors + d = d-np.diag(np.diag(d)) return np.sqrt(d) diff --git a/setup.py b/setup.py index eb555161..fa092a00 100644 --- a/setup.py +++ b/setup.py @@ -41,8 +41,8 @@ # Construct patch graphs from images. 'scikit-image', # Approximate nearest neighbors for kNN graphs. - 'pyflann; python_version == "2.*"', - 'pyflann3; python_version == "3.*"', + 'cyflann', + 'nmslib', # Convex optimization on graph. 'pyunlocbox', # Plot graphs, signals, and filters.