diff --git a/CHANGES.rst b/CHANGES.rst
index 352e52e95e042248d32ee85e36c42b4c7ba2ebc4..2e7459b141cbace952d5bdabc3086d877418fedd 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -11,6 +11,8 @@ Features added
 * Support for coverage.py 4.0+ can be enabled by adding the plugin
   "Cython.Coverage" to the ".coveragerc" config file.
 
+* Tracing is supported in ``nogil`` functions/sections when the C macro ``CYTHON_TRACE_NOGIL=1`` is defined.
+
 Bugs fixed
 ----------
 
diff --git a/Cython/Compiler/Code.pxd b/Cython/Compiler/Code.pxd
index 9d1d651b0c56a17f4d9c1f01a7492b0735e477d9..46087da07d476afef27552f142a1c870200c8f2b 100644
--- a/Cython/Compiler/Code.pxd
+++ b/Cython/Compiler/Code.pxd
@@ -31,9 +31,10 @@ cdef class FunctionState:
 
     cdef public object return_from_error_cleanup_label # not used in __init__ ?
 
-    cdef public bint in_try_finally
     cdef public object exc_vars
+    cdef public bint in_try_finally
     cdef public bint can_trace
+    cdef public bint gil_owned
 
     cdef public list temps_allocated
     cdef public dict temps_free
diff --git a/Cython/Compiler/Code.py b/Cython/Compiler/Code.py
index eb85040b1e08e83511b2387db51f19908b674404..5eaa1bf77693c6775efdf58187bdd64c07e91a75 100644
--- a/Cython/Compiler/Code.py
+++ b/Cython/Compiler/Code.py
@@ -518,6 +518,7 @@ class FunctionState(object):
         self.in_try_finally = 0
         self.exc_vars = None
         self.can_trace = False
+        self.gil_owned = True
 
         self.temps_allocated = [] # of (name, type, manage_ref, static)
         self.temps_free = {} # (type, manage_ref) -> list of free vars with same type/managed status
@@ -1572,7 +1573,8 @@ class CCodeWriter(object):
         if (self.funcstate and self.funcstate.can_trace
                 and self.globalstate.directives['linetrace']):
             self.indent()
-            self.write('__Pyx_TraceLine(%d)\n' % self.marker[0])
+            self.write('__Pyx_TraceLine(%d,%d)\n' % (
+                self.marker[0], not self.funcstate.gil_owned))
         self.last_marker_line = self.marker[0]
         self.marker = None
 
@@ -2093,17 +2095,18 @@ class CCodeWriter(object):
         self.globalstate.use_utility_code(
             UtilityCode.load_cached("WriteUnraisableException", "Exceptions.c"))
 
-    def put_trace_declarations(self, codeobj=None):
-        self.putln('__Pyx_TraceDeclarations(%s)' % (codeobj or 'NULL'))
+    def put_trace_declarations(self, codeobj=None, nogil=False):
+        self.putln('__Pyx_TraceDeclarations(%s, %d)' % (codeobj or 'NULL', nogil))
 
-    def put_trace_call(self, name, pos):
-        self.putln('__Pyx_TraceCall("%s", %s[%s], %s);' % (name, Naming.filetable_cname, self.lookup_filename(pos[0]), pos[1]))
+    def put_trace_call(self, name, pos, nogil=False):
+        self.putln('__Pyx_TraceCall("%s", %s[%s], %s, %d);' % (
+            name, Naming.filetable_cname, self.lookup_filename(pos[0]), pos[1], nogil))
 
     def put_trace_exception(self):
         self.putln("__Pyx_TraceException();")
 
-    def put_trace_return(self, retvalue_cname):
-        self.putln("__Pyx_TraceReturn(%s);" % retvalue_cname)
+    def put_trace_return(self, retvalue_cname, nogil=False):
+        self.putln("__Pyx_TraceReturn(%s, %d);" % (retvalue_cname, nogil))
 
     def putln_openmp(self, string):
         self.putln("#ifdef _OPENMP")
diff --git a/Cython/Compiler/Nodes.py b/Cython/Compiler/Nodes.py
index 472ea2be3910fcd197dfc0b4fb20cd27d3eb8698..dd12eebd7ffe2802cc75add8593732e56ec4c11a 100644
--- a/Cython/Compiler/Nodes.py
+++ b/Cython/Compiler/Nodes.py
@@ -1698,9 +1698,6 @@ class FuncDefNode(StatNode, BlockNode):
 
         profile = code.globalstate.directives['profile']
         linetrace = code.globalstate.directives['linetrace']
-        if (linetrace or profile) and lenv.nogil:
-            warning(self.pos, "Cannot profile nogil function.", 1)
-            profile = linetrace = False
         if profile or linetrace:
             code.globalstate.use_utility_code(
                 UtilityCode.load_cached("Profile", "Profile.c"))
@@ -1708,6 +1705,7 @@ class FuncDefNode(StatNode, BlockNode):
         # Generate C code for header and body of function
         code.enter_cfunc_scope()
         code.return_from_error_cleanup_label = code.new_label()
+        code.funcstate.gil_owned = not lenv.nogil
 
         # ----- Top-level constants used by this function
         code.mark_pos(self.pos)
@@ -1764,7 +1762,7 @@ class FuncDefNode(StatNode, BlockNode):
 
         if profile or linetrace:
             code_object = self.code_object.calculate_result_code(code) if self.code_object else None
-            code.put_trace_declarations(code_object)
+            code.put_trace_declarations(code_object, nogil=not code.funcstate.gil_owned)
 
         # ----- Extern library function declarations
         lenv.generate_library_function_declarations(code)
@@ -1775,10 +1773,9 @@ class FuncDefNode(StatNode, BlockNode):
         # See if we need to acquire the GIL for variable declarations, or for
         # refnanny only
 
-        # Profiling or closures are not currently possible for cdef nogil
-        # functions, but check them anyway
-        have_object_args = (self.needs_closure or self.needs_outer_scope or
-                            profile or linetrace)
+        # Closures are not currently possible for cdef nogil functions,
+        # but check them anyway
+        have_object_args = self.needs_closure or self.needs_outer_scope
         for arg in lenv.arg_entries:
             if arg.type.is_pyobject:
                 have_object_args = True
@@ -1796,6 +1793,7 @@ class FuncDefNode(StatNode, BlockNode):
 
         if acquire_gil or acquire_gil_for_var_decls_only:
             code.put_ensure_gil()
+            code.funcstate.gil_owned = True
         elif lenv.nogil and lenv.has_with_gil_block:
             code.declare_gilstate()
 
@@ -1855,7 +1853,7 @@ class FuncDefNode(StatNode, BlockNode):
         if profile or linetrace:
             # this looks a bit late, but if we don't get here due to a
             # fatal error before hand, it's not really worth tracing
-            code.put_trace_call(self.entry.name, self.pos)
+            code.put_trace_call(self.entry.name, self.pos, nogil=not code.funcstate.gil_owned)
             code.funcstate.can_trace = True
         # ----- Fetch arguments
         self.generate_argument_parsing_code(env, code)
@@ -1874,8 +1872,7 @@ class FuncDefNode(StatNode, BlockNode):
             #       incref our arguments
             elif (is_cdef and entry.type.is_memoryviewslice and
                   len(entry.cf_assignments) > 1):
-                code.put_incref_memoryviewslice(entry.cname,
-                                                have_gil=not lenv.nogil)
+                code.put_incref_memoryviewslice(entry.cname, have_gil=code.funcstate.gil_owned)
         for entry in lenv.var_entries:
             if entry.is_arg and len(entry.cf_assignments) > 1:
                 code.put_var_incref(entry)
@@ -1894,6 +1891,7 @@ class FuncDefNode(StatNode, BlockNode):
 
         if acquire_gil_for_var_decls_only:
             code.put_release_ensured_gil()
+            code.funcstate.gil_owned = False
 
         # -------------------------
         # ----- Function body -----
@@ -2054,9 +2052,9 @@ class FuncDefNode(StatNode, BlockNode):
         if profile or linetrace:
             code.funcstate.can_trace = False
             if self.return_type.is_pyobject:
-                code.put_trace_return(Naming.retval_cname)
+                code.put_trace_return(Naming.retval_cname, nogil=not code.funcstate.gil_owned)
             else:
-                code.put_trace_return("Py_None")
+                code.put_trace_return("Py_None", nogil=not code.funcstate.gil_owned)
 
         if not lenv.nogil:
             # GIL holding function
@@ -2065,6 +2063,7 @@ class FuncDefNode(StatNode, BlockNode):
         if acquire_gil or (lenv.nogil and lenv.has_with_gil_block):
             # release the GIL (note that with-gil blocks acquire it on exit in their EnsureGILNode)
             code.put_release_ensured_gil()
+            code.funcstate.gil_owned = False
 
         if not self.return_type.is_void:
             code.putln("return %s;" % Naming.retval_cname)
@@ -7073,21 +7072,20 @@ class GILStatNode(NogilTryFinallyStatNode):
         else:
             variable = None
 
-        old_trace_config = code.funcstate.can_trace
+        old_gil_config = code.funcstate.gil_owned
         if self.state == 'gil':
             code.put_ensure_gil(variable=variable)
-            # FIXME: not that easy, tracing may not be possible at all here
-            #code.funcstate.can_trace = True
+            code.funcstate.gil_owned = True
         else:
             code.put_release_gil(variable=variable)
-            code.funcstate.can_trace = False
+            code.funcstate.gil_owned = False
 
         TryFinallyStatNode.generate_execution_code(self, code)
 
         if self.state_temp:
             self.state_temp.release(code)
 
-        code.funcstate.can_trace = old_trace_config
+        code.funcstate.gil_owned = old_gil_config
         code.end_block()
 
 
diff --git a/Cython/Utility/Profile.c b/Cython/Utility/Profile.c
index 43a065317459345120491129e05e68be21faa735..48a09ea3ee40af17a1fe210347e2dd827ee94d2d 100644
--- a/Cython/Utility/Profile.c
+++ b/Cython/Utility/Profile.c
@@ -12,6 +12,10 @@
   #define CYTHON_TRACE 0
 #endif
 
+#ifndef CYTHON_TRACE_NOGIL
+  #define CYTHON_TRACE_NOGIL 0
+#endif
+
 #if CYTHON_TRACE
   #undef CYTHON_PROFILE_REUSE_FRAME
 #endif
@@ -28,25 +32,47 @@
 
   #if CYTHON_PROFILE_REUSE_FRAME
     #define CYTHON_FRAME_MODIFIER static
-    #define CYTHON_FRAME_DEL
+    #define CYTHON_FRAME_DEL(frame)
   #else
     #define CYTHON_FRAME_MODIFIER
-    #define CYTHON_FRAME_DEL Py_CLEAR($frame_cname)
+    #define CYTHON_FRAME_DEL(frame) Py_CLEAR(frame)
   #endif
 
-  #define __Pyx_TraceDeclarations(codeobj)                            \
+  #define __Pyx_TraceDeclarations(codeobj, nogil)                     \
   static PyCodeObject *$frame_code_cname = NULL;                      \
   CYTHON_FRAME_MODIFIER PyFrameObject *$frame_cname = NULL;           \
   int __Pyx_use_tracing = 0;                                          \
   if (codeobj) $frame_code_cname = (PyCodeObject*) codeobj;
 
-  #define __Pyx_TraceCall(funcname, srcfile, firstlineno)                            \
-  {   PyThreadState* tstate = PyThreadState_GET();                                   \
-      if (unlikely(tstate->use_tracing) && !tstate->tracing &&                       \
-              (tstate->c_profilefunc || (CYTHON_TRACE && tstate->c_tracefunc))) {    \
+  #ifdef WITH_THREAD
+  #define __Pyx_TraceCall(funcname, srcfile, firstlineno, nogil)                         \
+  if (nogil) {                                                                           \
+      if (CYTHON_TRACE_NOGIL) {                                                          \
+          PyThreadState *tstate;                                                         \
+          PyGILState_STATE state = PyGILState_Ensure();                                  \
+          tstate = PyThreadState_GET();                                                  \
+          if (unlikely(tstate->use_tracing) && !tstate->tracing &&                       \
+                  (tstate->c_profilefunc || (CYTHON_TRACE && tstate->c_tracefunc))) {    \
+              __Pyx_use_tracing = __Pyx_TraceSetupAndCall(&$frame_code_cname, &$frame_cname, funcname, srcfile, firstlineno);  \
+          }                                                                              \
+          PyGILState_Release(state);                                                     \
+      }                                                                                  \
+  } else {                                                                               \
+      PyThreadState* tstate = PyThreadState_GET();                                       \
+      if (unlikely(tstate->use_tracing) && !tstate->tracing &&                           \
+              (tstate->c_profilefunc || (CYTHON_TRACE && tstate->c_tracefunc))) {        \
           __Pyx_use_tracing = __Pyx_TraceSetupAndCall(&$frame_code_cname, &$frame_cname, funcname, srcfile, firstlineno);  \
-      }                                                                              \
+      }                                                                                  \
   }
+  #else
+  #define __Pyx_TraceCall(funcname, srcfile, firstlineno, nogil)                         \
+  {   PyThreadState* tstate = PyThreadState_GET();                                       \
+      if (unlikely(tstate->use_tracing) && !tstate->tracing &&                           \
+              (tstate->c_profilefunc || (CYTHON_TRACE && tstate->c_tracefunc))) {        \
+          __Pyx_use_tracing = __Pyx_TraceSetupAndCall(&$frame_code_cname, &$frame_cname, funcname, srcfile, firstlineno);  \
+      }                                                                                  \
+  }
+  #endif
 
   #define __Pyx_TraceException()                                                           \
   if (likely(!__Pyx_use_tracing)); else {                                                  \
@@ -69,36 +95,60 @@
       }                                                                                    \
   }
 
-  #define __Pyx_TraceReturn(result)                                                       \
+  static void __Pyx_call_return_trace_func(PyThreadState *tstate, PyFrameObject *frame, PyObject *result) {
+      PyObject *type, *value, *traceback;
+      PyErr_Fetch(&type, &value, &traceback);
+      tstate->tracing++;
+      tstate->use_tracing = 0;
+      if (CYTHON_TRACE && tstate->c_tracefunc)
+          tstate->c_tracefunc(tstate->c_traceobj, frame, PyTrace_RETURN, result);
+      if (tstate->c_profilefunc)
+          tstate->c_profilefunc(tstate->c_profileobj, frame, PyTrace_RETURN, result);
+      CYTHON_FRAME_DEL(frame);
+      tstate->use_tracing = 1;
+      tstate->tracing--;
+      PyErr_Restore(type, value, traceback);
+  }
+
+  #ifdef WITH_THREAD
+  #define __Pyx_TraceReturn(result, nogil)                                                \
+  if (likely(!__Pyx_use_tracing)); else {                                                 \
+      if (nogil) {                                                                        \
+          if (CYTHON_TRACE_NOGIL) {                                                       \
+              PyThreadState *tstate;                                                      \
+              PyGILState_STATE state = PyGILState_Ensure();                               \
+              tstate = PyThreadState_GET();                                               \
+              if (tstate->use_tracing) {                                                  \
+                  __Pyx_call_return_trace_func(tstate, $frame_cname, (PyObject*)result);  \
+              }                                                                           \
+              PyGILState_Release(state);                                                  \
+          }                                                                               \
+      } else {                                                                            \
+          PyThreadState* tstate = PyThreadState_GET();                                    \
+          if (tstate->use_tracing) {                                                      \
+              __Pyx_call_return_trace_func(tstate, $frame_cname, (PyObject*)result);      \
+          }                                                                               \
+      }                                                                                   \
+  }
+  #else
+  #define __Pyx_TraceReturn(result, nogil)                                                \
   if (likely(!__Pyx_use_tracing)); else {                                                 \
       PyThreadState* tstate = PyThreadState_GET();                                        \
       if (tstate->use_tracing) {                                                          \
-          PyObject *type, *value, *traceback;                                             \
-          PyErr_Fetch(&type, &value, &traceback);                                         \
-          tstate->tracing++;                                                              \
-          tstate->use_tracing = 0;                                                        \
-          if (CYTHON_TRACE && tstate->c_tracefunc)                                        \
-              tstate->c_tracefunc(                                                        \
-                  tstate->c_traceobj, $frame_cname, PyTrace_RETURN, (PyObject*)result);   \
-          if (tstate->c_profilefunc)                                                      \
-              tstate->c_profilefunc(                                                      \
-                  tstate->c_profileobj, $frame_cname, PyTrace_RETURN, (PyObject*)result); \
-          CYTHON_FRAME_DEL;                                                               \
-          tstate->use_tracing = 1;                                                        \
-          tstate->tracing--;                                                              \
-          PyErr_Restore(type, value, traceback);                                          \
+          __Pyx_call_return_trace_func(tstate, $frame_cname, (PyObject*)result);          \
       }                                                                                   \
   }
+  #endif
 
   static PyCodeObject *__Pyx_createFrameCodeObject(const char *funcname, const char *srcfile, int firstlineno); /*proto*/
   static int __Pyx_TraceSetupAndCall(PyCodeObject** code, PyFrameObject** frame, const char *funcname, const char *srcfile, int firstlineno); /*proto*/
 
 #else
 
-  #define __Pyx_TraceDeclarations(codeobj)
-  #define __Pyx_TraceCall(funcname, srcfile, firstlineno)
+  #define __Pyx_TraceDeclarations(codeobj, nogil)
+  #define __Pyx_TraceCall(funcname, srcfile, firstlineno, nogil)
   #define __Pyx_TraceException()
-  #define __Pyx_TraceReturn(result)
+  #define __Pyx_TraceReturn(result, nogil)
 
 #endif /* CYTHON_PROFILE */
 
@@ -117,15 +167,37 @@
       PyErr_Restore(type, value, traceback);
   }
 
-  #define __Pyx_TraceLine(lineno)                                                          \
+  #ifdef WITH_THREAD
+  #define __Pyx_TraceLine(lineno, nogil)                                                   \
+  if (likely(!__Pyx_use_tracing)); else {                                                  \
+      if (nogil) {                                                                         \
+          if (CYTHON_TRACE_NOGIL) {                                                        \
+              PyThreadState *tstate;                                                       \
+              PyGILState_STATE state = PyGILState_Ensure();                                \
+              tstate = PyThreadState_GET();                                                \
+              if (unlikely(tstate->use_tracing && tstate->c_tracefunc)) {                  \
+                  __Pyx_call_line_trace_func(tstate, $frame_cname, lineno);                \
+              }                                                                            \
+              PyGILState_Release(state);                                                   \
+          }                                                                                \
+      } else {                                                                             \
+          PyThreadState* tstate = PyThreadState_GET();                                     \
+          if (unlikely(tstate->use_tracing && tstate->c_tracefunc)) {                      \
+              __Pyx_call_line_trace_func(tstate, $frame_cname, lineno);                    \
+          }                                                                                \
+      }                                                                                    \
+  }
+  #else
+  #define __Pyx_TraceLine(lineno, nogil)                                                   \
   if (likely(!__Pyx_use_tracing)); else {                                                  \
       PyThreadState* tstate = PyThreadState_GET();                                         \
       if (unlikely(tstate->use_tracing && tstate->c_tracefunc)) {                          \
           __Pyx_call_line_trace_func(tstate, $frame_cname, lineno);                        \
       }                                                                                    \
   }
+  #endif
 #else
-  #define __Pyx_TraceLine(lineno)
+  #define __Pyx_TraceLine(lineno, nogil)
 #endif
 
 /////////////// Profile ///////////////
diff --git a/tests/run/coverage_nogil.srctree b/tests/run/coverage_nogil.srctree
index f47068896b6444a11bf14a8cb3e006cb494bf570..07da14a1f953506f30b48e29eac2a4b32e87e301 100644
--- a/tests/run/coverage_nogil.srctree
+++ b/tests/run/coverage_nogil.srctree
@@ -23,7 +23,7 @@ plugins = Cython.Coverage
 
 ######## coverage_test_nogil.pyx ########
 # cython: linetrace=True
-# distutils: define_macros=CYTHON_TRACE=1
+# distutils: define_macros=CYTHON_TRACE=1 CYTHON_TRACE_NOGIL=1
 
 cdef int func1(int a, int b) nogil:
     cdef int x                   #  5
@@ -85,8 +85,8 @@ def run_coverage(module):
     executed = set(exec_lines) - set(missing_lines)
     # check that everything that runs with the gil owned was executed
     assert all(line in executed for line in [13, 17, 18, 20]), '%s / %s' % (exec_lines, missing_lines)
-    # currently, we do not trace nogil code lines, but that should eventually be implemented
-    # we also don't trace 'with gil' blocks in 'nogil' functions
+    # check that everything that runs in nogil sections was executed
+    assert all(line in executed for line in [6, 7, 8, 9]), '%s / %s' % (exec_lines, missing_lines)
 
 
 if __name__ == '__main__':